2016-08-20 16:36:43 +00:00
from fanficfare import geturls
2023-06-06 20:02:51 +00:00
from os import listdir , remove , rename , utime , devnull , makedirs
from os . path import isfile , join , exists
2017-09-18 19:41:09 +00:00
from subprocess import check_output , STDOUT , call , PIPE
2016-08-20 16:36:43 +00:00
import logging
from optparse import OptionParser
2016-09-07 04:14:32 +00:00
import re
2020-12-22 09:06:55 +00:00
from configparser import ConfigParser
2016-09-07 10:08:36 +00:00
from tempfile import mkdtemp
2023-06-11 22:13:07 +00:00
from shutil import rmtree , copyfile , copy2
2017-07-03 02:57:37 +00:00
import socket
2017-07-03 03:22:44 +00:00
from time import strftime , localtime
2021-07-01 11:16:34 +00:00
import os
2021-05-04 21:37:01 +00:00
import errno
2016-08-20 16:36:43 +00:00
2017-08-09 09:27:57 +00:00
from multiprocessing import Pool
2016-08-20 16:36:43 +00:00
# Only let ERROR (and more severe) records through from the "fanficfare"
# logger.
logging.getLogger(
    "fanficfare"
).setLevel(logging.ERROR)
2017-09-18 19:41:09 +00:00
2017-07-04 01:57:24 +00:00
class bcolors:
    """ANSI escape sequences used by ``log`` to style terminal output."""

    # Colour codes.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Resets every attribute set by the codes in this class.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
2017-09-18 19:41:09 +00:00
2017-08-09 09:27:57 +00:00
def log(msg, color=None, output=True):
    """Format *msg* behind a bold timestamp and either print or return it.

    Args:
        msg: Object to report; it is rendered through ``str.format``.
        color: Optional colour name.  'BLUE', 'GREEN', 'WARNING', 'FAIL',
            'BOLD' and 'UNDERLINE' select the matching ``bcolors`` code; any
            other truthy value selects ``bcolors.HEADER``.  A falsy value
            leaves the message uncoloured.
        output: When true the line is printed and an empty string is
            returned; otherwise nothing is printed and the line is returned
            with a trailing newline so the caller can buffer it.

    Returns:
        ``""`` when the line was printed, else the formatted line plus a
        newline.
    """
    stamp = strftime('%m/%d/%Y %H:%M:%S', localtime())
    prefix = '{}{}{}: \t '.format(bcolors.BOLD, stamp, bcolors.ENDC)

    if color:
        named_codes = (
            ('BLUE', bcolors.OKBLUE),
            ('GREEN', bcolors.OKGREEN),
            ('WARNING', bcolors.WARNING),
            ('FAIL', bcolors.FAIL),
            ('BOLD', bcolors.BOLD),
            ('UNDERLINE', bcolors.UNDERLINE),
        )
        # Unknown names fall back to the header colour.
        col = next(
            (code for name, code in named_codes if color == name),
            bcolors.HEADER)
        line = prefix + '{}{}{}'.format(col, msg, bcolors.ENDC)
    else:
        line = prefix + '{}'.format(msg)

    if not output:
        return line + "\n"
    print(line)
    return ""
2017-07-03 03:22:44 +00:00
2017-09-18 19:41:09 +00:00
2016-09-07 05:39:35 +00:00
def touch(fname, times=None):
    """Create *fname* if it is missing and set its access/modified times.

    Args:
        fname: Path of the file to create or update.
        times: Passed straight to ``os.utime``; ``None`` means "now".
    """
    # Opening in append mode creates the file without truncating it.
    handle = open(fname, 'a')
    try:
        utime(fname, times)
    finally:
        handle.close()
2016-08-20 16:36:43 +00:00
2016-09-07 05:39:35 +00:00
2021-07-01 11:16:34 +00:00
# (pattern, prefix) pairs tried in order by parse_url(): the first pattern
# that matches wins and the canonical URL is prefix + group(1).
# Raw strings keep regex escapes such as \d out of Python's own escape
# processing, and the dots in host names are escaped so they only match a
# literal ".".
url_parsers = [
    (re.compile(r'(fanfiction\.net/s/\d*/?).*'), "www."),  # ffnet
    (re.compile(r'(archiveofourown\.org/works/\d*)/?.*'), ""),  # ao3
    (re.compile(r'(fictionpress\.com/s/\d*)/?.*'), ""),  # fictionpress
    (re.compile(r'(royalroad\.com/fiction/\d*)/?.*'), ""),  # royalroad
    (re.compile(r'https?://(.*)'), ""),  # other sites
]

# Captures everything before the last "-" of a file name.
story_name = re.compile(r'(.*)-.*')

# Messages looked for in the downloader's command output.
equal_chapters = re.compile(r'.* already contains \d* chapters.')
chapter_difference = re.compile(
    r'.* contains \d* chapters, more than source: \d*.')
# \s+ between the two sentences so the match does not depend on how many
# spaces separate them.
bad_chapters = re.compile(
    r".* doesn't contain any recognizable chapters, "
    r"probably from a different source.\s+Not updating.")
no_url = re.compile(r'No story URL found in epub to update.')
more_chapters = re.compile(
    r".*File\(.*\.epub\) Updated\(.*\) more recently than Story\(.*\) - Skipping")
2016-09-07 11:37:56 +00:00
2016-09-07 05:39:35 +00:00
def parse_url(url):
    """Reduce *url* to its canonical form using ``url_parsers``.

    The parsers are tried in order; for the first one whose pattern is found
    in *url* the result is that parser's prefix followed by the pattern's
    first capture group.  If no pattern is found *url* is returned unchanged.
    """
    for pattern, prefix in url_parsers:
        found = pattern.search(url)
        if found:
            return prefix + found.group(1)
    return url
2017-09-18 19:41:09 +00:00
2016-09-07 10:08:36 +00:00
def get_files(mypath, filetype=None, fullpath=False):
    """Return the regular files directly inside *mypath*, sorted by name.

    ``os.listdir`` returns entries in arbitrary order.  Callers take the
    first element and also pair up the results of separate calls, so the
    listing is sorted to make those choices deterministic.

    Args:
        mypath: Directory to list (not searched recursively).
        filetype: Optional suffix (or tuple of suffixes) a file name must end
            with; a falsy value keeps every file.
        fullpath: When true each name is joined onto *mypath*.

    Returns:
        A list of file names, or of joined paths when *fullpath* is true.
    """
    names = sorted(
        entry for entry in listdir(mypath)
        if isfile(join(mypath, entry))
        and (not filetype or entry.endswith(filetype))
    )
    if fullpath:
        return [join(mypath, name) for name in names]
    return names
2017-09-18 19:41:09 +00:00
2016-09-07 11:37:56 +00:00
def check_regexes(output):
    """Raise ``ValueError`` when *output* contains a known failure message.

    The module-level patterns ``equal_chapters``, ``bad_chapters`` and
    ``no_url`` are checked in that order; the first one found in *output*
    decides the error text.  Nothing is returned when none of them match.
    """
    failure_checks = (
        (equal_chapters,
         "Issue with story, site is broken. Story likely hasn't updated on site yet."),
        (bad_chapters,
         "Something is messed up with the site or the epub. No chapters found."),
        (no_url,
         "No URL in epub to update from. Fix the metadata."),
    )
    for pattern, message in failure_checks:
        if pattern.search(output):
            raise ValueError(message)
2016-09-07 05:39:35 +00:00
2017-09-18 19:41:09 +00:00
2017-08-09 09:27:57 +00:00
def _run_shell(command):
    """Run *command* through the shell and return its decoded output.

    stderr is merged into the captured output and stdin is a pipe.  A
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    return check_output(
        command, shell=True, stderr=STDOUT, stdin=PIPE).decode('utf-8')


def _fanficfare_update_command(moving, target, loc, force=False):
    """Build the shell command that downloads or updates *target*.

    *moving* is an optional, already shell-safe prefix such as ``cd DIR && ``;
    *target* is a story URL or an epub path; *loc* is the directory holding
    the ``personal.ini`` to use.
    """
    from shlex import quote

    return (
        '{}python3.9 -m fanficfare.cli -u {} {}--update-cover '
        '--non-interactive --config={} -o "is_adult=true"'
    ).format(
        moving,
        quote(target),
        '--force ' if force else '',
        quote(join(loc, 'personal.ini')))


def downloader(args):
    """Download or update one story and, with a library, re-import it.

    Args:
        args: Sequence ``(url, inout_file, path, live)``.
            url -- story URL to fetch.
            inout_file -- file the URL is appended to when processing fails,
                so that it can be retried on a later run.
            path -- pre-built ``calibredb`` option string (a shell fragment
                naming the library), or a falsy value to simply download
                into the current directory.
            live -- forwarded to ``log`` as its ``output`` flag; when false
                the messages are buffered and printed once at the end.

    Work happens in a fresh temporary directory which is always removed.  A
    copy of the resulting epub is kept in ``/config/downloads``.
    """
    from shlex import quote

    url, inout_file, path, live = args
    loc = mkdtemp()
    file_backup = '/config/downloads'
    output = ""
    output += log("Working with url {}".format(url), 'HEADER', live)
    storyId = None
    try:
        # Set-up lives inside the try so that a failure here re-queues the
        # URL instead of aborting the run.
        if not exists(file_backup):
            makedirs(file_backup)
        copyfile("/config/personal.ini", "{}/personal.ini".format(loc))
        copyfile("/config/defaults.ini", "{}/defaults.ini".format(loc))

        if path:
            # The URL and file names are untrusted text, so they are
            # shell-quoted; `path` is deliberately a ready-made option string.
            search_command = 'calibredb search {} {}'.format(
                quote("Identifiers:{}".format(url)), path)
            try:
                storyId = _run_shell(search_command)
                output += log("\tStory is in calibre with id {}".format(storyId), 'BLUE', live)
                output += log("\tExporting file", 'BLUE', live)
                _run_shell(
                    'calibredb export {} --dont-save-cover --dont-write-opf '
                    '--single-dir --to-dir {} {}'.format(
                        storyId, quote(loc), path))
                # TODO: This causes the same book to be duplicated a whole
                # bunch of times. Find solution.
                cur = get_files(loc, ".epub", True)[0]
                output += log(
                    '\tDownloading with fanficfare, updating file "{}"'.format(cur),
                    'GREEN',
                    live)
                moving = ""
            except Exception:
                # Any failure of the lookup/export is treated as "story is
                # not in calibre"; interrupts are no longer swallowed here.
                output += log("\tStory is not in Calibre", 'WARNING', live)
                cur = url
                moving = 'cd {} && '.format(quote(loc))

            output += log(
                '\tRunning: {}python3.9 -m fanficfare.cli -u "{}" --update-cover --non-interactive'.format(
                    moving, cur), 'BLUE', live)
            res = _run_shell(_fanficfare_update_command(moving, cur, loc))
            check_regexes(res)
            if chapter_difference.search(res) or more_chapters.search(res):
                output += log("\tForcing download update due to:",
                              'WARNING', live)
                for line in res.split("\n"):
                    if line:
                        output += log("\t\t{}".format(line), 'WARNING', live)
                res = _run_shell(
                    _fanficfare_update_command(moving, cur, loc, force=True))
                check_regexes(res)

            names = get_files(loc, '.epub')
            if not names:
                raise Exception(f'Could not find .epub, work likely did not download. Exception raised on get_files(loc, ".epub", True)[0]. result from command is: {res}')
            # Derive path and backup name from one listing so they always
            # refer to the same file.
            name = names[0]
            cur = join(loc, name)

            if storyId:
                output += log("\tRemoving {} from library".format(storyId),
                              'BLUE', live)
                _run_shell('calibredb remove {} {}'.format(path, storyId))

            output += log("\tAdding {} to library".format(cur), 'BLUE', live)
            _run_shell(
                'calibredb add -s "FFF" -d {} {}'.format(path, quote(cur)))
            try:
                res = _run_shell(search_command)
                output += log("\tAdded {} to library with id {}".format(cur, res),
                              'GREEN', live)
            except Exception:
                output += log(
                    "It's been added to library, but not sure what the ID is.",
                    'WARNING',
                    live)
                output += log("Added /Story-file to library with id 0", 'GREEN', live)
            copy2(cur, join(file_backup, name))
            remove(cur)
        else:
            res = _run_shell(
                'cd {} && fanficfare -u {} --update-cover'.format(
                    quote(loc), quote(url)))
            check_regexes(res)
            name = get_files(loc, '.epub', False)[0]
            cur = join(loc, name)
            copy2(cur, join(file_backup, name))
            # Copy + remove rather than os.rename: the temporary directory
            # may be on a different filesystem than the working directory.
            copy2(cur, name)
            remove(cur)
            title = story_name.search(name)
            output += log(
                "Downloaded story {} to {}".format(
                    title.group(1) if title else name,
                    name),
                'GREEN',
                live)
        if not live:
            print(output.strip())
    except Exception as e:
        output += log("Exception: {}".format(e), 'FAIL', live)
        if not live:
            print(output.strip())
        # Put the URL back so the next run retries it.
        with open(inout_file, "a") as fp:
            fp.write("{}\n".format(url))
    except BaseException:
        # Interrupt/exit: show what was buffered, then let it propagate.
        if not live:
            print(output.strip())
        raise
    finally:
        rmtree(loc, ignore_errors=True)
2017-09-18 19:41:09 +00:00
2022-03-26 02:48:11 +00:00
def main(user, password, server, label, inout_file, path, lib_user, lib_password, live):
    """Process every URL queued in *inout_file*.

    Args:
        user, password, server, label: E-mail (IMAP) settings.  They are
            currently unused: the IMAP lookup
            (``geturls.get_urls_from_imap(server, user, password, label)``)
            is disabled, so URLs come from *inout_file* only.
        inout_file: Queue file, one URL per line.  It is created if missing,
            emptied at the start, and ends up holding the URLs that failed.
        path: calibre library location, or a falsy value to download into
            the current directory.
        lib_user, lib_password: Optional calibre credentials.
        live: Accepted for interface compatibility; ``downloader`` is always
            called with live output enabled.
    """
    from shlex import quote

    if path:
        # `downloader` splices this string into shell commands, so every
        # user-supplied value is shell-quoted, and credentials that were not
        # given are left out rather than passed as the text "None".
        library_options = ['--with-library', quote(path)]
        if lib_user:
            library_options += ['--username', quote(lib_user)]
        if lib_password:
            library_options += ['--password', quote(lib_password)]
        path = ' '.join(library_options)
        try:
            with open(devnull, 'w') as nullout:
                call(['calibredb'], stdout=nullout, stderr=nullout)
        except OSError as e:
            if e.errno == errno.ENOENT:
                log("Calibredb is not installed on this system. Cannot search the calibre library or update it.", 'FAIL')
                return
            # Any other failure to launch calibredb is unexpected; surface
            # it before the queue file is touched.
            raise

    touch(inout_file)

    with open(inout_file, "r") as fp:
        urls = {line.strip() for line in fp}
    # Blank lines are not URLs.
    urls.discard("")

    with open(inout_file, "w") as fp:
        fp.write("")

    if not urls:
        return
    urls = {parse_url(x) for x in urls}
    log("URLs to parse ({}):".format(len(urls)), 'HEADER')
    for url in urls:
        log("\t{}".format(url), 'BLUE')

    queue = sorted(urls)
    finished = 0
    try:
        for url in queue:
            downloader([url, inout_file, path, True])
            finished += 1
    except BaseException:
        # The queue file was emptied above; if the run is aborted, put the
        # URL in progress and all untouched ones back before bailing out.
        with open(inout_file, "a") as fp:
            fp.writelines("{}\n".format(x) for x in queue[finished:])
        raise

    # `downloader` appends failures one by one; rewrite without duplicates.
    with open(inout_file, "r") as fp:
        failed = {line.strip() for line in fp}
    failed.discard("")
    with open(inout_file, "w") as fp:
        fp.writelines("{}\n".format(x) for x in sorted(failed))
    return
2016-08-20 16:36:43 +00:00
def _apply_config(options, config_path):
    """Overlay the values found in *config_path* onto *options* in place.

    A value present in the file replaces the command-line value unless it is
    blank; missing sections or keys leave the option untouched.  Values are
    read raw so that characters such as ``%`` in a password are taken
    literally.

    Args:
        options: Object whose attributes hold the parsed command-line
            options (as returned by ``OptionParser.parse_args``).
        config_path: Path of the INI file to read.

    Returns:
        The same *options* object.
    """
    config = ConfigParser(allow_no_value=True)
    config.read(config_path)

    # (attribute on options, config section, config key)
    text_options = (
        ('user', 'login', 'user'),
        ('password', 'login', 'password'),
        ('libuser', 'login', 'libuser'),
        ('libpassword', 'login', 'libpassword'),
        ('server', 'login', 'server'),
        ('mailbox', 'login', 'mailbox'),
        ('library', 'locations', 'library'),
        ('input', 'locations', 'input'),
    )
    for attr, section, key in text_options:
        value = config.get(section, key, raw=True, fallback=None)
        if value is None:
            continue
        value = value.strip()
        if value != "":
            setattr(options, attr, value)

    # `live` is a boolean: parse it as one instead of treating it as text.
    raw_live = config.get('output', 'live', raw=True, fallback=None)
    if raw_live is not None and raw_live.strip() != "":
        try:
            options.live = config.getboolean('output', 'live', raw=True)
        except ValueError:
            logging.warning(
                "Ignoring [output] live=%r in %s: not a boolean",
                raw_live, config_path)
    return options


if __name__ == "__main__":
    option_parser = OptionParser(usage="usage: %prog [flags]")

    option_parser.add_option(
        '-u', '--user', action='store', dest='user',
        help='Email Account Username. Required.')
    option_parser.add_option(
        '-p', '--password', action='store', dest='password',
        help='Email Account Password. Required.')
    option_parser.add_option(
        '-s', '--server', action='store', dest='server',
        default="imap.gmail.com",
        help='Email IMAP Server. Default is "imap.gmail.com".')
    option_parser.add_option(
        '-m', '--mailbox', action='store', dest='mailbox',
        default='INBOX',
        help='Email Label. Default is "INBOX".')
    option_parser.add_option(
        '-l', '--library', action='store', dest='library',
        help="calibre library db location. If none is passed, then this merely scrapes the email and error file for new stories and downloads them into the current directory.")
    option_parser.add_option(
        '-i', '--input', action='store', dest='input',
        default="./fanfiction.txt",
        help="Error file. Any urls that fail will be output here, and file will be read to find any urls that failed previously. If file does not exist will create. File is overwitten every time the program is run.")
    option_parser.add_option(
        '-c', '--config', action='store', dest='config',
        help='Config file for inputs. Blank config file is provided. No default. If an option is present in whatever config file is passed it, the option will overwrite whatever is passed in through command line arguments unless the option is blank. Do not put any quotation marks in the options.')
    option_parser.add_option(
        '-o', '--output', action='store_true', dest='live',
        help='Include this if you want all the output to be saved and posted live. Useful when multithreading.')
    option_parser.add_option(
        '-q', '--libuser', action='store', dest='libuser',
        help='Calibre User. Required.')
    option_parser.add_option(
        '-w', '--libpassword', action='store', dest='libpassword',
        help='Calibre Password. Required.')

    (options, args) = option_parser.parse_args()

    if options.config:
        # Creates an empty config file when the given path does not exist.
        touch(options.config)
        _apply_config(options, options.config)

    # Both credentials are required; fail when either one is missing.
    if not (options.user and options.password):
        raise ValueError("User or Password not given")

    main(
        options.user,
        options.password,
        options.server,
        options.mailbox,
        options.input,
        options.library,
        options.libuser,
        options.libpassword,
        options.live)