2016-08-20 16:36:43 +00:00
from fanficfare import geturls
2016-09-07 11:04:10 +00:00
from os import listdir , remove , rename , utime , errno , devnull
2016-08-20 16:36:43 +00:00
from os . path import isfile , join
2016-09-07 11:04:10 +00:00
from subprocess import check_output , STDOUT , call
2016-08-20 16:36:43 +00:00
import logging
from optparse import OptionParser
2016-09-07 04:14:32 +00:00
import re
2016-09-07 05:39:35 +00:00
from ConfigParser import ConfigParser
2016-09-07 10:08:36 +00:00
from tempfile import mkdtemp
from shutil import rmtree
2016-08-20 16:36:43 +00:00
# Silence everything below ERROR from the "fanficfare" logger so that only
# real failures of the library reach the console.
logging.getLogger("fanficfare").setLevel(logging.ERROR)
2016-09-07 05:39:35 +00:00
def touch(fname, times=None):
    """Create *fname* if it does not exist and update its timestamps.

    fname -- path of the file to create or refresh.
    times -- optional ``(atime, mtime)`` tuple handed to ``os.utime``;
             ``None`` means "now".
    """
    # Append mode creates a missing file without truncating an existing one.
    with open(fname, 'a'):
        utime(fname, times)
2016-08-20 16:36:43 +00:00
2016-09-07 05:39:35 +00:00
# --- URL normalisation patterns -------------------------------------------
# Captures the canonical "fanfiction.net/s/<id>" part of a story URL and
# discards any chapter number / title slug that follows it.
ffnet = re.compile(r'(fanfiction.net/s/\d*)/?.*')
# Captures everything after the http:// or https:// scheme.
neutral = re.compile(r'https?://(.*)')
# Captures the part of a file name before its last hyphen.
story_name = re.compile(r'(.*)-.*')

# --- Patterns matched against the text printed by the fanficfare CLI -------
equal_chapters = re.compile(r'.* already contains \d* chapters.')
chapter_difference = re.compile(r'.* contains \d* chapters, more than source: \d*.')
bad_chapters = re.compile(r".* doesn't contain any recognizable chapters, probably from a different source. Not updating.")
no_url = re.compile(r'No story URL found in epub to update.')
2016-09-07 05:39:35 +00:00
def parse_url(url):
    """Normalise a story URL.

    fanfiction.net story links are reduced to ``www.fanfiction.net/s/<id>``;
    any other http(s) link just loses its scheme.  A string matching
    neither pattern is returned unchanged.
    """
    # Search each pattern once and reuse the match object.
    match = ffnet.search(url)
    if match:
        return "www." + match.group(1)
    match = neutral.search(url)
    if match:
        return match.group(1)
    return url
2016-09-07 10:08:36 +00:00
def get_files(mypath, filetype=None, fullpath=False):
    """List the regular files directly inside *mypath*.

    mypath   -- directory to scan (not recursive).
    filetype -- optional suffix (e.g. ``".epub"``); when given, only names
                ending with it are kept.
    fullpath -- when true, return paths joined with *mypath* instead of
                bare file names.

    Returns a list in the order produced by ``os.listdir``.
    """
    names = [
        f for f in listdir(mypath)
        if isfile(join(mypath, f)) and (not filetype or f.endswith(filetype))
    ]
    if fullpath:
        return [join(mypath, f) for f in names]
    return names
2016-09-07 11:37:56 +00:00
def check_regexes(output):
    """Raise ``ValueError`` if *output* contains a known failure message.

    *output* is the text captured from a fanficfare run.  Three conditions
    are treated as errors: the epub already holds as many chapters as the
    site, the epub has no recognisable chapters, or the epub carries no
    story URL.  Returns ``None`` when none of them is present.
    """
    if equal_chapters.search(output):
        raise ValueError("Issue with story, site is broken. Story likely hasn't updated on site yet.")
    if bad_chapters.search(output):
        raise ValueError("Something is messed up with the site or the epub. No chapters found.")
    if no_url.search(output):
        raise ValueError("No URL in epub to update from. Fix the metadata.")
2016-09-07 05:39:35 +00:00
2016-09-07 11:04:10 +00:00
def _run_command(args, cwd=None):
    """Run *args* (a list, no shell) and return its stdout+stderr as text."""
    return check_output(args, cwd=cwd, stderr=STDOUT, universal_newlines=True)


def main(user, password, server, label, inout_file, path):
    """Fetch story URLs from an IMAP mailbox and download/update them.

    user, password, server, label -- passed straight to
        ``geturls.get_urls_from_imap`` to collect story URLs.
    inout_file -- text file with one URL per line.  URLs found in it are
        retried on this run; the file is then emptied and every URL that
        fails during this run is appended to it again.
    path -- location of a calibre library, or a false value.  With a
        library, each story is exported from calibre (if present),
        updated with fanficfare and re-added.  Without one, stories are
        downloaded into the current working directory.

    Returns ``None``.  Per-URL failures are printed and recorded in
    *inout_file* instead of being raised.
    """
    # Function-scope imports keep this block self-contained: the module
    # header only exposes ``os.errno`` (removed in newer Pythons) and does
    # not import ``shutil.move``.
    import errno
    from shutil import move

    library_args = []
    if path:
        library_args = ['--with-library', path]
        # Probe for the calibredb executable before doing any work.
        try:
            with open(devnull, 'w') as nullout:
                call(['calibredb'], stdout=nullout, stderr=nullout)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("Calibredb is not installed on this system. Cannot search the calibre library or update it.")
                return
            # Any other OSError is ignored here, as before; the calibredb
            # calls below will surface it per URL.

    touch(inout_file)

    with open(inout_file, "r") as fp:
        # Blank lines would otherwise become an empty "URL" to process.
        urls = set(line for line in (raw.strip() for raw in fp) if line)

    # Empty the retry file; failures from this run are appended below.
    with open(inout_file, "w") as fp:
        fp.write("")

    urls |= geturls.get_urls_from_imap(server, user, password, label)
    urls = set(parse_url(x) for x in urls)

    if urls:
        print("URLs to parse: {}".format(", ".join(urls)))

    # Scratch directory for exported/downloaded epubs; it is expected to
    # hold at most one epub at a time.
    loc = mkdtemp()

    for url in urls:
        print("Working with url {}".format(url))
        storyId = None
        try:
            if path:
                try:
                    res = _run_command(
                        ['calibredb', 'search', 'Identifiers:{}'.format(url)] + library_args)
                    storyId = res.strip()
                    print("\tStory is in calibre with id {}".format(storyId))
                    print("\tExporting file")
                    _run_command(
                        ['calibredb', 'export', storyId,
                         '--dont-save-cover', '--dont-write-opf', '--single-dir',
                         '--to-dir', loc] + library_args)
                    cur = get_files(loc, ".epub", True)[0]
                    print('\tDownloading with fanficfare, updating file "{}"'.format(cur))
                    work_dir = None
                except Exception:
                    # Any failure above is treated as "story is not in
                    # calibre": download it fresh into the scratch dir.
                    cur = url
                    work_dir = loc

                res = _run_command(['fanficfare', '-u', cur, '--update-cover'], cwd=work_dir)
                check_regexes(res)
                if chapter_difference.search(res):
                    print("\tForcing download update\n")
                    res = _run_command(
                        ['fanficfare', '-u', cur, '--force', '--update-cover'], cwd=work_dir)
                    check_regexes(res)

                cur = get_files(loc, '.epub', True)[0]

                if storyId:
                    print("\tRemoving {} from library".format(storyId))
                    _run_command(['calibredb', 'remove', storyId] + library_args)

                print("\tAdding {} to library".format(cur))
                _run_command(['calibredb', 'add', cur] + library_args)
                res = _run_command(
                    ['calibredb', 'search', 'Identifiers:{}'.format(url)] + library_args)
                print("\tAdded {} to library with id {}".format(cur, res))
                remove(cur)
            else:
                res = _run_command(['fanficfare', '-u', url, '--update-cover'], cwd=loc)
                check_regexes(res)
                cur = get_files(loc, '.epub', True)[0]
                name = get_files(loc, '.epub', False)[0]
                # shutil.move also works when the temp dir and the current
                # directory are on different filesystems; os.rename does not.
                move(cur, name)
                title_match = story_name.search(name)
                title = title_match.group(1) if title_match else name
                print("Downloaded story {} to {}".format(title, name))
        except Exception as e:
            print("Exception: {}".format(e))
            # Start the next URL with a clean scratch directory.
            rmtree(loc)
            loc = mkdtemp()
            # Record the failed URL so the next run retries it.
            with open(inout_file, "a") as fp:
                fp.write("{}\n".format(url))
            continue

    rmtree(loc)
2016-08-20 16:36:43 +00:00
# Script entry point: parse command-line flags, optionally overlay values
# from a config file, validate the credentials and run main().
if __name__ == "__main__":
    option_parser = OptionParser(usage="usage: %prog [flags]")

    option_parser.add_option('-u', '--user', action='store', dest='user', help='Email Account Username. Required.')

    option_parser.add_option('-p', '--password', action='store', dest='password', help='Email Account Password. Required.')

    option_parser.add_option('-s', '--server', action='store', dest='server', default="imap.gmail.com", help='Email IMAP Server. Default is "imap.gmail.com".')

    option_parser.add_option('-m', '--mailbox', action='store', dest='mailbox', default='INBOX', help='Email Label. Default is "INBOX".')

    option_parser.add_option('-l', '--library', action='store', dest='library', help="calibre library db location. If none is passed, then this merely scrapes the email and error file for new stories and downloads them into the current directory.")

    option_parser.add_option('-i', '--input', action='store', dest='input', default="./fanfiction.txt", help="Error file. Any urls that fail will be output here, and file will be read to find any urls that failed previously. If file does not exist will create. File is overwitten every time the program is run.")

    option_parser.add_option('-c', '--config', action='store', dest='config', help='Config file for inputs. Blank config file is provided. No default. If an option is present in whatever config file is passed it, the option will overwrite whatever is passed in through command line arguments unless the option is blank. Do not put any quotation marks in the options.')

    (options, args) = option_parser.parse_args()

    if options.config:
        # Creates an empty config file when the given path does not exist.
        touch(options.config)
        config = ConfigParser(allow_no_value=True)
        config.read(options.config)

        # (attribute on ``options``, config section, config key)
        config_overrides = (
            ('user', 'login', 'user'),
            ('password', 'login', 'password'),
            ('server', 'login', 'server'),
            ('mailbox', 'login', 'mailbox'),
            ('library', 'locations', 'library'),
            ('input', 'locations', 'input'),
        )
        for attr, section, key in config_overrides:
            # A missing section or key leaves the command-line value alone.
            if not config.has_option(section, key):
                continue
            try:
                value = config.get(section, key)
            except Exception as e:
                # Report the problem instead of silently swallowing it,
                # then fall back to the command-line value.
                print("Ignoring config option [{}] {}: {}".format(section, key, e))
                continue
            # allow_no_value=True yields None for a key without "=".
            if value is None:
                continue
            value = value.strip()
            # A blank config value does not override the command line.
            if value != "":
                setattr(options, attr, value)

    # Both credentials are required; the previous ``or`` test only fired
    # when both were missing.
    if not (options.user and options.password):
        raise ValueError("User or Password not given")

    main(options.user, options.password, options.server, options.mailbox, options.input, options.library)
2016-08-20 16:36:43 +00:00