diff --git a/root/config.default/defaults.ini b/root/config.default/defaults.ini
index b45ac5e..1328827 100644
--- a/root/config.default/defaults.ini
+++ b/root/config.default/defaults.ini
@@ -1,4 +1,4 @@
-# Copyright 2015 Fanficdownloader team, 2016 FanFicFare team
+# Copyright 2015 Fanficdownloader team, 2021 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@
## titlepage_entries: category,genre, status,dateUpdated,rating
## [epub]
## # overrides defaults & site section
-## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
+## titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated
## [www.whofic.com:epub]
## # overrides defaults, site section & format section
## titlepage_entries: category,genre, status,datePublished
@@ -34,7 +34,7 @@
## titlepage_entries: category
## Some sites also require the user to confirm they are adult for
-## adult content. Uncomment by removing '#' in front of is_adult.
+## adult content. is_adult is treated as false when it is not set.
is_adult:true
## All available titlepage_entries and the label used for them:
@@ -70,11 +70,11 @@ dateUpdated_label:Updated
dateCreated_label:Packaged
## Rating depends on the site. Some use K,T,M,etc, and some PG,R,NC-17
rating_label:Rating
-## Also depends on the site.
+## Also depends on the site.
warnings_label:Warnings
numChapters_label:Chapters
numWords_label:Words
-## www.fanfiction.net, fictionalley.com, etc.
+## www.fanfiction.net, fictionalley-archive.com, etc.
site_label:Publisher
## ffnet, fpcom, etc.
siteabbrev_label:Site Abbrev
@@ -84,9 +84,17 @@ authorId_label:Author ID
## Primarily to put specific values in dc:subject tags for epub. Will
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
-## The version of fanficdownloader
+## The version of FanFicFare
version_label:Downloader Version
+## The metadata entry 'site' predates Calibre integration, and was
+## later labeled Publisher and used to fill Calibre's Publisher field.
+## Because users keep expecting it to be 'publisher' instead,
+## 'publisher' is now a copy of 'site' and the Calibre plugin now uses
+## 'publisher' instead. Default titlepage_entries are also changed.
+include_in_publisher:site
+publisher_label:Publisher
+
## Date formats used by FanFicFare. Published and Update don't have time.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
@@ -94,12 +102,15 @@ dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d
dateUpdated_format:%%Y-%%m-%%d
-## items to include in the title page
-## Empty metadata entries will *not* appear, even if in the list.
-## You can include extra text or HTML that will be included as-is in
-## the title page. Eg: titlepage_entries: ..., ,summary, ,...
+## Items to include in the title page
+## Empty metadata entries will *not* appear, unless .SHOW_EMPTY is
+## appended. Eg: titlepage_entries: ..., characters.SHOW_EMPTY,
+## ships,... will always display Characters, but only show
+## Relationships if there are any. You can include extra text or HTML
+## that will be included as-is in the title page. Eg:
+## titlepage_entries: ..., ,summary, ,...
## All current formats already include title and author.
-titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
+titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,publisher,description
## Try to collect series name and number of this story in series.
## Some sites (ab)use 'series' for reading lists and personal
@@ -121,11 +132,50 @@ include_tocpage: true
## explicitly set the encoding and order if you need to. The special
## value 'auto' will call chardet and use the encoding it reports if
## it has +90% confidence. 'auto' is not reliable.
-#website_encodings: auto, utf8, Windows-1252
+#website_encodings: utf8, Windows-1252, iso-8859-1
+## For sites (or individual stories) with problematic characters you
+## can include ':ignore' after the encoding. This will discard
+## unrecognized characters, but likely also prevent the rest of the
+## encoding list from ever being used.
+#website_encodings: utf8:ignore, Windows-1252, iso-8859-1
+
+## When using 'auto' in website_encodings, you can tweak the
+## confidence required to use the encoding chardet detected.
+#chardet_confidence_limit:0.9
+
+## python string Template, string with ${title}, ${author} etc, same as titlepage_entries
+## Can include directories.
+#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
+#output_filename: books/${formatname}/${siteabbrev}/${authorId}/${title}-${siteabbrev}_${storyId}${formatext}
+output_filename: ${title}-${siteabbrev}_${storyId}${formatext}
+
+## Make directories as needed.
+make_directories: true
+
+## Always overwrite output files. Otherwise, the downloader checks
+## the timestamp on the existing file and only overwrites if the story
+## has been updated more recently. Command line version only
+#always_overwrite: true
+
+## put output (with output_filename) in a zip file zip_filename.
+zip_output: false
+
+## Can include directories.
+zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
+
+## Normally, try to make the filenames 'safe' by removing invalid
+## filename chars. Applies to default_cover_image, force_cover_image,
+## output_filename & zip_filename.
+allow_unsafe_filename: false
+
+## The regex pattern of 'unsafe' filename chars for above. First
+## character . OR any one or more characters that are NOT a letter,
+## number, or one of _. []()&'-
+output_filename_safepattern:(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)
## entries to make epub subjects and calibre tags
## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
-include_subject_tags: extratags, genre, category, characters, ships, status
+include_subject_tags: extratags, genre, category, characters, ships, lastupdate, status
## extra tags (comma separated) to include, primarily for epub.
extratags: FanFiction
@@ -139,9 +189,14 @@ extratags: FanFiction
#extraships:
#extrawarnings:
-## Add this to genre if there's more than one category.
+## Add this to *genre* if there's more than one category. Applied
+## *before* genre replace_metadata/include/exclude_metadata
#add_genre_when_multi_category: Crossover
+## Add this category if there's more than one category (before this is added).
+## Applied *after* category replace_metadata/include/exclude_metadata
+#add_category_when_multi_category: Crossover
+
## default_value_(entry) can be used to set the value for a metadata
## entry when no value has been found on the site. For example, some
## sites doesn't have a status metadatum. If uncommented, this will
@@ -150,8 +205,10 @@ extratags: FanFiction
## Can also be used for other metadata values
#default_value_category:FanFiction
-## number of seconds to sleep between calls to the story site. May by
+## number of seconds to sleep between calls to the story site. May be
## useful if pulling large numbers of stories or if the site is slow.
+## The actual sleep time used on each request is a random number
+## between 0.5 and 1.5 times slow_down_sleep_time.
#slow_down_sleep_time:0.5
## How long to wait for each HTTP connection to finish. Longer times
@@ -159,6 +216,32 @@ extratags: FanFiction
## prevent excessive wait when your network or the site is down.
connect_timeout:60.0
+## For use only with CLI version--run a command on the generated file
+## after it's produced. All of the titlepage_entries values are
+## available, plus output_filename.
+#post_process_cmd: addbook -f "${output_filename}" -t "${title}"
+
+## Some operating systems and command shells have problems with some
+## characters. When set, post_process_safepattern will be applied to
+## each metadata item passed to post_process_cmd before it's called.
+#post_process_safepattern:(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)
+
+## For use only with CLI version--run a command *before* the output
+## file is written. All of the titlepage_entries values are
+## available, (but not output_filename). Can be used to generate
+## cover images that are then included in the output ebook using
+## default_cover_image.
+#pre_process_cmd:tenprintcover.py --author "${author}" --title "${title}" --cover cover.png
+
+## Some operating systems and command shells have problems with some
+## characters. When set, pre_process_safepattern will be applied to
+## each metadata item passed to pre_process_cmd before it's called.
+#pre_process_safepattern:(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)
+
+## For use only with CLI version--display a simple progress bar while
+## downloading--one dot(.) per network fetch. Same as using --progressbar
+#progressbar:false
+
## Use regular expressions to find and replace (or remove) metadata.
## For example, you could change Sci-Fi=>SF, remove *-Centered tags,
## etc. See http://docs.python.org/library/re.html (look for re.sub)
@@ -183,12 +266,12 @@ connect_timeout:60.0
## calibre_author: calibre_author_LIST=>^(.{,100}).*$=>\1
##
## You can 'split' one list item into multiple list entries by using
-## \' in the replacement string.
+## \, in the replacement string.
##
## Examples:
#replace_metadata:
# genre,category=>Sci-Fi=>SF
-# Puella Magi Madoka Magica.* => Madoka
+# Puella Magi Madoka Magica.*=>Madoka
# Comedy=>Humor
# Crossover: (.*)=>\1
# title=>(.*)Great(.*)=>\1Moderate\2
@@ -223,6 +306,39 @@ connect_timeout:60.0
## page:
## https://github.com/JimmXinu/FanFicFare/wiki/InExcludeMetadataFeature
+## When set true, conditionals for both replace_metadata and
+## Include/Exclude metadata will check against each list value rather
+## than the entire list as a string which was the case prior to this
+## change (~Dec 2018). replace_metadata conditionals (after &&) can
+## also now use ==, !=, =~ and !~. (=> is the same as =~ .)
+## Set false to get the old behavior. You can also compare the list
+## as string by using _LIST, such as category_LIST
+conditionals_use_lists:true
+
+## You can exclude chapters from a story by listing their chapter URLs
+## in ignore_chapter_url_list. Chapter URLs will be normalized before
+## being used, so you can use either longer or normalized chapter
+## URLs.
+## NOTE: This will cause numWords(word count) to be off for most sites.
+## This setting should be used in a specific story's section.
+## Example:
+#[https://forums.spacebattles.com/threads/lengthy-thread-name.100849/]
+#ignore_chapter_url_list:
+# https://forums.spacebattles.com/posts/10157294/
+# https://forums.spacebattles.com/posts/10157297/
+# https://forums.spacebattles.com/posts/10157299/
+# https://forums.spacebattles.com/threads/lengthy-thread-name.100849/#post-10157400
+
+## Rarely, some stories on some sites (observed on SpaceBattles) might
+## have the same chapter URL listed more than once. Set true for
+## those stories.
+#dedup_chapter_list:false
+
+## Some sites/authors/stories use several br tags for scene/section
+## breaks. When set replace_xbr_with_hr:X will cause FFF to search
+## for X or more consecutive br tags and replace them with br br hr br.
+#replace_xbr_with_hr:3
+
## Some readers don't show horizontal rule () tags correctly.
## This replaces them all with a centered '* * *'. (Note centering
## doesn't work on some devices either.)
@@ -233,32 +349,6 @@ connect_timeout:60.0
## br paragraphs with p tags while preserving scene breaks.
#replace_br_with_p: false
-## If you have the Generate Cover plugin installed, you can use the
-## generate_cover_settings parameter to intelligently decide which GC
-## setting to run. There are three parts 1) a template of which
-## metadata part(s) to look at, 2) a regular expression to match the
-## template, and 3) the name of the GC setting to use, which must
-## match exactly. Use this parameter in [defaults], or by site eg,
-## [ficwad.com]
-## Make sure to keep at least one space at the start of each line and
-## to escape % to %%, if used.
-## template => regexp to match => GC Setting to use.
-## To use this, make sure you go to the Generate Cover tab in FanFicFare
-## config and check 'Allow generate_cover_settings from personal.ini
-## to override'
-## You can check for an existing cover image in the metadata
-## cover_image. cover_image can be:
-## specific -- The site has an image that is specifically this story's cover
-## first -- The first image in the story or story desc is used as cover
-## default -- A default_cover_image was used
-## old -- There was already a cover image in the epub.
-## This is useful because GC plugin can use the existing image.
-#generate_cover_settings:
-# ${category} => Buffy:? [tT]he Vampire Slayer => BuffyCover
-# ${category} => Star Trek => StarTrekCover
-# ${cover_image} => => CompleteCover
-# ${cover_image} => (specific|first|default) => CoverWithOrigImage
-
## If set false, the summary will have all html stripped.
## Both this and include_images must be true to get images in the
## summary.
@@ -277,11 +367,22 @@ keep_summary_html:true
## Some attributes cause problems for EBook readers. By default,
## FanFicFare will remove all attributes except the ones specified
## from all tags. (The only exception is that tags will also
-## keep src, alt and longdesc attributes.)
+## keep src, alt and longdesc attributes. data-orighref is used by
+## internalize_text_links to preserve links when chapters are
+## inserted.)
## Example: To add 'style', 'title' and 'align' to the list to keep,
## in your personal.ini [defaults] put:
## add_to_keep_html_attrs:,style,title,align
-keep_html_attrs:href,name,class,id
+keep_html_attrs:href,name,class,id,colspan,rowspan,data-orighref
+
+## Some tags, notable chapter div tags from Base eFiction, have
+## class='chapter', which causes calibre convert to identify it as a
+## chapter and 'pagebreak' at that point, aka split the file, which
+## adds unexpected pagebreaks and breaks FFF update if an epub to epub
+## conversion is done. Remove class='chapter' from all tags by
+## default. Also affects previously downloaded chapters on epub
+## update.
+remove_class_chapter:true
## Tags listed here will be replaced with .
## For example: underlined text becomes
@@ -292,6 +393,18 @@ keep_html_attrs:href,name,class,id
## HTML and EPUB standards.
replace_tags_with_spans:u,big,small
+## By default, empty tags are removed as part of cleaning up the
+## source HTML. However, a few tags should be kept even if empty.
+## (Whitespace only, including &nbsp;, is considered empty.) This
+## setting can adjust which tags are kept.
+keep_empty_tags:p,td,th
+
+## By default, script and style tags are removed from chapter text as
+## part of cleaning up the source HTML. Found several book readers
+## that didn't treat those tags correctly. Set to empty if
+## you want to keep those tags.
+remove_tags:script,style
+
## If a chapter range was given, use this pattern for the book title.
## replace_metadata and include/exclude will be applied *after* this.
## Set to empty value to disable.
@@ -338,20 +451,27 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+(?=[^0-9]|$)
## If true, when updating an epub that already has old chapters, new
## chapters will be marked in the TOC and chapter header by using
-## chapter_title_new_pattern and chapter_title_addnew_pattern to set the chapter.
+## chapter_title_new_pattern and chapter_title_addnew_pattern to set
+## the chapter title.
+## If set to latestonly, only new chapters downloaded this time will
+## be marked (new) and existing chapters will have any (new) marks
+## removed.
+## mark_new_chapters can be true, false or latestonly
mark_new_chapters:false
## chapter title patterns use python template substitution. The
-## ${index} is the 'chapter' number and ${title} is the chapter title,
-## after applying chapter_title_strip_pattern. Those are the only
-## variables available.
+## ${number} is the 'chapter' number and ${title} is the chapter
+## title, after applying chapter_title_strip_pattern. ${index04} is
+## chapter number padded with leading zeros (mostly for internal use)
+## such as 0001. ${index} == ${number} for backward compatibility. A
+## few site adapters add additional chapter metadata.
## The basic pattern used when not using add_chapter_numbers or
## mark_new_chapters
chapter_title_def_pattern:${title}
## Pattern used with add_chapter_numbers, but not mark_new_chapters
-chapter_title_add_pattern:${index}. ${title}
+chapter_title_add_pattern:${number}. ${title}
## Pattern used with mark_new_chapters, but not add_chapter_numbers
## (new) is just text and can be changed.
@@ -359,17 +479,7 @@ chapter_title_new_pattern:(new) ${title}
## Pattern used with add_chapter_numbers and mark_new_chapters
## (new) is just text and can be changed.
-chapter_title_addnew_pattern:${index}. (new) ${title}
-
-## Uses a python template substitution. The ${title} is the default
-## title of a new anthology, in the case of a series, or
-## the first book title otherwise. This is only applied to new
-## anthologies.
-anthology_title_pattern:${title} Anthology
-
-## Add tag(s) for anthology (series) books. Set to empty to not add
-## any anthology tags.
-anthology_tags:Anthology
+chapter_title_addnew_pattern:${number}. (new) ${title}
## Reorder ships so b/a and c/b/a become a/b and a/b/c. '/' is no
## longer hard coded and can be changed and added to with
@@ -397,7 +507,7 @@ sort_ships_splits:
#keep_in_order_author:true
## User-agent
-user_agent:FFF/2.X
+user_agent:FFF/4.X
## Added for [base_xenforoforum], but can be used with other sites,
## too. Limit the 'description' to the first X *characters*
@@ -405,57 +515,266 @@ user_agent:FFF/2.X
## non-intuitive.
#description_limit:1000
+## As a work around for certain sites blocking automated downloads,
+## FFF now offers the ability to look for pages in your Chrome(or
+## Chromium-derived) or Firefox browser's cache. Requires both
+## use_browser_cache:true and browser_cache_path to be set.
+##
+## browser_cache_path needs to be set to the location of YOUR browser
+## cache. Here are a few examples for different OS. Note that all
+## have YOUR user name in them somewhere as well as a Profile name,
+## frequently Default. Make sure you have personalized (and
+## uncommented) ONE browser_cache_path setting. Also note that Chrome
+## browsers added an additional directory level, 'Cache_Data', in
+## early 2022.
+
+## Note also that browser_cache_path is recommended under [defaults],
+## but use_browser_cache should go under individual [site] sections.
+
+## windows:
+### Chrome:
+#browser_cache_path:C:\Users\YourUser\AppData\Local\Google\Chrome\User Data\Default\Cache\Cache_Data
+#browser_cache_path:C:\Users\YourUser\AppData\Local\Google\Chrome\User Data\Profile 1\Cache\Cache_Data
+### Firefox
+#browser_cache_path:C:\Users\YourUser\AppData\Local\Mozilla\Firefox\Profiles\ZjwI7Fo4.default\cache2
+
+## mac:
+### Chrome:
+#browser_cache_path:/Users/your.user/Library/Caches/Google/Chrome/Default/Cache/Cache_Data
+#browser_cache_path:/Users/your.user/Library/Caches/Google/Chrome/Profile 2/Cache/Cache_Data
+### Firefox
+#browser_cache_path:/Users/your.user/Library/Caches/Firefox/Profiles/43fkezvc.default-release/cache2
+
+## linux:
+### Chrome:
+#browser_cache_path:/home/youruser/.cache/google-chrome/Default/Cache/Cache_Data
+#browser_cache_path:/home/youruser/.cache/google-chrome/Profile 1/Cache/Cache_Data
+### Firefox
+#browser_cache_path:/home/youruser/.cache/mozilla/firefox/dk4o1y83.default-release/cache2
+
+## It's common for browser cached files to be kept for several hours
+## or more. You can limit the age of cached files FFF will use from
+## browser_cache_path with browser_cache_age_limit. Only cached files
+## that were downloaded within 'browser_cache_age_limit' hours will be
+## used. If set to -1, all cached files will be used. Note that not
+## all sites allow page caching--those will not work with the browser
+## cache feature.
+browser_cache_age_limit:4.0
+
+## If browser_cache_path is set *and* use_browser_cache:true *and*
+## use_browser_cache_only:true, then you can also set
+## open_pages_in_browser:true to have FFF attempt to open each page it
+## can't already find in browser cache in your default browser, then
+## check for it in the cache again. Note that your browser_cache_path
+## setting *must* use your default browser for this to work.
+#open_pages_in_browser:false
+
+## As a (second) work around for certain sites blocking automated
+## downloads, FFF offers the ability to request pages through nsapa's
+## fanfictionnet_ff_proxy and FlareSolverr proxy servers. See
+## https://github.com/JimmXinu/FanFicFare/wiki/ProxyFeatures for more
+## details.
+
+## FlareSolverr (https://github.com/FlareSolverr/FlareSolverr) is a
+## generic proxy that works with several otherwise blocked sites.
+## It's recommended to only set use_flaresolverr_proxy:true for
+## specific sites.
+## FlareSolverr v1 doesn't work with some sites anymore (including
+## ffnet), but FlareSolverr v2+ cannot download images.
+## use_flaresolverr_proxy:true assumes FSv2 and automatically sets
+## include_images:false
+## If you want to use FSv1 with images, you can set
+## use_flaresolverr_proxy:withimages
+## flaresolverr_proxy_timeout is in integer milliseconds
+
+#[www.fanfiction.net]
+#use_flaresolverr_proxy:true
+## option settings, these are the defaults:
+#flaresolverr_proxy_address:localhost
+#flaresolverr_proxy_port:8191
+#flaresolverr_proxy_protocol:http
+#flaresolverr_proxy_timeout:60000
+
+## Because some adapters can pull chapter URLs from human posts, the
+## odds of errors in the chapter URLs can be higher for some
+## sites/stories. You can set continue_on_chapter_error:true to
+## continue on after failing to download a chapter and instead record
+## an error message in the ebook for that chapter.
+continue_on_chapter_error:false
+
+## Append this to chapter titles that errored. Only used with
+## continue_on_chapter_error:true
+## Set empty to not mark failed chapters.
+chapter_title_error_mark:(CHAPTER ERROR)
+
+## The FFF CLI can fetch story URLs from unread emails when configured
+## to read from your IMAP mail server. The example shows GMail, but
+## other services that support IMAP can be used. GMail requires you
+## to turn on an option to enable IMAP access. Only the CLI uses these
+## options--the Calibre Plugin stores these separately.
+##
+## It's safest if you create a separate email account that you use
+## only for your story update notices. FanFicFare cannot guarantee
+## that malicious code cannot get your email password once you've
+## saved it. Use this feature at your own risk.
+##
+#imap_server:imap.gmail.com
+#imap_username:youraddress@gmail.com
+#imap_password:XXXXXXXX
+#imap_folder:INBOX
+
+## Mark mails with story URLs read:
+## imap_mark_read can be 'true', 'false'(default) or 'downloadonly'.
+##
+## If 'true', unread emails will be marked as read when
+## either CLI option --imap to list the story URLs or --download-imap
+## to download story URLs from email are used.
+##
+## If 'downloadonly', unread emails will be marked as read
+## only when CLI --download-imap to download story URLs from email are
+## used.
+##
+## If 'false', unread emails will not be marked as read.
+##
+## Only unread emails will be searched for story URLs, and only emails
+## containing valid story URLs will ever be marked read.
+##
+#imap_mark_read:true
+
+## Some authors use 'Zalgo' text--arbitrary and often excessive
+## added/combined unicode markings--to indicate 'noise' of some kind.
+## While a critical part of some languages, when over used it can also
+## cause problems for some ebook readers, such as Kindle.
+## https://stackoverflow.com/questions/6579844/how-does-zalgo-text-work
+## This setting will reduce the number of 'Zalgo' characters. Set to 0
+## to remove all of them, -1 or leave undefined for no limit.
+## Note: This will also remove other 'marked' unicode characters. If
+## set to 0, café will become cafe, déjà -> deja, etc. 1 will keep
+## single marks and is the recommended setting if you use it.
+#max_zalgo:1
+
+## Apply adapter's normalize_chapterurl() to all links in chapter
+## texts, if they match the known pattern(s) for chapter URLs. As of
+## writing, base_xenforoforum, adapter_archiveofourownorg &
+## adapter_tthfanficorg implement normalize_chapterurl().
+normalize_text_links:true
+
+## Search all links in the chapter texts and convert relative links to
+## absolute links so they work from ebooks. Only works with epub and
+## html output formats.
+fix_relative_text_links:true
+
+## Search all links in chapter texts and, if they match any included
+## chapter URLs, replace them with links to the chapter in the
+## download. Only works with epub and html output formats.
+## normalize_text_links will improve URL matching considerably.
+internalize_text_links:true
+
+## Of the ~140 supported sites, only ~50 have been checked to work
+## correctly with a page cache. The page cache is used to save already
+## downloaded pages which can be called more than once, especially in
+## the Calibre plugin.
+use_basic_cache:false
+
[base_efiction]
+use_basic_cache:true
+
## At the time of writing, eFiction Base adapters allow downloading
## the whole story in bulk using the 'Print' feature. If 'bulk_load'
## is set to 'true', both metadata and chapters can be loaded in one
## step
bulk_load:true
+## Extra metadata that many base_efiction sites have
+add_to_extra_valid_entries:,storynotes
+storynotes_label:Story Notes
+add_to_extra_titlepage_entries:,storynotes
+
[base_xenforoforum]
+use_basic_cache:true
+## Some sites require login for some stories
+#username:YourName
+#password:yourpassword
+
+## XenForo sites require login for some stories, but don't report that
+## to FFF. To download those, set your username, password and set
+## always_login:true
+#always_login:false
+
+## We've been requested by the site(s) admin to rein in hits. If you
+## download fewer stories less often you can likely get by with
+## reducing this sleep.
+slow_down_sleep_time:6
-cover_exclusion_regexp:/styles/
+## exclude emoji and default avatars.
+cover_exclusion_regexp:(/styles/|xenforo/avatars/avatar.*\.png|https://cdn\.jsdelivr\.net/gh/|https://cdn\.jsdelivr\.net/emojione)
+
+## use author(original poster)'s avatar as cover image when true.
+author_avatar_cover:false
## I saw lots of chapters name simply '1.1' etc during testing.
strip_chapter_numbers:false
## Copy title to tagsfromtitle for parsing tags.
-add_to_extra_valid_entries:,tagsfromtitle,forumtags
+add_to_extra_valid_entries:,tagsfromtitledetect,tagsfromtitle,forumtags,parentforums
## '.NOREPL' tells the system to *not* apply title's
## in/exclude/replace_metadata -- Only works on include_in_ lines.
-include_in_tagsfromtitle:title.NOREPL
+include_in_tagsfromtitledetect:title.NOREPL,threadmarks_title
+include_in_tagsfromtitle:tagsfromtitledetect
tagsfromtitle_label:Tags from Title
forumtags_label:Tags from Forum
+parentforums_label:Parent Forums
+keep_in_order_parentforums:true
-## might want to do this, maybe not. Will often include category, but
-## also often include non-category stuff.
-# include_in_category:tagsfromtitle
-
+## What forum a thread is in can be an indicator of its category, but
+## most are too general to be useful; only keep the more specific
+## ones. You can replace this list in personal.ini
+## [base_xenforoforum], or add to it in the sites' [section].
add_to_include_metadata_pre:
-# only keep tagsfromtitle with ( or [ in.
- tagsfromtitle=~[\[\(]
+ parentforums==Worm
+ parentforums==NSFW
+ parentforums==Original Fiction
## disable chapter range in title because of tagsfromtitle processing.
title_chapter_range_pattern:
add_to_replace_metadata:
+# 'detects' if there are 'tags'. That is, [( in title. Replaces
+# previous version's include_metadata_pre. Can't do on tagsfromtitle
+# because that's applied to each part after split.
+ tagsfromtitledetect=>^[^\]\)]+$=>
+# change ][ and )( to , for [AU][Othertag] etc
+ tagsfromtitle=>\] *\[=>,
+ tagsfromtitle=>\) *\(=>,
# for QuestionableQuesting NSFW subforum.
- tagsfromtitle=>^\[NSFW\].*?([\(\[]([^\]\)]+)[\)\]]).*?$=>NSFW,\2
-# remove anything outside () or []
- tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\2
+ tagsfromtitle=>^\[NSFW\].*?((?P<br>\[)|(?P<pr>\())(?P<tag>(?(br)[^\]]|(?(pr)[^\)]))+)(?(br)\]|(?(pr)\))).*?$=>NSFW\,\g<tag>
+# remove anything outside () or []. Note \, at the end used to
+# prevent looping back so '[Worm(AU)]' becomes 'Worm(AU)' not just 'AU'
+ tagsfromtitle=>^.*?((?P<br>\[)|(?P<pr>\())(?P<tag>(?(br)[^\]]|(?(pr)[^\)]))+)(?(br)\]|(?(pr)\))).*?$=>\g<tag>\,
# remove () []
# tagsfromtitle=>[\(\)\[\]]=>
-# change (spaces)slash(or semicolon)(spaces) to comma
- tagsfromtitle=> *[/;] *=>,
+# shield these html entities from the ';' pattern below
+ tagsfromtitle=>&(amp|lt|gt);=>&\1FFF_ENT_MARKER
+# change (spaces)slash(or semicolon, pipe or comma)(spaces) to comma
+ tagsfromtitle=> *[/;|,] *=>,
+# revert html entities
+ tagsfromtitle=>&(amp|lt|gt)FFF_ENT_MARKER=>&\1;
tagsfromtitle=> [xX] =>,
+# split commas into individual list items.
+# these will work on up to 14 splits, should be plenty.
+ tagsfromtitle=>([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),(.+)=>\1\,\2\,\3,\4\,\5\,\6
+ tagsfromtitle=>([^,]+),([^,]+),([^,]+),([^,]+),(.+)=>\1\,\2\,\3,\4\,\5
+ tagsfromtitle=>([^,]+),([^,]+),([^,]+),(.+)=>\1\,\2\,\3,\4
+ tagsfromtitle=>([^,]+),([^,]+),(.+)=>\1\,\2\,\3
+ tagsfromtitle=>([^,]+),(.+)=>\1\,\2
# remove [] or () blocks and leading/trailing spaces/dashes/colons
- title=>[-: ]*[\(\[]([^\]\)]+)[\)\]][-: ]*=>
+ title=>[-: ]*(\[([^\]]+)\]|\(([^\)]+)\))[-: ]*=>
# remove 'Thread' and the next word, usually "Thread 2", "Thread
# four", "Thread iv", "Story Thread", etc
- title,tagsfromtitle=>[-: ]*(Story *)?[Tt]hread [^ ]+[-: ]*=>
+ title,tagsfromtitle=>[-: ,]*(Story *)?[Tt]hread [^ ]+[-: ]*=>
# Normalize 'fanfiction/fanfic/fan-fiction' a little.
forumtags=>[Ff]an-?[Ff]ic(tion)?=>FanFiction
@@ -467,9 +786,13 @@ add_to_extra_titlepage_entries:,tagsfromtitle,forumtags
## replace_metadata.
capitalize_forumtags:true
-## Add both title tags and forumtags to standard (subject) tags.
-## '.SPLIT' tells the system to split by ','
-add_to_include_subject_tags:,tagsfromtitle.SPLIT,forumtags
+## Add forumtags to standard entry genre and tagsfromtitle to
+## category. Can be changed in personal.ini if desired.
+include_in_genre:genre,forumtags
+include_in_category:category,tagsfromtitle,parentforums
+
+## Previously was:
+#add_to_include_subject_tags:,tagsfromtitle.SPLIT,forumtags
## base_xenforoforum reads Published and Updated datetimes from
## Threadmarks if used, or from the posted & updated times of the
@@ -481,13 +804,6 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M
## the description.
description_limit:500
-## Because base_xenforoforum adapters can pull chapter URLs from human
-## posts, the odds of errors in the chapter URLs are vastly higher.
-## You can set continue_on_chapter_error:true to continue on after
-## failing to download a chapter and instead record an error message
-## in the ebook for that chapter.
-continue_on_chapter_error:false
-
## When given a thread URL, use threadmarks as chapter links when
## there are at least this many threadmarks. A number of older
## threads have a single threadmark to an 'index' post. Set to 1 to
@@ -503,69 +819,256 @@ first_post_title:First Post
## thread will be included as the first chapter (with chapter title
## from first_post_title) unless that post is explicitly linked to in
## the collected chapter list. First post is not included when using
-## thread marks.
+## thread marks unless threadmarked.
##
## If always_include_first_post:true, then the given or first post
## will be included as above even if it is a link in the post or even
## if threadmarks are used. Can result in a duplicated chapter.
always_include_first_post:false
+## In normal operation, when updating an existing epub, old chapters
+## will be reused as-is. Normally, that works fine, but forum stories
+## sometimes have an index post as the first 'chapter', and the
+## version in the first chapter gets out of sync.
+##
+## If always_reload_first_chapter:true, then the first chapter will
+## always be downloaded again (ie, reloaded). It will NOT be marked
+## '(new)' (see mark_new_chapters). Because it is reloaded, manual
+## edits made to the first chapter will be lost.
+##
+## While intended for base_xenforoforum sites, this setting can be
+## applied to other sites.
+always_reload_first_chapter:false
+
## In normal operation, forumtags will only be populated when
## threadmarks are used for chapters (see minimum_threadmarks above).
## When always_use_forumtags:true, always populate forumtags.
always_use_forumtags:false
-## Each output format has a section that overrides [defaults]
-[html]
+## base_xenforoforum sites sometimes have 'spoiler' blocks in
+## posts. When viewed in a browser, the block is hidden until a button
+## is clicked. eBook viewers can't handle that and the javascript is
+## disabled. The remove_spoilers option, if uncommented and set true,
+## will remove spoiler blocks entirely.
+#remove_spoilers:false
+
+## This option if uncommented and set true, will put a box around the
+## spoiler blocks with the original spoiler button text as a label
+## using fieldset and legend HTML tags. For a simple box, see the
+## add_to_output_css example for [base_xenforoforum:epub] below.
+#legend_spoilers:false
+
+## True by built-in default, but only applied if using threadmarks for
+## chapters and a 'reader' URL is found in the thread. 'Reader mode'
+## will reduce the number of pages fetched by roughly 10 to 1 for a
+## full story download by using the threadmarks-only reader pages.
+#use_reader_mode:true
+
+## In case the number of posts per reader page changes. Built-in default is
+## 10.
+#reader_posts_per_page:10
+
+## xenforoforum has categories of threadmarks. This setting allows
+## you to leave out categories you don't want. Skipping categories
+## will also speed downloads as categories other than 'Threadmarks'
+## don't use Reader Mode.
+## The last known list of categories is:
+## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post
+#skip_threadmarks_categories:Staff Post
+
+## I'm not a fan of using the word 'Apocrypha' for the threadmark
+## category when everybody usually calls them Omake. Set true to
+## change Apocrypha to Omake. Applied *after*
+## skip_threadmarks_categories.
+#apocrypha_to_omake:false
+
+## If desired, you can change the order FFF places threadmark
+## categories. (This setting does not remove categories, you need to
+## use skip_threadmarks_categories for that.)
+#threadmark_category_order:Threadmarks,Sidestory,Apocrypha,Omake,Media,Informational,Staff Post
+
+## By default, FFF orders threadmarks first by category, then in the
+## order they appear as determined by the author. When
+## order_threadmarks_by_date:true, all included threadmarks
+## (skip_threadmarks_categories still works) will instead be ordered
+## by their datetime as reported in threadmarks. Different threadmark
+## categories will be mixed together, and I've seen at least one case
+## of post order on the site disagreeing with post datetime. Note
+## that if order_threadmarks_by_date_categories is set (see below),
+## order_threadmarks_by_date is ignored.
+#order_threadmarks_by_date:false
+
+## By default, FFF orders threadmarks first by category, then in the
+## order they appear as determined by the author. When
+## order_threadmarks_by_date_categories is set, the listed threadmark
+## categories are sorted by their datetime (as reported in
+## threadmarks), the different threadmark categories mixed together.
+## Categories *not* listed in order_threadmarks_by_date_categories
+## will all be *after* the date sorted threadmarks in category, then
+## threadmark order. Note that if
+## order_threadmarks_by_date_categories is set,
+## order_threadmarks_by_date (see above) is ignored.
+#order_threadmarks_by_date_categories:Threadmarks,Sidestory
+
+## SV & SB (and possibly QQ and AH) like to use sprite smilies --
+## smiley images that are all in one image file for performance and
+## are shown by CSS background with offsets. Epub viewers don't like
+## that.
+##
+## The sprite smiley img tags generally do have alt attributes that get
+## the point across: ":)" ":D" ":(" ":lol" etc. And not all smilies
+## on these sites use sprite images, so some can be used as is.
+##
+## When replace_failed_smilies_with_alt_text:true, any images that
+## failed to download or are src=clear.png *and* have a class containing
+## mceSmilie and an alt attribute will be replaced with:
+## <span class="(smiley classes)">(alt text)</span>
+##
+## The smiley classes are preserved in case the user wants to do
+## something special with them.
+#replace_failed_smilies_with_alt_text:false
+
+## SB, SV & QQ (but not AH) provide word counts with threadmarks. Set
+## use_threadmark_wordcounts:true to sum and use them to fill
+## numWords. skip_threadmarks_categories and ignore_chapter_url_list
+## are applied *before* summing.
+## SB, SV & QQ base_xenforoforum sites using use_threadmark_wordcounts
+## is one of the few cases where numWords is affected by
+## ignore_chapter_url_list.
+## Note: if always_include_first_post is true, the first post will not
+## be included in the word count if not also threadmarked.
+## Note 2: Word counts from the site exclude words inside Spoiler
+## tags.
+use_threadmark_wordcounts:true
+
+## base_xenforoforum stories with threadmarks have a few additional
+## pieces of metadata available that most adapters don't. 'date';
+## 'words'--word count as number only: "12104"; and kwords--the
+## threadmark human-readable string for word count with parens:
+## "(12.1k)" These can be used with custom output (see
+## https://github.com/JimmXinu/FanFicFare/wiki/CustomOutput ) or with
+## chapter_title_*_pattern settings.
+## Examples for html & epub:
+#[base_xenforoforum:html]
+#tocpage_entry:
+# ${chapter} ${date} ${kwords}
+#[base_xenforoforum:epub]
+#tocpage_entry:
+# ${chapter} ${date} ${kwords}
+
+## The 'date' value for chapters mentioned above can be formatted with
+## datethreadmark_format. Otherwise it will default to
+## dateCreated_format.
+#datethreadmark_format:%%Y-%%m-%%d %%H:%%M
-## include images from img tags in the body and summary of
-## stories. Images will be converted to jpg for size if possible.
-## include_images is *only* available in epub and html output formats.
-## include_images is *not* available in the web service in any format.
-#include_images:false
+## The basic pattern used when not using add_chapter_numbers or
+## mark_new_chapters
+#chapter_title_def_pattern:${title} ${date}
-## This switch prevents FanFicFare from doing any processing on the images.
-## Usually they would be converted to jpg, resized and optionally made
-## grayscale.
-no_image_processing: true
+## Pattern used with add_chapter_numbers, but not mark_new_chapters
+#chapter_title_add_pattern:${number}. ${title} ${date}
-## output background color--only used by html and epub (and ignored in
-## epub by many readers). Included below in output_css--will be
-## ignored if not in output_css.
-background_color: ffffff
+## Pattern used with mark_new_chapters, but not add_chapter_numbers
+## (new) is just text and can be changed.
+#chapter_title_new_pattern:(new) ${title} ${date}
-## Allow customization of CSS. Make sure to keep at least one space
-## at the start of each line and to escape % to %%. Also need
-## background_color to be in the same section, if included in CSS.
-output_css:
- body { background-color: #%(background_color)s; }
- .CI {
- text-align:center;
- margin-top:0px;
- margin-bottom:0px;
- padding:0px;
- }
- .center {text-align: center;}
- .cover {text-align: center;}
- .full {width: 100%%; }
- .quarter {width: 25%%; }
- .smcap {font-variant: small-caps;}
- .u {text-decoration: underline;}
- .bold {font-weight: bold;}
- .big { font-size: larger; }
- .small { font-size: smaller; }
+## Pattern used with add_chapter_numbers and mark_new_chapters
+## (new) is just text and can be changed.
+#chapter_title_addnew_pattern:${number}. (new) ${title} ${date}
+
+## When set true, always_include_first_post_chapters will cause FFF to
+## treat both threadmarks *and* first post links as chapter links.
+#always_include_first_post_chapters:false
+
+## When reveal_invisible_text is set true, adapter will search for
+## <span style="color:transparent"> tags and remove the style
+## attribute. Also adds class="invisible_text" for custom CSS
+## styling, see invisible_text add_to_output_css example below.
+#reveal_invisible_text:false
+
+[base_xenforoforum:epub]
+
+## See remove_spoilers above for more about 'spoilers'. This example
+## shows how to put a simple line around spoiler blocks. Uncomment
+## all three lines, keep the leading space before .bbCodeSpoilerContainer.
+#add_to_keep_html_attrs:,style
+#add_to_output_css:
+# .bbCodeSpoilerContainer { border: 1px solid black; padding: 2px; }
+
+## When reveal_invisible_text:true, you can style the class
+## invisible_text as you like for forum "invisible text". See
+## reveal_invisible_text above. This is just one example. Note that
+## if you also use the add_to_output_css example above for
+## bbCodeSpoilerContainer spoilers, you need to combine the CSS lines
+## for both under one add_to_output_css setting, with a space leading
+## each CSS line.
+#add_to_output_css:
+# .invisible_text { color:gray; }
+
+[base_xenforo2forum]
+## [base_xenforoforum] also applied, but [base_xenforo2forum] takes
+## precedence.
+
+## Some additional 'thread' metadata entries.
+add_to_extra_valid_entries:,threadmarks_title,threadmarks_description,threadmarks_status,estimatedWords
+#add_to_extra_titlepage_entries:,threadmarks_title,threadmarks_description,threadmarks_status,estimatedWords
+
+# Just to remove '_'.
+threadmarks_title_label:Threadmarks Title
+threadmarks_description_label:Threadmarks Description
+threadmarks_status_label:Threadmarks Status
+estimatedWords_label:Estimated Word Count
+
+## When use_threadmarks_description is set true,
+## threadmarks_description will be used to fill in the standard
+## description metadata entry. Some stories have a poor
+## threadmarks_description; you can use this setting to avoid using
+## it.
+use_threadmarks_description:true
+## Increasing description_limit from base_xenforoforum's default of
+## 500 is also useful with use_threadmarks_description
+description_limit:1000
+
+## When use_threadmarks_status is set true, a normalized version of
+## threadmarks_status will be used to fill in the standard status
+## metadata entry. (In-Progress, Completed plus site statuses.)
+use_threadmarks_status:true
+
+## When use_threadmarks_cover is set true, the threadmarks image will
+## be used to fill in the cover image. Set true by default.
+use_threadmarks_cover:true
+
+## Sometimes there's a sticky post first (often a Staff Post?) rather
+## than the true first post by the Original Poster, AKA author. By
+## default, base_xenforo2forum will skip up to two sticky first posts.
+## Configurable due to the extremely small sample size available when
+## this was implemented.
+skip_sticky_first_posts:true
+
+## SV/SB sites include a dice roller that can attach dice roll results
+## to a post. These are outside the actual post text. Setting
+## include_dice_rolls:true will include a text version of those rolls
+## in the FFF chapter that should be usable for all ebook readers.
+## Setting include_dice_rolls:svg will keep the inline