Check the language of each work, add a config option for this, and add comments to the code.

This commit is contained in:
samerbam 2023-06-12 14:17:08 -04:00
parent 5ffee95b58
commit feef1186b5
2 changed files with 33 additions and 10 deletions

BIN
root/app/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -1,30 +1,51 @@
import feedparser
import lxml.html
from optparse import OptionParser
from configparser import ConfigParser
def main(config):
    """Append new English-language works from the RSS feed to the URL store.

    The feed URL is read from ``[locations] feed`` and the URL store path
    from ``[locations] output`` in *config* (the file passed with ``-c``).
    Each feed entry whose summary contains a ``Language: English`` paragraph
    and whose link is not already in the store is appended to the store, one
    URL per line with the ``https://`` prefix removed.

    Args:
        config: A ``ConfigParser``-like object exposing ``get(section, key)``.

    Raises:
        OSError: If the URL store file cannot be opened for reading or
            appending (for example, it does not exist yet).
    """
    # Grab content from the RSS feed configured under [locations] "feed".
    feed = feedparser.parse(config.get('locations', 'feed'))
    output_path = config.get('locations', 'output')

    # Load URLs already present in the store. A set gives O(1) membership
    # tests and lets us record URLs added during this run.
    with open(output_path, 'r') as file:
        known_urls = {line.rstrip() for line in file}

    # Number of works appended in this run (for logging only).
    new_works = 0

    # Open the store in append mode so existing URLs are left untouched.
    with open(output_path, 'a') as f:
        for entry in feed.entries:
            # The summary is an HTML fragment; wrap it in a <div> so the
            # absolute XPath below has a single known root element.
            parsed_summary = lxml.html.fragment_fromstring(
                entry.summary, create_parent='div'
            )

            # Skip works whose summary has no "Language: English" paragraph.
            paragraph_texts = parsed_summary.xpath("/div//p/text()")
            if not any("Language: English" in text for text in paragraph_texts):
                continue

            # Per the original author's note, FFF (which reads the store
            # file) requires URLs without the "https://" prefix.
            url = entry.link.replace('https://', '')
            if url in known_urls:
                continue

            # Remember the URL so a link repeated within the same feed is
            # not written (or counted) twice.
            known_urls.add(url)
            new_works += 1
            f.write(f"{url}\n")

    # Log how many new URLs were added to the store.
    print(f"Added {new_works} works out of {len(feed.entries)} works to list.")
if __name__ == "__main__":
# command line option parser, adds -c option for config file
option_parser = OptionParser(usage="usage: %prog [flags]")
option_parser.add_option(
'-c',
@ -35,8 +56,10 @@ if __name__ == "__main__":
(options, args) = option_parser.parse_args()
# read config file using ConfigParser and assign to config variable
if options.config:
config = ConfigParser(allow_no_value=True)
config.read(options.config)
main(config)
# call main function passing config file in
main(config) #run program with python3 runner_notify.py -c /path/to/config.ini