This repository has been archived by the owner on Sep 6, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
rssingle.py
executable file
·221 lines (179 loc) · 6.94 KB
/
rssingle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
# Copyright (c) Dom Rodriguez 2020
# Copyright (c) Andros Fenollosa 2022
# Licensed under the Apache License 2.0
import os
import sys
import feedparser
import logging
import listparser
from os import environ
from feedgen.feed import FeedGenerator
import json
import yaml
# Variables
# Module-level state shared by the functions in this file.  Several names are
# placeholders that receive their real value during start-up.
log = None  # logger instance, bound by setup_logging()
CONFIG_PATH = "config.yml"  # location of the YAML configuration file
# Log level name taken from the environment.  ``SR_LOG_LEVEL`` is the intended
# spelling; the historical misspelling ``SR_LOG_LEVEl`` is still honoured so
# existing set-ups keep working.  The value is upper-cased because ``logging``
# only recognises upper-case level names.
LOG_LEVEL = (
    environ.get("SR_LOG_LEVEL") or environ.get("SR_LOG_LEVEl") or "ERROR"
).upper()
fg = None  # FeedGenerator for the combined feed, bound by init_feed()
FEED_OUT_PATH = None  # output path, read from the ``output`` config key
FEEDS = []
CFG = None  # not referenced by the code in this file; kept for compatibility
CONFIG = None  # parsed configuration mapping, loaded in the ``__main__`` block
def setup_logging() -> None:
    """
    Initialise the module-level logger.

    Binds the global ``log`` to the logger named after this module, sets it
    to ``LOG_LEVEL`` and attaches a single handler that writes formatted
    records to standard error.  Calling the function again only re-applies
    the level, so a record is never emitted more than once.
    """
    global log

    log = logging.getLogger(__name__)
    log.setLevel(LOG_LEVEL)

    if log.handlers:
        # Already configured by an earlier call: attaching another handler
        # would duplicate every message, so just refresh the levels.
        for handler in log.handlers:
            handler.setLevel(LOG_LEVEL)
        return None

    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(LOG_LEVEL)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    log.addHandler(handler)
    return None
def get_url_from_feed(config) -> str:
    """
    Build the address of the generated feed.

    The ``url`` and ``output`` entries of *config* are joined with a single
    ``/`` between them; neither value is altered.
    """
    base_url = config["url"]
    output_name = config["output"]
    return "/".join((base_url, output_name))
def init_feed() -> None:
    """
    Create the output feed and fill in its top-level attributes.

    Binds the global ``fg`` to a new ``FeedGenerator`` whose id and self link
    come from ``get_url_from_feed(CONFIG)`` and whose title and subtitle come
    from the ``title`` and ``description`` configuration keys.

    Set-up is best effort: any ordinary error (for example a missing
    configuration key) is logged with its traceback and swallowed, which can
    leave ``fg`` only partially configured.  ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not intercepted.
    """
    global fg

    log.debug("Initialising the feed...")
    try:
        fg = FeedGenerator()
        feed_url = get_url_from_feed(CONFIG)
        # Setup [root] feed attributes
        fg.id(feed_url)
        fg.title(CONFIG["title"])
        fg.generator("RSSingle/v1.0.0")
        fg.link(href=feed_url, rel="self")
        fg.subtitle(CONFIG["description"])
        fg.language("en")
    except Exception:
        # Report through the module logger so the record uses the handler
        # and level configured by setup_logging().
        log.exception("Error initialising the feed!")
    else:
        # Only claim success when every attribute was actually applied.
        log.debug("Feed initialised!")
    return None
def parse_rss_feed(url) -> "feedparser.FeedParserDict":
    """
    Parse the feed found at *url*.

    Returns whatever ``feedparser.parse`` produces.  If parsing raises an
    ordinary exception, a warning (with traceback) is logged and an empty
    result whose ``entries`` list is empty is returned instead, so callers
    can always read ``result["entries"]``.
    """
    log.debug("Parsing RSS feed..")
    try:
        # Hopefully this should parse..
        return feedparser.parse(url)
    except Exception:
        log.warning("Failed to parse RSS feed.", exc_info=True)
        # Degrade gracefully: hand back a result with nothing in it rather
        # than None, which the caller would trip over.
        return feedparser.FeedParserDict(entries=[])
def filter_feed_entries(entry) -> bool:
    """
    Decide whether *entry* should be kept in the output feed.

    An entry is rejected when any string listed under ``filter_strings`` in
    the configuration occurs, case-insensitively, in its title or summary.

    Returns ``True`` to keep the entry and ``False`` to drop it.
    """
    # ``or []`` also covers a key that is present but set to null.
    filter_strings = CONFIG.get("filter_strings") or []
    if not filter_strings:
        return True

    # Missing or null fields count as empty text, so an entry without a title
    # can still be matched (and reported) on its summary, and vice versa.
    title = entry.get("title") or ""
    summary = entry.get("summary") or ""
    haystacks = (str(title).lower(), str(summary).lower())

    for filter_str in filter_strings:
        # str() lets non-string YAML scalars (e.g. numbers) act as filters.
        needle = str(filter_str).lower()
        if any(needle in text for text in haystacks):
            log.debug("Entry filtered out: %s", title)
            return False
    return True
# Placeholder timestamp (start of the Unix epoch) used when an entry has no
# usable publication date.
_EPOCH = "1970-01-01T00:00:00+00:00"


def _normalise_author(raw):
    """Reduce *raw* to a name/email/uri author dict, or None if it is unusable."""
    if not isinstance(raw, dict):
        return None
    name = raw.get("name")
    email = raw.get("email")
    # Input authors may carry their link under ``href``; it is passed on as
    # ``uri`` (see the feedgen author documentation for the accepted keys).
    uri = raw.get("uri") or raw.get("href")
    if not (name or email or uri):
        return None
    author = {"name": name or "Unspecified"}
    if email:
        author["email"] = email
    if uri:
        author["uri"] = uri
    return author


def _add_authors(fe, entry) -> None:
    """Copy the entry's authors onto *fe*, falling back to a dummy author."""
    sources = entry.get("sources")
    raw_authors = sources.get("authors") if isinstance(sources, dict) else None
    if not raw_authors:
        # No per-source authors: use the entry's own author list.
        raw_authors = entry.get("authors")

    authors = [a for a in map(_normalise_author, raw_authors or []) if a]
    if not authors:
        # Sometimes we don't have ANY author attributes, so we
        # have to set a dummy attribute.
        log.warning("Empty authors attribute, defaulting..")
        # NOTE(review): this e-mail literal looks like an obfuscated address;
        # confirm the intended placeholder value.
        authors = [{"name": "Unspecified", "email": "[email protected]"}]
    for author in authors:
        fe.author(author)


def _add_summary(fe, entry) -> None:
    """Set summary/description on *fe* from the entry, or a placeholder."""
    summary = entry.get("summary")
    description = entry.get("description")
    if summary:
        fe.summary(summary)
        fe.description(summary)
    elif description:
        fe.description(description)
        fe.summary(description)
        fe.content(description)
    else:
        # Sometimes feeds don't provide a summary OR description.
        # This is pretty useless for a feed, so hopefully we
        # don't have to do it often!
        log.warning("Empty description OR summary attribute, defaulting..")
        fe.description("Unspecified")
        fe.summary("Unspecified")


def _add_dates(fe, entry) -> None:
    """Set published/updated on *fe*, defaulting to the Unix epoch."""
    published = entry.get("published")
    if published:
        try:
            fe.published(published)
            fe.updated(published)
        except Exception:
            # The value was rejected; treat it like a missing date.
            published = None
    if not published:
        # Sometimes feeds don't even provide a publish date, so we default to
        # the start date & time of the Unix epoch.
        log.warning("Empty publish attribute, defaulting..")
        fe.published(_EPOCH)
        fe.updated(_EPOCH)


def main():
    """
    Merge every configured input feed into the global output feed ``fg``.

    For each URL under the ``feeds`` configuration key the feed is parsed,
    optionally truncated to the first ``max_entries`` entries, and every
    entry that passes ``filter_feed_entries`` is copied into ``fg``.  Missing
    id, title, link, author, summary or date fields are replaced with
    placeholder values and a warning is logged.  A feed that could not be
    parsed, or that has no entries, is skipped.
    """
    log.debug("Loading feed list into memory..")
    log.debug("Iterating over feed list..")
    # None (key absent or null) makes the slice below keep every entry.
    max_entries = CONFIG.get("max_entries")

    for feed in CONFIG["feeds"]:
        rss = parse_rss_feed(feed)
        # Tolerate a failed parse (None) and results without entries.
        entries = (rss.get("entries") if rss else None) or []
        log.debug("Iterating over [input] feed entries..")

        for entry in entries[:max_entries]:
            log.debug("New feed entry created.")
            if not filter_feed_entries(entry):
                continue  # Skip this entry

            fe = fg.add_entry()
            log.debug("Working on new feed entry..")

            try:
                fe.id(entry["id"])
            except KeyError:
                # Definitely weird...
                log.warning("Empty id attribute, defaulting..")
                fe.id("about:blank")

            try:
                fe.title(entry["title"])
            except KeyError:
                # OK, this is a definite malformed feed!
                log.warning("Empty title attribute, defaulting..")
                fe.title("Unspecified")

            try:
                fe.link(href=entry["link"])
            except KeyError:
                # When we have a empty link attribute, this isn't ideal
                # to set a default value.. :/
                log.warning("Empty link attribute, defaulting..")
                fe.link(href="about:blank")

            _add_authors(fe, entry)
            _add_summary(fe, entry)
            _add_dates(fe, entry)
if __name__ == "__main__":
    # Script entry point: configure logging, load the YAML configuration,
    # build the output feed, merge the input feeds and write the RSS file.
    setup_logging()
    log.debug("Initialising...")

    try:
        with open(CONFIG_PATH, "r", encoding="utf-8") as file:
            # An empty file parses to None; normalise it to an empty mapping
            # so the key lookups below fail with the intended KeyError.
            CONFIG = yaml.safe_load(file) or {}
    except OSError:
        log.error("Could not read configuration file `%s`.", CONFIG_PATH)
        sys.exit(1)

    log.debug("Assiging variables..")
    try:
        # Configuration is specified with configure variables.
        log.debug("Assignment attempt: output")
        FEED_OUT_PATH = CONFIG["output"]
    except KeyError:
        log.error("*** Configure variable missing! ***")
        log.error("`output` variable missing.")
        log.error("This program will NOT run without that set.")
        sys.exit(1)

    init_feed()
    log.debug("Begin processing feeds...")
    main()
    fg.rss_file(FEED_OUT_PATH)