From 9918da68eb8ec300eb3c2ec100614fbae3a8c1ef Mon Sep 17 00:00:00 2001 From: mdivecky Date: Wed, 16 Aug 2023 12:41:29 +0200 Subject: [PATCH] add dirty exceptions to skip broken RSS feeds --- processor/app.py | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/processor/app.py b/processor/app.py index b13b2c7..58c5bac 100644 --- a/processor/app.py +++ b/processor/app.py @@ -112,30 +112,41 @@ def process_item(article, rc): for feed in config['feeds']: - rss_source = str(feed['rss_source']) - unique_tag = str(feed['unique_tag']) - name = str(feed['name']) + try: + rss_source = str(feed['rss_source']) + unique_tag = str(feed['unique_tag']) + name = str(feed['name']) - rss = feedparser.parse(rss_source) + rss = feedparser.parse(rss_source) - for item in rss['entries']: - rss_id = item[unique_tag] - title = item['title'] - #description = item['description'] ## Don't store description for now, as we don't need it and it's big. - published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed']) - link = item['link'] - article_data = { - 'title' : title, - #'description': description, - 'published' : published, - 'link' : link, - 'medium' : name - } - article = { - 'rss_id' : rss_id, - 'content' : article_data - } - article_count += 1 - process_item(article, rc) + for item in rss['entries']: + try: + rss_id = item[unique_tag] + title = item['title'] + #description = item['description'] ## Don't store description for now, as we don't need it and it's big. + published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed']) + link = item['link'] + article_data = { + 'title' : title, + #'description': description, + 'published' : published, + 'link' : link, + 'medium' : name + } + article = { + 'rss_id' : rss_id, + 'content' : article_data + } + article_count += 1 + process_item(article, rc) + except Exception as e: + print("Parsing article failed") + print(e) + print(item) + except Exception as e: + print("Parsing feed failed.") + print(e) + print(feed) + pass print("Processed articles: " + str(article_count))