Migrate code style to Black

This commit is contained in:
Ondřej Nývlt 2023-08-21 10:38:13 +02:00
parent 986dd93550
commit 6cccea04fb
5 changed files with 189 additions and 165 deletions
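For anyone reproducing the change locally: a minimal sketch of the kind of rewrite Black applies, using Black's documented Python API (assumes the black package is installed; the sample statement is hypothetical, not taken from this repository):

import black

# Black normalizes quotes and spacing; this mirrors the mechanical rewrites in the hunks below.
print(black.format_str("x = {'a': 1,'b': 2}", mode=black.Mode()))
# -> x = {"a": 1, "b": 2}

The same result comes from running black . in the repository root (or black --check . to verify without rewriting files).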

View file

@@ -7,7 +7,7 @@ insert_final_newline = true
trim_trailing_whitespace = true
[*.py]
indent_style = tab
indent_style = space
indent_size = 4
[*.{html,css}]

View file

@@ -1,5 +1,9 @@
{
"files.associations": {
"*.html": "jinja-html"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
}
}

View file

@@ -1,7 +1,7 @@
#!/usr/bin/python3
#
# Create a UID of the article in old articles where we don't have RSS UID and where we can't generate the article_id on the fly.
# Create a UID of the article in old articles where we don't have RSS UID and where we can't generate the article_id on the fly.
# It takes a while, but it's a one-shot.
#
@@ -12,33 +12,27 @@ db_con = sqlite3.connect("../data/diffs.db")
db = db_con.cursor()
def create_article_id(uid, feed):
    # Create a fake unique ID from RSS unique tag and feed name to reference the article in database
    id_string = str(uid) + str(feed)
    id_bytes = id_string.encode('utf-8')
    article_id = hashlib.sha256(id_bytes).hexdigest()
    return(article_id)
    # Create a fake unique ID from RSS unique tag and feed name to reference the article in database
    id_string = str(uid) + str(feed)
    id_bytes = id_string.encode("utf-8")
    article_id = hashlib.sha256(id_bytes).hexdigest()
    return article_id
def update_diff(diff_id, article_id):
sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?"
sql_data = (article_id, diff_id)
db.execute(sql, sql_data)
db_con.commit()
sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?"
sql_data = (article_id, diff_id)
db.execute(sql, sql_data)
db_con.commit()
db.execute(
"SELECT * FROM diffs WHERE NOT 'article_id' ORDER BY diff_id DESC ",
"SELECT * FROM diffs WHERE NOT 'article_id' ORDER BY diff_id DESC ",
)
diffs = db.fetchall()
for diff in diffs:
    article_id = create_article_id(diff[1], diff[2])
    update_diff(diff[0], article_id)
    print(article_id)
    article_id = create_article_id(diff[1], diff[2])
    update_diff(diff[0], article_id)
    print(article_id)
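
For context (not part of the commit): the article IDs generated above are plain SHA-256 digests of the RSS unique tag concatenated with the feed name. A standalone sketch with hypothetical input values:

import hashlib

def create_article_id(uid, feed):
    # Same derivation as the script above: hash the concatenation of uid and feed name.
    return hashlib.sha256((str(uid) + str(feed)).encode("utf-8")).hexdigest()

print(create_article_id("example-guid-123", "Example Feed"))  # 64-character hex digest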

View file

@@ -14,25 +14,26 @@ from diff_match_patch import diff_match_patch
#
# Idea block:
#
# We could try adding a hash of the content to each article ID as well, so that at first we would only check whether the hash changed and only then compare the string contents.
# But I'm not sure whether it would actually be more efficient
# We could try adding a hash of the content to each article ID as well, so that at first we would only check whether the hash changed and only then compare the string contents.
# But I'm not sure whether it would actually be more efficient
#
# For now we will only check the titles, but we can gradually add the article description as well
# For now we will only check the titles, but we can gradually add the article description as well
CONFIG_FILE = "../data/config.yaml"
REDIS_ARTICLE_EXPIRE_SEC = 604800
config = confuse.Configuration('headline', __name__)
config = confuse.Configuration("headline", __name__)
config.set_file(CONFIG_FILE)
dmp = diff_match_patch()
rc = redis.Redis(host='redis', port=6379, db=0)
rc = redis.Redis(host="redis", port=6379, db=0)
db_con = sqlite3.connect("../data/diffs.db")
db = db_con.cursor()
db.executescript("""
db.executescript(
"""
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS diffs (
@@ -74,90 +75,100 @@ CREATE TRIGGER IF NOT EXISTS diffs_aupdate AFTER UPDATE ON diffs
INSERT INTO diffs_fts (rowid, title_orig, title_new)
VALUES (new.diff_id, new.title_orig, new.title_new);
END;
""")
"""
)
article_count = 0
def write_article(article, rc):
    rval = json.dumps(article['content'])
    rc.set(article['rss_id'], rval, ex=REDIS_ARTICLE_EXPIRE_SEC)
    rval = json.dumps(article["content"])
    rc.set(article["rss_id"], rval, ex=REDIS_ARTICLE_EXPIRE_SEC)
def process_diff(old, new, rss_id):
    diff = dmp.diff_main(old['title'], new['title'])
    dmp.diff_cleanupSemantic(diff)
    html_diff = dmp.diff_prettyHtml(diff)
    # print(old['link'])
    # print(diff)
    diff = dmp.diff_main(old["title"], new["title"])
    dmp.diff_cleanupSemantic(diff)
    html_diff = dmp.diff_prettyHtml(diff)
    # print(old['link'])
    # print(diff)
    sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
    sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
    db.execute(sql, sql_data)
    db_con.commit()
    sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
    sql_data = (
        new["article_id"],
        old["medium"],
        old["link"],
        old["title"],
        new["title"],
        html_diff,
    )
    db.execute(sql, sql_data)
    db_con.commit()
    return(True)
    return True
def process_item(article, rc):
    if rc.exists(article['rss_id']):
        old = json.loads(rc.get(article['rss_id']))
        new = article['content']
        if old['title'] != new['title']:
            # print('Article changed. World is fucked.')
            diff = process_diff(old, new, article['rss_id'])
            write_article(article, rc)
            return(True)
        else:
            # Article is the same. All good!
            return(True)
    else:
        # Article is new, just create it and exit
        write_article(article, rc)
    if rc.exists(article["rss_id"]):
        old = json.loads(rc.get(article["rss_id"]))
        new = article["content"]
        if old["title"] != new["title"]:
            # print('Article changed. World is fucked.')
            diff = process_diff(old, new, article["rss_id"])
            write_article(article, rc)
            return True
        else:
            # Article is the same. All good!
            return True
    else:
        # Article is new, just create it and exit
        write_article(article, rc)
def create_article_id(uid, feed):
    # Create a unique ID from RSS unique tag and feed name to reference the article in database
    id_string = str(uid) + str(feed)
    id_bytes = id_string.encode('utf-8')
    article_id = hashlib.sha256(id_bytes).hexdigest()
    return(article_id)
    # Create a unique ID from RSS unique tag and feed name to reference the article in database
    id_string = str(uid) + str(feed)
    id_bytes = id_string.encode("utf-8")
    article_id = hashlib.sha256(id_bytes).hexdigest()
    return article_id
for feed in config['feeds']:
    try:
        rss_source = str(feed['rss_source'])
        unique_tag = str(feed['unique_tag'])
        name = str(feed['name'])
for feed in config["feeds"]:
    try:
        rss_source = str(feed["rss_source"])
        unique_tag = str(feed["unique_tag"])
        name = str(feed["name"])
        rss = feedparser.parse(rss_source)
        rss = feedparser.parse(rss_source)
        for item in rss['entries']:
            try:
                rss_id = item[unique_tag]
                title = item['title']
                article_id = create_article_id(rss_id, name)
                #description = item['description'] ## Don't store description for now, as we don't need it and it's big.
                published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
                link = item['link']
                article_data = {
                    'title' : title,
                    'article_id': article_id,
                    #'description': description,
                    'published' : published,
                    'link' : link,
                    'medium' : name
                }
                article = {
                    'rss_id' : rss_id,
                    'content' : article_data
                }
                article_count += 1
                process_item(article, rc)
            except Exception as e:
                print("Parsing article failed")
                print(e)
                print(item)
    except Exception as e:
        print("Parsing feed failed.")
        print(e)
        print(feed)
        pass
        for item in rss["entries"]:
            try:
                rss_id = item[unique_tag]
                title = item["title"]
                article_id = create_article_id(rss_id, name)
                # description = item['description'] ## Don't store description for now, as we don't need it and it's big.
                published = time.strftime(
                    "%Y:%m:%d %H:%M:%S %Z %z", item["published_parsed"]
                )
                link = item["link"]
                article_data = {
                    "title": title,
                    "article_id": article_id,
                    #'description': description,
                    "published": published,
                    "link": link,
                    "medium": name,
                }
                article = {"rss_id": rss_id, "content": article_data}
                article_count += 1
                process_item(article, rc)
            except Exception as e:
                print("Parsing article failed")
                print(e)
                print(item)
    except Exception as e:
        print("Parsing feed failed.")
        print(e)
        print(feed)
        pass
print("Processed articles: " + str(article_count))

View file

@@ -10,7 +10,7 @@ import re
DATABASE = "../data/diffs.db"
CONFIG_FILE = "../data/config.yaml"
config = confuse.Configuration('headline', __name__)
config = confuse.Configuration("headline", __name__)
config.set_file(CONFIG_FILE)
@@ -18,104 +18,119 @@ app = Flask(__name__)
def get_db():
    db = getattr(g, '_database', None)
    if db is None:
        db = g._database = sqlite3.connect(DATABASE)
        db.row_factory = sqlite3.Row
    return db
    db = getattr(g, "_database", None)
    if db is None:
        db = g._database = sqlite3.connect(DATABASE)
        db.row_factory = sqlite3.Row
    return db
@app.teardown_appcontext
def close_connection(exception):
    db = getattr(g, '_database', None)
    if db is not None:
        db.close()
    db = getattr(g, "_database", None)
    if db is not None:
        db.close()
def websearch_to_fts_query(search: str):
"""
Converts web searches into fts queries:
'this is "a test"' -> '"this" OR "is" OR "a test"'
"""
return ' OR '.join(['"'+m.group(0)+'"' for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)])
"""
Converts web searches into fts queries:
'this is "a test"' -> '"this" OR "is" OR "a test"'
"""
return " OR ".join(
[
'"' + m.group(0) + '"'
for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)
]
)
@app.route('/')
@app.route("/")
def index():
    db = get_db().cursor()
    db = get_db().cursor()
    search = request.args.get("search", type=str, default="")
    query = websearch_to_fts_query(search) if search else None
    search = request.args.get("search", type=str, default="")
    query = websearch_to_fts_query(search) if search else None
    # View options
    expand_diffs = request.args.get("expand_diffs") is not None
    # View options
    expand_diffs = request.args.get("expand_diffs") is not None
    db.execute(f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}", (query,) if query else ())
    db.execute(
        f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}",
        (query,) if query else (),
    )
    diff_count = db.fetchall()[0][0]
    diff_count = db.fetchall()[0][0]
    # flask-paginate
    page = request.args.get(get_page_parameter(), type=int, default=1)
    #flask-paginate
    page = request.args.get(get_page_parameter(), type=int, default=1)
    pagination = Pagination(
        page=page, total=diff_count, record_name="diffs", css_framework="bootstrap5"
    )
    pagination = Pagination(page=page, total=diff_count, record_name='diffs', css_framework='bootstrap5')
    page_skip = pagination.skip
    per_page = pagination.per_page
    if query:
        db.execute(
            "SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?",
            (query, per_page, page_skip),
        )
    else:
        db.execute(
            "SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?",
            (per_page, page_skip),
        )
    diffs = db.fetchall()
    page_skip = pagination.skip
    per_page = pagination.per_page
    if query:
        db.execute(
            "SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?",
            (query,per_page,page_skip)
        )
    else:
        db.execute(
            "SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?",
            (per_page,page_skip)
        )
    diffs = db.fetchall()
    return render_template('./index.html',
        diffs=diffs,
        page=page,
        pagination=pagination,
        diff_count = diff_count,
        search=search,
        expand_diffs=expand_diffs,
    )
    return render_template(
        "./index.html",
        diffs=diffs,
        page=page,
        pagination=pagination,
        diff_count=diff_count,
        search=search,
        expand_diffs=expand_diffs,
    )
@app.route("/article/<path:article_id>")
def article_detail(article_id: str):
    db = get_db().cursor()
    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
    result = db.fetchall()
    article_url = result[0]['article_url']
    # TODO: Handle if nothing is found and return 404 in that case.
    return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result )
    db = get_db().cursor()
    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
    result = db.fetchall()
    article_url = result[0]["article_url"]
    # TODO: Handle if nothing is found and return 404 in that case.
    return render_template(
        "article_detail.html",
        article_id=article_id,
        article_url=article_url,
        diffs=result,
    )
@app.route('/about')
@app.route("/about")
def about():
    return render_template('about.html')
    return render_template("about.html")
@app.route('/feeds')
@app.route("/feeds")
def feed_list():
    feeds = []
    for conf in config['feeds']:
        feed = {
            'rss_source' : str(conf['rss_source']),
            'unique_tag' : str(conf['unique_tag']),
            'feed_name' : str(conf['name'])
        }
        feeds.append(feed)
    return render_template('feeds.html', feeds=feeds)
    feeds = []
    for conf in config["feeds"]:
        feed = {
            "rss_source": str(conf["rss_source"]),
            "unique_tag": str(conf["unique_tag"]),
            "feed_name": str(conf["name"]),
        }
        feeds.append(feed)
    return render_template("feeds.html", feeds=feeds)
@app.route('/robots.txt')
@app.route("/robots.txt")
def static_from_root():
    return send_from_directory(app.static_folder, request.path[1:])
    return send_from_directory(app.static_folder or "static", request.path[1:])
if __name__ == "__main__":
    app.run(host="0.0.0.0")
    app.run(host="0.0.0.0")
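
For reference (not part of the commit), the reformatted search helper can be checked against the example in its own docstring:

import re

def websearch_to_fts_query(search: str):
    # Quoted phrases stay together; bare words become separate OR terms.
    return " OR ".join(
        ['"' + m.group(0) + '"' for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)]
    )

print(websearch_to_fts_query('this is "a test"'))  # "this" OR "is" OR "a test"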