mirror of
https://git.nolog.cz/NoLog.cz/headline.git
synced 2025-01-31 11:53:35 +01:00
Give every article an ID to enable grouping changes by article
This commit is contained in:
parent
ac2ca35a56
commit
2c97d7ab69
5 changed files with 64 additions and 9 deletions
44
misc/article_id_generator.py
Normal file
44
misc/article_id_generator.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
#
|
||||
# Create an ID for old articles that have no stored RSS UID, so their article_id can't be generated on the fly.
|
||||
# It takes a while, but it's a one-shot.
|
||||
#
|
||||
|
||||
import sqlite3
|
||||
import hashlib
|
||||
|
||||
# Open the SQLite database that stores the diffs. The path is relative, so it
# is resolved against the current working directory of the process.
db_con = sqlite3.connect("../data/diffs.db")
|
||||
# Module-level cursor used by update_diff() and by the migration query below.
db = db_con.cursor()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def create_article_id(uid, feed):
    """Build a pseudo-unique article ID used to reference an article in the database.

    Both arguments are converted with ``str()`` and concatenated without a
    separator; the result is UTF-8 encoded and hashed.

    Args:
        uid: Value identifying the article (converted with ``str()``).
        feed: Value identifying the feed (converted with ``str()``).

    Returns:
        The SHA-256 digest of the concatenation as a 64-character hex string.
    """
    raw = "".join(map(str, (uid, feed))).encode('utf-8')
    return hashlib.sha256(raw).hexdigest()
|
||||
|
||||
|
||||
def update_diff(diff_id, article_id):
    """Store ``article_id`` on the diff row identified by ``diff_id``.

    Uses the module-level cursor ``db`` and commits on ``db_con`` right after
    the UPDATE, so each call is persisted on its own.

    Args:
        diff_id: Value matched against the ``diff_id`` column.
        article_id: Value written to the ``article_id`` column.
    """
    db.execute(
        "UPDATE diffs SET article_id = ? WHERE diff_id = ?",
        (article_id, diff_id),
    )
    db_con.commit()
|
||||
|
||||
|
||||
|
||||
# Backfill article_id for every diff row that does not have one yet.
#
# The original filter was ``WHERE NOT 'article_id'``. In SQLite a single-quoted
# token inside an expression is a string literal, not a column reference, so
# that condition was true for every row and the script also overwrote IDs that
# had already been generated. Compare the column itself instead.
#
# The needed columns are selected by name rather than by position from
# ``SELECT *``, because the position of article_id differs between a table
# created with the new schema and one that got the column added later.
# NOTE(review): the original passed columns 1 and 2 of ``SELECT *``; this
# assumes those were feed_name and article_url (pre-migration layout) - confirm.
db.execute(
    "SELECT diff_id, feed_name, article_url FROM diffs "
    "WHERE article_id IS NULL OR article_id = '' "
    "ORDER BY diff_id DESC"
)
# Materialize the result first: update_diff() reuses the same cursor, which
# would discard any rows not yet fetched.
diffs = db.fetchall()

for diff_id, feed_name, article_url in diffs:
    article_id = create_article_id(feed_name, article_url)
    update_diff(diff_id, article_id)
    print(article_id)
|
|
@ -6,6 +6,7 @@ import redis
|
|||
import time
|
||||
import json
|
||||
import sqlite3
|
||||
import hashlib
|
||||
|
||||
from diff_match_patch import diff_match_patch
|
||||
|
||||
|
@ -34,6 +35,7 @@ db = db_con.cursor()
|
|||
db.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS diffs (
|
||||
diff_id INTEGER PRIMARY KEY,
|
||||
article_id TEXT,
|
||||
feed_name TEXT NOT NULL,
|
||||
article_url TEXT NOT NULL,
|
||||
title_orig TEXT NOT NULL,
|
||||
|
@ -84,8 +86,8 @@ def process_diff(old, new, rss_id):
|
|||
# print(old['link'])
|
||||
# print(diff)
|
||||
|
||||
sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))"
|
||||
sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff)
|
||||
# Seven columns are listed, so seven values are required: six bound parameters
# (matching the six entries of sql_data) plus the SQL-side timestamp.
sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
|
||||
sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
|
||||
db.execute(sql, sql_data)
|
||||
db_con.commit()
|
||||
|
||||
|
@ -108,7 +110,12 @@ def process_item(article, rc):
|
|||
# Article is new, just create it and exit
|
||||
write_article(article, rc)
|
||||
|
||||
|
||||
def create_article_id(uid, feed):
    """Derive the database reference ID of an article.

    The RSS unique tag and the feed name are each converted with ``str()``,
    UTF-8 encoded and hashed back to back, which is the same as hashing their
    direct concatenation.

    Args:
        uid: RSS unique tag of the article.
        feed: Name of the feed.

    Returns:
        Hex-encoded SHA-256 digest (64 characters).
    """
    hasher = hashlib.sha256()
    for part in (uid, feed):
        hasher.update(str(part).encode('utf-8'))
    return hasher.hexdigest()
|
||||
|
||||
|
||||
for feed in config['feeds']:
|
||||
|
@ -123,11 +130,13 @@ for feed in config['feeds']:
|
|||
try:
|
||||
rss_id = item[unique_tag]
|
||||
title = item['title']
|
||||
article_id = create_article_id(rss_id, name)
|
||||
#description = item['description'] ## Don't store description for now, as we don't need it and it's big.
|
||||
published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
|
||||
link = item['link']
|
||||
article_data = {
|
||||
'title' : title,
|
||||
'article_id': article_id,
|
||||
#'description': description,
|
||||
'published' : published,
|
||||
'link' : link,
|
||||
|
|
|
@ -80,13 +80,14 @@ def index():
|
|||
)
|
||||
|
||||
|
||||
@app.route("/article/<path:article_id>")
def article_detail(article_id: str):
    """Render the change history of one article.

    Looks up every row in ``diffs`` whose ``article_id`` matches and passes
    them to the ``article_detail.html`` template, together with the article
    URL taken from the first matching row.

    Args:
        article_id: Article identifier taken from the URL.

    Returns:
        The rendered ``article_detail.html`` template.

    Raises:
        404 Not Found (via ``abort``) when no diff exists for ``article_id``.
    """
    # Imported locally so this handler does not depend on the file's import block.
    from flask import abort

    db = get_db().cursor()
    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
    result = db.fetchall()
    if not result:
        # Previously result[0] raised IndexError here and produced a 500.
        abort(404)
    article_url = result[0]['article_url']
    return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result )
|
||||
|
||||
|
||||
@app.route('/about')
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
|
||||
{% block body %}
|
||||
<div class="container">
|
||||
<h1>Diffs for the article at {{ article_url }}</h1>
|
||||
<h1>Diffs for the article at <a href="{{ article_url }}">{{ article_url|truncate(50) }}</a></h1>
|
||||
<br>
|
||||
|
||||
<ol class="diffs-list">
|
||||
{% for diff in diffs %}
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 6V8H5V19H16V14H18V20C18 20.5523 17.5523 21 17 21H4C3.44772 21 3 20.5523 3 20V7C3 6.44772 3.44772 6 4 6H10ZM21 3V11H19L18.9999 6.413L11.2071 14.2071L9.79289 12.7929L17.5849 5H13V3H21Z"></path></svg>
|
||||
Display current article
|
||||
</a>
|
||||
<a class="changeset-action" href="/article/{{ diff.article_url }}">
|
||||
<a class="changeset-action" href="/article/{{ diff.article_id }}">
|
||||
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM13 12H17V14H11V7H13V12Z"></path></svg>
|
||||
Show change history
|
||||
</a>
|
||||
|
|
Loading…
Reference in a new issue