mirror of
https://git.nolog.cz/NoLog.cz/headline.git
synced 2025-01-31 11:53:35 +01:00
Give every article an ID to enable grouping changes by article
This commit is contained in:
parent
ac2ca35a56
commit
2c97d7ab69
5 changed files with 64 additions and 9 deletions
44
misc/article_id_generator.py
Normal file
44
misc/article_id_generator.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
#
|
||||||
|
# Backfill an article_id for old diff rows that were stored without an RSS UID, so their article_id cannot be generated on the fly.
|
||||||
|
# It takes a while, but it's a one-shot.
|
||||||
|
#
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
# Open the diffs database. The path is relative to the current working
# directory (presumably the script is started from misc/ -- confirm).
db_con = sqlite3.connect("../data/diffs.db")
# Single shared cursor: it serves the initial SELECT and every UPDATE below.
db = db_con.cursor()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_article_id(uid, feed):
    """Return a stand-in unique article ID built from *uid* and *feed*.

    Both arguments are converted with ``str()``, concatenated without a
    separator, UTF-8 encoded and hashed with SHA-256. The result is the
    64-character lowercase hex digest, used to reference the article in
    the database.
    """
    # NOTE: because no separator is inserted, ("ab", "c") and ("a", "bc")
    # produce the same ID. Changing this would change IDs already stored.
    id_string = str(uid) + str(feed)
    return hashlib.sha256(id_string.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def update_diff(diff_id, article_id):
    """Store *article_id* on the ``diffs`` row whose ``diff_id`` matches.

    Runs on the module-level cursor ``db`` and commits on ``db_con``
    straight away, so every call is committed individually.
    """
    db.execute(
        "UPDATE diffs SET article_id = ? WHERE diff_id = ?",
        (article_id, diff_id),
    )
    db_con.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Pick only the rows that still lack an article_id.
# The previous filter, NOT 'article_id', negated a *string literal* (which
# SQLite evaluates as 0), so it matched every row and overwrote IDs that
# were already set.
# Columns are named explicitly so the loop does not depend on the physical
# column order of the table.
db.execute(
    "SELECT diff_id, feed_name, article_url FROM diffs "
    "WHERE article_id IS NULL OR article_id = '' "
    "ORDER BY diff_id DESC"
)
# Materialise the result before updating: update_diff() executes on this
# same cursor, which would discard a result set that is still being read.
diffs = db.fetchall()

for diff_id, feed_name, article_url in diffs:
    # NOTE(review): the argument order (feed_name, article_url) mirrors the
    # original positional call create_article_id(diff[1], diff[2]) on a
    # table whose article_id column was appended last -- confirm this is
    # the intended ID recipe for legacy rows.
    article_id = create_article_id(feed_name, article_url)
    update_diff(diff_id, article_id)
    print(article_id)
|
|
@ -6,6 +6,7 @@ import redis
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from diff_match_patch import diff_match_patch
|
from diff_match_patch import diff_match_patch
|
||||||
|
|
||||||
|
@ -34,6 +35,7 @@ db = db_con.cursor()
|
||||||
db.executescript("""
|
db.executescript("""
|
||||||
CREATE TABLE IF NOT EXISTS diffs (
|
CREATE TABLE IF NOT EXISTS diffs (
|
||||||
diff_id INTEGER PRIMARY KEY,
|
diff_id INTEGER PRIMARY KEY,
|
||||||
|
article_id TEXT,
|
||||||
feed_name TEXT NOT NULL,
|
feed_name TEXT NOT NULL,
|
||||||
article_url TEXT NOT NULL,
|
article_url TEXT NOT NULL,
|
||||||
title_orig TEXT NOT NULL,
|
title_orig TEXT NOT NULL,
|
||||||
|
@ -84,8 +86,8 @@ def process_diff(old, new, rss_id):
|
||||||
# print(old['link'])
|
# print(old['link'])
|
||||||
# print(diff)
|
# print(diff)
|
||||||
|
|
||||||
sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))"
|
# Seven columns need seven values: six bound parameters plus the datetime() expression.
sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
|
||||||
sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff)
|
sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
|
||||||
db.execute(sql, sql_data)
|
db.execute(sql, sql_data)
|
||||||
db_con.commit()
|
db_con.commit()
|
||||||
|
|
||||||
|
@ -108,7 +110,12 @@ def process_item(article, rc):
|
||||||
# Article is new, just create it and exit
|
# Article is new, just create it and exit
|
||||||
write_article(article, rc)
|
write_article(article, rc)
|
||||||
|
|
||||||
|
def create_article_id(uid, feed):
    """Return a unique article ID built from the RSS unique tag and feed name.

    Both arguments are converted with ``str()``, concatenated without a
    separator, UTF-8 encoded and hashed with SHA-256. The result is the
    64-character lowercase hex digest, used to reference the article in
    the database.
    """
    # NOTE: because no separator is inserted, ("ab", "c") and ("a", "bc")
    # produce the same ID. Changing this would change IDs already stored.
    id_string = str(uid) + str(feed)
    return hashlib.sha256(id_string.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
for feed in config['feeds']:
|
for feed in config['feeds']:
|
||||||
|
@ -123,11 +130,13 @@ for feed in config['feeds']:
|
||||||
try:
|
try:
|
||||||
rss_id = item[unique_tag]
|
rss_id = item[unique_tag]
|
||||||
title = item['title']
|
title = item['title']
|
||||||
|
article_id = create_article_id(rss_id, name)
|
||||||
#description = item['description'] ## Don't store description for now, as we don't need it and it's big.
|
#description = item['description'] ## Don't store description for now, as we don't need it and it's big.
|
||||||
published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
|
published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
|
||||||
link = item['link']
|
link = item['link']
|
||||||
article_data = {
|
article_data = {
|
||||||
'title' : title,
|
'title' : title,
|
||||||
|
'article_id': article_id,
|
||||||
#'description': description,
|
#'description': description,
|
||||||
'published' : published,
|
'published' : published,
|
||||||
'link' : link,
|
'link' : link,
|
||||||
|
|
|
@ -80,13 +80,14 @@ def index():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/article/<path:article_id>")
def article_detail(article_id: str):
    """Render every stored diff that belongs to the article *article_id*.

    Returns a 404 response when no diff row carries that ID, instead of
    failing with an IndexError on the empty result.
    """
    db = get_db().cursor()
    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
    result = db.fetchall()

    if not result:
        # Unknown ID: answer with a (body, status) pair.
        return "Article not found", 404

    # Every row matched the same article_id; the first row supplies the
    # URL that is handed to the template as article_url.
    article_url = result[0]['article_url']
    return render_template(
        "article_detail.html",
        article_id=article_id,
        article_url=article_url,
        diffs=result,
    )
|
||||||
|
|
||||||
|
|
||||||
@app.route('/about')
|
@app.route('/about')
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
|
|
||||||
{% block body %}
|
{% block body %}
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<h1>Diffs for the article at {{ article_url }}</h1>
|
<h1>Diffs for the article at <a href="{{ article_url }}">{{ article_url|truncate(50) }}</a></h1>
|
||||||
|
<br>
|
||||||
|
|
||||||
<ol class="diffs-list">
|
<ol class="diffs-list">
|
||||||
{% for diff in diffs %}
|
{% for diff in diffs %}
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 6V8H5V19H16V14H18V20C18 20.5523 17.5523 21 17 21H4C3.44772 21 3 20.5523 3 20V7C3 6.44772 3.44772 6 4 6H10ZM21 3V11H19L18.9999 6.413L11.2071 14.2071L9.79289 12.7929L17.5849 5H13V3H21Z"></path></svg>
|
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 6V8H5V19H16V14H18V20C18 20.5523 17.5523 21 17 21H4C3.44772 21 3 20.5523 3 20V7C3 6.44772 3.44772 6 4 6H10ZM21 3V11H19L18.9999 6.413L11.2071 14.2071L9.79289 12.7929L17.5849 5H13V3H21Z"></path></svg>
|
||||||
Display current article
|
Display current article
|
||||||
</a>
|
</a>
|
||||||
<a class="changeset-action" href="/article/{{ diff.article_url }}">
|
<a class="changeset-action" href="/article/{{ diff.article_id }}">
|
||||||
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM13 12H17V14H11V7H13V12Z"></path></svg>
|
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM13 12H17V14H11V7H13V12Z"></path></svg>
|
||||||
Show change history
|
Show change history
|
||||||
</a>
|
</a>
|
||||||
|
|
Loading…
Reference in a new issue