From 9800a0825d509a1f972348238cfbec683db38e67 Mon Sep 17 00:00:00 2001 From: bain Date: Wed, 16 Aug 2023 10:28:20 +0200 Subject: [PATCH] Add full-text search to the main page. The user can now search for parts of the changed headline. The search treats each whitespace-separated word as a separate query term and combines the terms with logical OR. The user can wrap several words in double quotes to keep them together as a single query term. --- processor/Dockerfile | 5 +++-- processor/app.py | 36 +++++++++++++++++++++++++++++++++--- view/Dockerfile | 5 +++-- view/app.py | 35 ++++++++++++++++++++++++++++------- view/templates/index.html | 8 +++++++- 5 files changed, 74 insertions(+), 15 deletions(-) diff --git a/processor/Dockerfile b/processor/Dockerfile index f58c41a..1b44e59 100644 --- a/processor/Dockerfile +++ b/processor/Dockerfile @@ -1,7 +1,8 @@ FROM python:3.9-alpine -ADD . /app/ +COPY ./requirements.txt /app/requirements.txt WORKDIR /app RUN pip install -r requirements.txt +ADD . /app/ COPY crontab /etc/cron.d/crontab RUN crontab /etc/cron.d/crontab -CMD ["crond", "-f"] \ No newline at end of file +CMD ["crond", "-f"] diff --git a/processor/app.py b/processor/app.py index 783dc97..b13b2c7 100644 --- a/processor/app.py +++ b/processor/app.py @@ -31,7 +31,8 @@ rc = redis.Redis(host='redis', port=6379, db=0) db_con = sqlite3.connect("../data/diffs.db") db = db_con.cursor() -db.execute("""CREATE TABLE IF NOT EXISTS diffs ( +db.executescript(""" +CREATE TABLE IF NOT EXISTS diffs ( diff_id INTEGER PRIMARY KEY, feed_name TEXT NOT NULL, article_url TEXT NOT NULL, @@ -39,8 +40,37 @@ db.execute("""CREATE TABLE IF NOT EXISTS diffs ( title_new TEXT NOT NULL, diff_html TEXT NOT NULL, diff_time TEXT -);""") +); +CREATE VIRTUAL TABLE IF NOT EXISTS diffs_fts USING fts5( + title_orig, + title_new, + content="diffs", + content_rowid="diff_id", + tokenize="trigram case_sensitive 0" +); +-- rebuild search index +-- useful when creating the table, or when it is externally updated +INSERT INTO
diffs_fts(diffs_fts) VALUES ('rebuild'); + +CREATE TRIGGER IF NOT EXISTS diffs_ainsert AFTER INSERT ON diffs + BEGIN + INSERT INTO diffs_fts (rowid, title_orig, title_new) + VALUES (new.diff_id, new.title_orig, new.title_new); + END; +CREATE TRIGGER IF NOT EXISTS diffs_adelete AFTER DELETE ON diffs + BEGIN + INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new) + VALUES ('delete', old.diff_id, old.title_orig, old.title_new); + END; +CREATE TRIGGER IF NOT EXISTS diffs_aupdate AFTER UPDATE ON diffs + BEGIN + INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new) + VALUES ('delete', old.diff_id, old.title_orig, old.title_new); + INSERT INTO diffs_fts (rowid, title_orig, title_new) + VALUES (new.diff_id, new.title_orig, new.title_new); + END; +""") article_count = 0 def write_article(article, rc): @@ -108,4 +138,4 @@ for feed in config['feeds']: article_count += 1 process_item(article, rc) -print("Processed articles: " + str(article_count)) \ No newline at end of file +print("Processed articles: " + str(article_count)) diff --git a/view/Dockerfile b/view/Dockerfile index 8121e0e..21bed1e 100644 --- a/view/Dockerfile +++ b/view/Dockerfile @@ -1,4 +1,5 @@ FROM python:slim-bullseye -ADD . /app/ +COPY ./requirements.txt /app/requirements.txt WORKDIR /app -RUN pip install -r requirements.txt \ No newline at end of file +RUN pip install -r requirements.txt +ADD . 
/app/ diff --git a/view/app.py b/view/app.py index 3c0baa1..079fadf 100644 --- a/view/app.py +++ b/view/app.py @@ -4,6 +4,7 @@ import sqlite3 from flask import Flask, request, render_template, g, send_from_directory from flask_paginate import Pagination, get_page_parameter import confuse +import re DATABASE = "../data/diffs.db" @@ -16,8 +17,6 @@ config.set_file(CONFIG_FILE) app = Flask(__name__) - - def get_db(): db = getattr(g, '_database', None) if db is None: @@ -25,6 +24,7 @@ def get_db(): db.row_factory = sqlite3.Row return db + @app.teardown_appcontext def close_connection(exception): db = getattr(g, '_database', None) @@ -32,11 +32,23 @@ def close_connection(exception): db.close() +def websearch_to_fts_query(search: str): + """ + Converts web searches into fts queries: + 'this is "a test"' -> '"this" OR "is" OR "a test"' + """ + return ' OR '.join(['"'+m.group(0)+'"' for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)]) + @app.route('/') def index(): db = get_db().cursor() - db.execute('SELECT count(diff_id) FROM diffs') + + search = request.args.get("search", type=str, default="") + query = websearch_to_fts_query(search) if search else None + + db.execute(f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}", (query,) if query else ()) + diff_count = db.fetchall()[0][0] @@ -48,13 +60,23 @@ def index(): page_skip = pagination.skip per_page = pagination.per_page - db.execute("SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?", (per_page,page_skip)) + if query: + db.execute( + "SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?", + (query,per_page,page_skip) + ) + else: + db.execute( + "SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? 
OFFSET ?", + (per_page,page_skip) + ) diffs = db.fetchall() return render_template('./index.html', diffs=diffs, pagination=pagination, - diff_count = diff_count + diff_count = diff_count, + search=search, ) @@ -76,10 +98,9 @@ def feed_list(): return render_template('feeds.html', feeds=feeds) - @app.route('/robots.txt') def static_from_root(): return send_from_directory(app.static_folder, request.path[1:]) if __name__ == "__main__": - app.run(host="0.0.0.0") \ No newline at end of file + app.run(host="0.0.0.0") diff --git a/view/templates/index.html b/view/templates/index.html index eb9bec2..4dea652 100644 --- a/view/templates/index.html +++ b/view/templates/index.html @@ -27,6 +27,12 @@
+
+
+ + +
+
@@ -71,4 +77,4 @@ {% include 'parts/footer.html' %} - \ No newline at end of file +