Add full-text search to the main page. The user can now search for parts
of the changed headline. The search treats each whitespace-separated word
as a separate query term, and the terms are combined with a logical OR.
The user can wrap several words in quotes to search for them as a single
phrase.
This commit is contained in:
bain 2023-08-16 10:28:20 +02:00
parent f485239bf3
commit 9800a0825d
5 changed files with 74 additions and 15 deletions

View file

@ -1,7 +1,8 @@
FROM python:3.9-alpine FROM python:3.9-alpine
ADD . /app/ COPY ./requirements.txt /app/requirements.txt
WORKDIR /app WORKDIR /app
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
ADD . /app/
COPY crontab /etc/cron.d/crontab COPY crontab /etc/cron.d/crontab
RUN crontab /etc/cron.d/crontab RUN crontab /etc/cron.d/crontab
CMD ["crond", "-f"] CMD ["crond", "-f"]

View file

@ -31,7 +31,8 @@ rc = redis.Redis(host='redis', port=6379, db=0)
db_con = sqlite3.connect("../data/diffs.db") db_con = sqlite3.connect("../data/diffs.db")
db = db_con.cursor() db = db_con.cursor()
db.execute("""CREATE TABLE IF NOT EXISTS diffs ( db.executescript("""
CREATE TABLE IF NOT EXISTS diffs (
diff_id INTEGER PRIMARY KEY, diff_id INTEGER PRIMARY KEY,
feed_name TEXT NOT NULL, feed_name TEXT NOT NULL,
article_url TEXT NOT NULL, article_url TEXT NOT NULL,
@ -39,8 +40,37 @@ db.execute("""CREATE TABLE IF NOT EXISTS diffs (
title_new TEXT NOT NULL, title_new TEXT NOT NULL,
diff_html TEXT NOT NULL, diff_html TEXT NOT NULL,
diff_time TEXT diff_time TEXT
);""") );
CREATE VIRTUAL TABLE IF NOT EXISTS diffs_fts USING fts5(
title_orig,
title_new,
content="diffs",
content_rowid="diff_id",
tokenize="trigram case_sensitive 0"
);
-- rebuild search index
-- useful when creating the table, or when it is externally updated
INSERT INTO diffs_fts(diffs_fts) VALUES ('rebuild');
CREATE TRIGGER IF NOT EXISTS diffs_ainsert AFTER INSERT ON diffs
BEGIN
INSERT INTO diffs_fts (rowid, title_orig, title_new)
VALUES (new.diff_id, new.title_orig, new.title_new);
END;
CREATE TRIGGER IF NOT EXISTS diffs_adelete AFTER DELETE ON diffs
BEGIN
INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new)
VALUES ('delete', old.diff_id, old.title_orig, old.title_new);
END;
CREATE TRIGGER IF NOT EXISTS diffs_aupdate AFTER UPDATE ON diffs
BEGIN
INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new)
VALUES ('delete', old.diff_id, old.title_orig, old.title_new);
INSERT INTO diffs_fts (rowid, title_orig, title_new)
VALUES (new.diff_id, new.title_orig, new.title_new);
END;
""")
article_count = 0 article_count = 0
def write_article(article, rc): def write_article(article, rc):

View file

@ -1,4 +1,5 @@
FROM python:slim-bullseye FROM python:slim-bullseye
ADD . /app/ COPY ./requirements.txt /app/requirements.txt
WORKDIR /app WORKDIR /app
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
ADD . /app/

View file

@ -4,6 +4,7 @@ import sqlite3
from flask import Flask, request, render_template, g, send_from_directory from flask import Flask, request, render_template, g, send_from_directory
from flask_paginate import Pagination, get_page_parameter from flask_paginate import Pagination, get_page_parameter
import confuse import confuse
import re
DATABASE = "../data/diffs.db" DATABASE = "../data/diffs.db"
@ -16,8 +17,6 @@ config.set_file(CONFIG_FILE)
app = Flask(__name__) app = Flask(__name__)
def get_db(): def get_db():
db = getattr(g, '_database', None) db = getattr(g, '_database', None)
if db is None: if db is None:
@ -25,6 +24,7 @@ def get_db():
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
return db return db
@app.teardown_appcontext @app.teardown_appcontext
def close_connection(exception): def close_connection(exception):
db = getattr(g, '_database', None) db = getattr(g, '_database', None)
@ -32,11 +32,23 @@ def close_connection(exception):
db.close() db.close()
def websearch_to_fts_query(search: str):
    """
    Converts web searches into fts queries:
    'this is "a test"' -> '"this" OR "is" OR "a test"'

    Each whitespace-separated word becomes its own double-quoted term, and
    text wrapped in double quotes is kept together as one term. The terms
    are joined with ' OR '.

    A double quote without a matching closing quote is ignored and the
    words after it are treated as individual terms. Returns an empty string
    when the search contains no usable term.
    """
    terms = []
    # The quotes are consumed as part of the match (rather than inspected
    # with look-arounds), so the closing quote of one phrase can never be
    # reused as the opening quote of the next one. Otherwise the text
    # between two quoted phrases would itself be captured as a phrase.
    for m in re.finditer(r'"([^"]*)"|([^\s"]+)', search):
        phrase, word = m.group(1), m.group(2)
        term = phrase if phrase is not None else word
        if term:  # skip empty phrases such as '""'
            terms.append('"' + term + '"')
    return ' OR '.join(terms)
@app.route('/') @app.route('/')
def index(): def index():
db = get_db().cursor() db = get_db().cursor()
db.execute('SELECT count(diff_id) FROM diffs')
search = request.args.get("search", type=str, default="")
query = websearch_to_fts_query(search) if search else None
db.execute(f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}", (query,) if query else ())
diff_count = db.fetchall()[0][0] diff_count = db.fetchall()[0][0]
@ -48,13 +60,23 @@ def index():
page_skip = pagination.skip page_skip = pagination.skip
per_page = pagination.per_page per_page = pagination.per_page
db.execute("SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?", (per_page,page_skip)) if query:
db.execute(
"SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?",
(query,per_page,page_skip)
)
else:
db.execute(
"SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?",
(per_page,page_skip)
)
diffs = db.fetchall() diffs = db.fetchall()
return render_template('./index.html', return render_template('./index.html',
diffs=diffs, diffs=diffs,
pagination=pagination, pagination=pagination,
diff_count = diff_count diff_count = diff_count,
search=search,
) )
@ -76,7 +98,6 @@ def feed_list():
return render_template('feeds.html', feeds=feeds) return render_template('feeds.html', feeds=feeds)
@app.route('/robots.txt') @app.route('/robots.txt')
def static_from_root(): def static_from_root():
return send_from_directory(app.static_folder, request.path[1:]) return send_from_directory(app.static_folder, request.path[1:])

View file

@ -27,6 +27,12 @@
<body> <body>
<div class="container"> <div class="container">
<form>
<div class="d-flex">
<input class="m-2 form-control" type="text" id="search" name="search" value="{{ search|e }}" />
<input class="m-2 btn btn-primary" type="submit" formenctype="application/x-www-form-urlencoded" formmethod="get" value="Hledat" />
</div>
</form>
<div class="table-responsive"> <div class="table-responsive">
<table class="table table-hover"> <table class="table table-hover">
<thead> <thead>