diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ab1641 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +data/proc_log.txt +data/diffs.db +*.pyc \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3e05aeb --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3.8" +services: + view: + build: ./view/ + command: python app.py + ports: + - "5000:5000" + volumes: + - ./view:/app + - ./data:/data + container_name: view + restart: unless-stopped + + processor: + build: ./processor/ + volumes: + - ./processor:/app + - ./data:/data + container_name: processor + restart: unless-stopped + + redis: + image: redis:latest + restart: unless-stopped + container_name: redis_db + diff --git a/processor/Dockerfile b/processor/Dockerfile new file mode 100644 index 0000000..f58c41a --- /dev/null +++ b/processor/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.9-alpine +ADD . /app/ +WORKDIR /app +RUN pip install -r requirements.txt +COPY crontab /etc/cron.d/crontab +RUN crontab /etc/cron.d/crontab +CMD ["crond", "-f"] \ No newline at end of file diff --git a/headline.py b/processor/app.py similarity index 74% rename from headline.py rename to processor/app.py index f2e5f6a..c71b9dd 100644 --- a/headline.py +++ b/processor/app.py @@ -5,13 +5,10 @@ import confuse import redis import time import json -import imgkit +import sqlite3 from diff_match_patch import diff_match_patch -from pprint import pprint -import hashlib - # # Idea block: @@ -26,7 +23,20 @@ config.set_file('config.yaml') dmp = diff_match_patch() -rc = redis.Redis(host='localhost', port=6379, db=0) +rc = redis.Redis(host='redis', port=6379, db=0) + +db_con = sqlite3.connect("../data/diffs.db") +db = db_con.cursor() + +db.execute("""CREATE TABLE IF NOT EXISTS diffs ( + diff_id INTEGER PRIMARY KEY, + feed_name TEXT NOT NULL, + article_url TEXT NOT NULL, + title_orig TEXT NOT NULL, + title_new TEXT NOT NULL, + diff_html TEXT NOT NULL, + 
diff_time TEXT +);""") article_count = 0 @@ -39,9 +49,13 @@ def process_diff(old, new, rss_id): dmp.diff_cleanupSemantic(diff) html_diff = dmp.diff_prettyHtml(diff) print(old['link']) - #print(diff) - #filename = "./img/" + hashlib.md5(rss_id.encode()).hexdigest() + ".jpg" - #image = imgkit.from_string(html_diff, filename, options = {'width': '450', 'quiet': ''}) + print(diff) + + sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))" + sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff) + db.execute(sql, sql_data) + db_con.commit() + return(True) @@ -52,7 +66,7 @@ def process_item(article, rc): if old['title'] != new['title']: print('Article changed. World is fucked.') diff = process_diff(old, new, article['rss_id']) - #write_article(article, rc) + write_article(article, rc) return(True) else: # Article is the same. All good! diff --git a/config.yaml b/processor/config.yaml similarity index 100% rename from config.yaml rename to processor/config.yaml diff --git a/processor/crontab b/processor/crontab new file mode 100644 index 0000000..7587f3e --- /dev/null +++ b/processor/crontab @@ -0,0 +1 @@ +*/1 * * * * cd /app && /usr/local/bin/python3 app.py >> /data/proc_log.txt 2>&1 \ No newline at end of file diff --git a/requirements.txt b/processor/requirements.txt similarity index 52% rename from requirements.txt rename to processor/requirements.txt index fb3153f..d527e32 100644 --- a/requirements.txt +++ b/processor/requirements.txt @@ -1,5 +1,4 @@ feedparser confuse redis -diff-match-patch -imgkit \ No newline at end of file +diff-match-patch \ No newline at end of file diff --git a/view/Dockerfile b/view/Dockerfile new file mode 100644 index 0000000..8121e0e --- /dev/null +++ b/view/Dockerfile @@ -0,0 +1,4 @@ +FROM python:slim-bullseye +ADD . 
/app/
+WORKDIR /app
+RUN pip install -r requirements.txt
\ No newline at end of file
diff --git a/view/app.py b/view/app.py
new file mode 100644
index 0000000..e289d3f
--- /dev/null
+++ b/view/app.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python3
+"""Web view that lists recorded headline diffs, newest first, with pagination."""
+
+import sqlite3
+from flask import Flask, request, render_template
+from flask import g
+from flask_paginate import Pagination, get_page_parameter
+
+
+# SQLite file written by the processor service; only read here.
+DATABASE = "../data/diffs.db"
+# Rows per page; used for both the pagination links and the SQL window.
+PER_PAGE = 10
+
+app = Flask(__name__)
+
+
+def get_db():
+    """Return the SQLite connection for this app context, opening it on first use."""
+    db = getattr(g, '_database', None)
+    if db is None:
+        db = g._database = sqlite3.connect(DATABASE)
+        # Row objects let the template read columns by name (diff.diff_id, ...).
+        db.row_factory = sqlite3.Row
+    return db
+
+
+@app.teardown_appcontext
+def close_connection(exception):
+    """Close the context's connection, if one was opened, when the context ends."""
+    db = getattr(g, '_database', None)
+    if db is not None:
+        db.close()
+
+
+@app.route('/')
+def index():
+    """Render one page of diffs, newest first."""
+    db = get_db().cursor()
+    db.execute('SELECT count(diff_id) FROM diffs')
+    diff_count = db.fetchall()[0][0]
+
+    # flask-paginate: clamp to the first page so the SQL offset is never negative.
+    page = max(1, request.args.get(get_page_parameter(), type=int, default=1))
+    pagination = Pagination(page=page, per_page=PER_PAGE, total=diff_count, record_name='diffs')
+
+    # LIMIT takes a row count, not an end index, so every page asks for PER_PAGE rows.
+    db.execute("SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?", (PER_PAGE, pagination.skip))
+    diffs = db.fetchall()
+
+    return render_template('./index.html',
+        diffs=diffs,
+        pagination=pagination,
+    )
+
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0")
\ No newline at end of file
diff --git a/view/requirements.txt b/view/requirements.txt
new file mode 100644
index 0000000..247b64a
--- /dev/null
+++ b/view/requirements.txt
@@ -0,0 +1,2 @@
+flask
+flask-paginate
\ No newline at end of file
diff --git a/view/templates/index.html b/view/templates/index.html
new file mode 100644
index 0000000..456374c
--- /dev/null
+++ b/view/templates/index.html
@@ -0,0 +1,66 @@
+ + + + + + Headliner + + + + + +
+
+ + + + + + + + + + + + + {% for diff in diffs %} + + + + + + + + + {% endfor %} + +
#Detection timeSourceDiffOriginal titleChanged title
{{ diff.diff_id }}{{ diff.diff_time }}{{ diff.feed_name }}{{ diff.diff_html|safe }} + {{ diff.title_orig|truncate(15) }} + {{ diff.title_orig }} + + {{ diff.title_new|truncate(15) }} + {{ diff.title_new}} +
+
+ {{ pagination.links }} +
+ + \ No newline at end of file