#!/usr/bin/python3

import feedparser
import confuse
import redis
import time
import json
import sqlite3
import hashlib

from diff_match_patch import diff_match_patch


class DiffThing(diff_match_patch):
    def diff_html(self, diffs):
        """Like diff_prettyHtml, but without inline style attributes
        (makes it easier to style it with ).
        """
        html = []
        for op, data in diffs:
            text = (
                data.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\n", "&para;<br>")
            )
            if op == self.DIFF_INSERT:
                html.append("<ins>%s</ins>" % text)
            elif op == self.DIFF_DELETE:
                html.append("<del>%s</del>" % text)
            elif op == self.DIFF_EQUAL:
                html.append("<span>%s</span>" % text)
        return "".join(html)


#
# Idea block:
#
# 	Můžeme zkusit ke každému ID článku přidat taky hash obsahu, s tím že v začátku budeme kontrolovat jenom změnu hashe a až pak obsah stringů.
# 	Ale nevím jestli to bude reálně efektivnější
#
# 	Teď budeme kontrolovat jen titulky, ale postupně můžeme přidat i description článku

CONFIG_FILE = "../data/config.yaml"
REDIS_ARTICLE_EXPIRE_SEC = 604800

config = confuse.Configuration("headline", __name__)
config.set_file(CONFIG_FILE)

dmp = DiffThing()

rc = redis.Redis(host="redis", port=6379, db=0)

db_con = sqlite3.connect("../data/diffs.db")
db = db_con.cursor()

db.executescript(
    """
PRAGMA journal_mode=WAL;

CREATE TABLE IF NOT EXISTS diffs (
	diff_id INTEGER PRIMARY KEY,
	article_id TEXT,
	feed_name TEXT NOT NULL,
	article_url TEXT NOT NULL,
	title_orig TEXT NOT NULL,
	title_new TEXT NOT NULL,
    diff_html TEXT NOT NULL,
    diff_time TEXT
);

CREATE VIRTUAL TABLE IF NOT EXISTS diffs_fts USING fts5(
    title_orig,
    title_new,
    content="diffs",
    content_rowid="diff_id",
    tokenize="trigram case_sensitive 0"
);
-- rebuild search index
-- useful when creating the table, or when it is externally updated
INSERT INTO diffs_fts(diffs_fts) VALUES ('rebuild');

CREATE TRIGGER IF NOT EXISTS diffs_ainsert AFTER INSERT ON diffs
    BEGIN
        INSERT INTO diffs_fts (rowid, title_orig, title_new)
        VALUES (new.diff_id, new.title_orig, new.title_new);
    END;
CREATE TRIGGER IF NOT EXISTS diffs_adelete AFTER DELETE ON diffs
    BEGIN
        INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new)
        VALUES ('delete', old.diff_id, old.title_orig, old.title_new);
    END;
CREATE TRIGGER IF NOT EXISTS diffs_aupdate AFTER UPDATE ON diffs
    BEGIN
        INSERT INTO diffs_fts (diffs_fts, rowid, title_orig, title_new)
        VALUES ('delete', old.diff_id, old.title_orig, old.title_new);
        INSERT INTO diffs_fts (rowid, title_orig, title_new)
        VALUES (new.diff_id, new.title_orig, new.title_new);
    END;
"""
)
article_count = 0


def write_article(article, rc):
    rval = json.dumps(article["content"])
    rc.set(article["rss_id"], rval, ex=REDIS_ARTICLE_EXPIRE_SEC)


def process_diff(old, new, rss_id):
    diff = dmp.diff_main(old["title"], new["title"])
    dmp.diff_cleanupSemantic(diff)
    html_diff = dmp.diff_html(diff)
    # print(old['link'])
    # print(diff)

    sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
    sql_data = (
        new["article_id"],
        old["medium"],
        old["link"],
        old["title"],
        new["title"],
        html_diff,
    )
    db.execute(sql, sql_data)
    db_con.commit()

    return True


def process_item(article, rc):
    if rc.exists(article["rss_id"]):
        old = json.loads(rc.get(article["rss_id"]))
        new = article["content"]
        if old["title"] != new["title"]:
            # print('Article changed. World is fucked.')
            diff = process_diff(old, new, article["rss_id"])
            write_article(article, rc)
            return True
        else:
            # Article is the same. All good!
            return True
    else:
        # Article is new, just create it and exit
        write_article(article, rc)


def create_article_id(uid, feed):
    # Create a unique ID from RSS unique tag and feed name to reference the article in database
    id_string = str(uid) + str(feed)
    id_bytes = id_string.encode("utf-8")
    article_id = hashlib.sha256(id_bytes).hexdigest()
    return article_id


for feed in config["feeds"]:
    try:
        rss_source = str(feed["rss_source"])
        unique_tag = str(feed["unique_tag"])
        name = str(feed["name"])

        rss = feedparser.parse(rss_source)

        for item in rss["entries"]:
            try:
                rss_id = item[unique_tag]
                title = item["title"]
                article_id = create_article_id(rss_id, name)
                # description = item['description'] ## Don't store description for now, as we don't need it and it's big.
                published = time.strftime(
                    "%Y:%m:%d %H:%M:%S %Z %z", item["published_parsed"]
                )
                link = item["link"]
                article_data = {
                    "title": title,
                    "article_id": article_id,
                    #'description':	description,
                    "published": published,
                    "link": link,
                    "medium": name,
                }
                article = {"rss_id": rss_id, "content": article_data}
                article_count += 1
                process_item(article, rc)
            except Exception as e:
                print("Parsing article failed")
                print(e)
                print(item)
    except Exception as e:
        print("Parsing feed failed.")
        print(e)
        print(feed)
        pass

print("Processed articles: " + str(article_count))