168 lines
3.9 KiB
Python
168 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
|
|
|
'''
|
|
Job Site Crawler version 1.1
|
|
Sites included: Stadt Zurich/ Sozialinfo
|
|
|
|
Log:
|
|
10.07.2023 - fixed homegate and updated flats page for highlighting
|
|
01.06.2023 - sozialinfo changed its results page to a vue.js implementation; rebuilt the parser - much easier now
|
|
30.05.2023 - added sort by prio
|
|
20.05.2023 - finalized INFO_OBJECT
|
|
11.05.2023 - added INFO_OBJECT compatibility to web frontend as modalNotifier
|
|
07.05.2023 - added INFO-OBJECT for error msg and captcha check on sozialinfo
|
|
|
|
by KM maru21
|
|
'''
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import parseStzh
|
|
import parseSozialinfo2
|
|
|
|
from push import push
|
|
from db import db
|
|
from pathlib import Path
|
|
|
|
ntfyLnk = 'https://push.kmuhrer.net'
|
|
ntfyTopic = 'sozialinfo'
|
|
|
|
dataPath = os.path.join(Path(os.path.dirname(__file__)).parent, 'data')
|
|
dbFile = dataPath + '/articles.db'
|
|
filterFile = dataPath + '/article_filter.conf'
|
|
|
|
|
|
|
|
def jsonDump(data, file):
|
|
json.dump(data,open(file,'w'),indent=4)
|
|
|
|
|
|
|
|
def prioSort(articles):
|
|
sortArray1 = []
|
|
sortArray2 = []
|
|
sortArray3 = []
|
|
for article in articles:
|
|
if article['prio'] == 'pinned':
|
|
sortArray1.append(article)
|
|
elif article['prio'] == 'important':
|
|
sortArray2.append(article)
|
|
else:
|
|
sortArray3.append(article)
|
|
|
|
#return sortArray1 + sortArray2 + sortArray3
|
|
sortArray = sortArray3 + sortArray2
|
|
return sortArray + sortArray1
|
|
|
|
|
|
|
|
def compaireArticles(new, old):
|
|
newArticles = []
|
|
for key in new:
|
|
if not key in old:
|
|
#found new
|
|
newArticles.append(new[key])
|
|
return newArticles
|
|
|
|
|
|
|
|
def readDBTable(table):
|
|
dbCon = db(dbFile)
|
|
try:
|
|
jd = json.dumps(dbCon.readAll(table))
|
|
except:
|
|
return 'none'
|
|
return jd
|
|
|
|
|
|
|
|
def listDBTable():
|
|
dbCon = db(dbFile)
|
|
try:
|
|
jd = json.dumps(dbCon.readTables())
|
|
except:
|
|
return 'none'
|
|
return jd
|
|
|
|
|
|
|
|
async def run():
|
|
|
|
dbCon = db(dbFile)
|
|
ntfy = push(ntfyLnk, ntfyTopic)
|
|
|
|
|
|
#scrape new data from server
|
|
currentArticles = json.loads(await main())
|
|
|
|
|
|
#read data from table from previous run
|
|
dbNewArticles = []
|
|
for line in dbCon.readAll('new_articles'):
|
|
dbNewArticles.append(line[1])
|
|
|
|
|
|
#check if an error occured
|
|
#check if new articles table exists if not create and exit
|
|
#check if there are new articles and replace new articles table
|
|
errorCheck = True
|
|
if 'INFO-OBJECT' in currentArticles.keys():
|
|
if currentArticles['INFO-OBJECT']['Titel'] == 'ERROR':
|
|
errorCheck = False
|
|
if errorCheck:
|
|
if len(dbNewArticles) <= 0:
|
|
for index in currentArticles:
|
|
if index == 'INFO-OBJECT': continue
|
|
article = currentArticles[index]
|
|
dbCon.writeNew(article)
|
|
exit
|
|
|
|
if not len(currentArticles) <= 0:
|
|
dbCon.delete_table('new_articles')
|
|
for index in currentArticles:
|
|
if index == 'INFO-OBJECT': continue
|
|
article = currentArticles[index]
|
|
dbCon.writeNew(article)
|
|
|
|
|
|
#compaire db and current articles
|
|
newArticles = compaireArticles(currentArticles, dbNewArticles)
|
|
|
|
#if there are new articles push them and write permanently to db
|
|
if not len(newArticles) <= 0:
|
|
|
|
#sort new articles by priority
|
|
newArticles = prioSort(newArticles)
|
|
|
|
for article in newArticles:
|
|
ntfy.send(article)
|
|
if article['id'] == 'INFO-OBJECT': continue
|
|
dbCon.write(article)
|
|
else:
|
|
ntfy.send('none')
|
|
|
|
|
|
|
|
async def runAsync(func, ff: str):
|
|
return await asyncio.to_thread(func, ff)
|
|
|
|
|
|
|
|
async def main(mode = 'full'):
|
|
|
|
ff = filterFile
|
|
|
|
parsers = [parseStzh.init(ff, mode), parseSozialinfo2.init(ff, mode)]
|
|
articlesStzh, articlesSozialinfo = await asyncio.gather(*parsers)
|
|
|
|
articles = {**articlesStzh, **articlesSozialinfo}
|
|
|
|
return json.dumps(articles)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
asyncio.run(run()) |