code improvements
This commit is contained in:
parent
6c3942f100
commit
cafd705a9f
15
daemon.py
Normal file
15
daemon.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from main import read_emails
|
||||||
|
|
||||||
|
def run_daemon():
    """Poll the mailbox forever: run one read_emails() pass per minute.

    Any exception raised by a pass is logged and swallowed so the daemon
    keeps running; the sleep happens after both success and failure, so
    polling cadence stays at roughly one attempt per minute.

    Note: logging is configured as a side effect of importing main
    (logging.basicConfig there) — presumably intentional; confirm.
    """
    while True:
        try:
            read_emails()
            # Fixed: message previously said "1 minutes".
            logging.info("Finished checking emails. Sleeping for 1 minute.")
        except Exception as e:
            # Keep the daemon alive on any error; retry on the next cycle.
            logging.error(f"An error occurred: {e}")
        time.sleep(60)  # Sleep for 1 minute between checks
|
||||||
|
if __name__ == "__main__":
    # Allow running the daemon directly: python daemon.py
    run_daemon()
||||||
11
daemon.service
Normal file
11
daemon.service
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Amazon Mail Converter Daemon
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStart=/usr/bin/python3 /path/to/your_script.py
|
||||||
|
Restart=always
|
||||||
|
User=your_username
|
||||||
|
WorkingDirectory=/path/to
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
178
main.py
178
main.py
@ -1,116 +1,110 @@
|
|||||||
from gettext import find
|
import os
|
||||||
import os.path
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import requests
|
||||||
import re
|
import logging
|
||||||
import time
|
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
from google.auth.transport.requests import Request
|
from google.auth.transport.requests import Request
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||||
from googleapiclient.discovery import build
|
from googleapiclient.discovery import build
|
||||||
import logging
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Module-wide logging setup: INFO and above (basicConfig default handler).
logging.basicConfig(level=logging.INFO)

# OAuth scopes requested from the Gmail API.
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/gmail.modify',
]

# From-header substring identifying the Amazon mails this script handles.
SEARCH_QUERY = 'Amazon Kindle Support'

# Directory containing this script; downloads are stored under SCRIPT_DIR/data.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
||||||
def readEmails():
|
def get_credentials():
    """Return valid Gmail API credentials, caching them in token.json.

    Reuses a cached token when present, silently refreshes an expired one
    when a refresh token exists, and otherwise runs the interactive OAuth
    consent flow in a local browser. The result is written back to
    token.json for the next run.

    NOTE(review): token.json is resolved against the current working
    directory, not SCRIPT_DIR — confirm this is intended when run as a
    systemd service.

    Returns:
        google.oauth2.credentials.Credentials: valid credentials.
    """
    creds = None
    # Pick up the token cached by a previous run, if any.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Expired but refreshable: no user interaction needed.
            creds.refresh(Request())
        else:
            # First run (or unusable token): launch the browser consent flow.
            flow = InstalledAppFlow.from_client_secrets_file('/path/to/your/credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the (new or refreshed) credentials for subsequent runs.
        with open('token.json', 'w') as token_file:
            token_file.write(creds.to_json())
    return creds
|
def fetch_unread_messages(service, max_results=10):
    """List up to *max_results* unread messages from the INBOX.

    Args:
        service: authorized Gmail API service object (from build()).
        max_results: cap on the number of message stubs returned.

    Returns:
        list: message stub dicts from the API response, or [] when the
        inbox has no unread mail or the API call fails (the error is
        logged, never raised).
    """
    try:
        response = (
            service.users()
            .messages()
            .list(userId='me', labelIds=['INBOX'], q="is:unread", maxResults=max_results)
            .execute()
        )
        return response.get('messages', [])
    except Exception as error:
        logging.error(f"Error fetching unread messages: {error}")
        return []
|
def download_pdf(pdf_link):
    """Download the PDF referenced by an Amazon redirect link.

    The link's 'U' query parameter carries the actual file URL. The file
    is saved under SCRIPT_DIR/data; an already-existing file is skipped,
    which keeps repeated polling passes idempotent.

    Args:
        pdf_link: href extracted from the "PDF herunterladen" anchor.

    Returns:
        None. All failures are logged, never raised.
    """
    # Extract the real file URL from the redirect's 'U' query parameter.
    parsed_url = urlparse(pdf_link)
    query_params = parse_qs(parsed_url.query)
    actual_file_url = query_params.get('U', [None])[0]
    if not actual_file_url:
        logging.error("No valid file URL found in PDF link.")
        return

    # Derive a file name from the URL path, with a safe fallback.
    file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
    data_dir = os.path.join(SCRIPT_DIR, "data")
    file_path = os.path.join(data_dir, file_name)

    if os.path.exists(file_path):
        logging.info(f"{file_name} already exists. Skipping download.")
        return

    try:
        # Fixed: added a timeout so a stalled server cannot hang the
        # daemon's polling loop forever (requests has no default timeout).
        response = requests.get(actual_file_url, timeout=30)
        if response.status_code == 200:
            os.makedirs(data_dir, exist_ok=True)
            with open(file_path, "wb") as file:
                file.write(response.content)
            logging.info(f"File downloaded and saved to {file_path}")
        else:
            logging.error(f"Failed to download the file. Status code: {response.status_code}")
    except Exception as e:
        logging.error(f"An error occurred during file download: {e}")
|
def process_email(service, message):
    """Handle one unread message: download its invoice PDF if applicable.

    Fetches the full message, checks whether its From header contains
    SEARCH_QUERY, decodes the MIME parts into HTML, and if a
    "PDF herunterladen" anchor is present, passes its href to
    download_pdf(). All errors are logged, never raised.

    Args:
        service: authorized Gmail API service object.
        message: message stub dict containing at least 'id'.
    """
    try:
        full_msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
        headers = full_msg['payload']['headers']
        # Only act on mail whose From header matches the search string.
        if any(SEARCH_QUERY in header.get('value', '') for header in headers if header['name'] == 'From'):
            body_html = ''
            # Concatenate every decodable MIME part into one HTML string.
            # NOTE(review): nested multiparts are not walked recursively.
            for part in full_msg.get('payload', {}).get('parts', []):
                try:
                    raw = part['body']["data"]
                    body_html += base64.urlsafe_b64decode(raw).decode("utf-8")
                except Exception:
                    continue

            # Locate the download anchor in the reconstructed HTML body.
            soup = BeautifulSoup(body_html, "html.parser")
            anchor = soup.find("a", string="PDF herunterladen")
            if anchor:
                download_pdf(anchor.get("href"))
            else:
                logging.info("No 'PDF herunterladen' link found in this email.")
    except Exception as error:
        logging.error(f"An error occurred while processing email: {error}")
|
def read_emails():
    """Run one polling pass: authenticate, list unread mail, process each.

    Builds the Gmail service from cached/interactive credentials, then
    hands every unread message stub to process_email(). Logs and returns
    when there is nothing to do.
    """
    service = build('gmail', 'v1', credentials=get_credentials())
    unread = fetch_unread_messages(service)
    if not unread:
        logging.info("No new messages.")
        return
    for message in unread:
        process_email(service, message)
||||||
|
if __name__ == "__main__":
    # Manual one-shot run: python main.py (the daemon loops via daemon.py).
    read_emails()
|
|||||||
11
web.service
Normal file
11
web.service
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Amazon Mail Converter Daemon
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStart=/usr/bin/python3 /path/to/your_script.py
|
||||||
|
Restart=always
|
||||||
|
User=your_username
|
||||||
|
WorkingDirectory=/path/to
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
Loading…
Reference in New Issue
Block a user