Compare commits

...

3 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| | 70dc17dd75 | | 2024-11-08 19:00:23 +01:00 |
| | ace2e6714b | | 2024-11-08 18:56:40 +01:00 |
| | cafd705a9f | code improvements | 2024-11-08 18:54:36 +01:00 |
5 changed files with 224 additions and 94 deletions

README.md (103 changed lines)

@@ -1,3 +1,102 @@
# amznMailConverter
Checks your Gmail inbox for mails sent from Kindle, downloads the PDF, and stores it
# Amazon Kindle Email PDF Downloader
This project is a Python daemon that connects to your Gmail account, searches for unread emails from "Amazon Kindle Support" containing PDF download links, and automatically downloads the PDFs to a specified local directory if they don't already exist. The script runs as a background service and checks for new emails at a specified interval.
## Features
- Automatically reads unread emails from Gmail.
- Searches for emails from "Amazon Kindle Support" with PDF download links.
- Parses the email to extract the actual PDF download link.
- Downloads the PDF file and saves it to a local `data` directory.
- Runs as a daemon, checking for new emails periodically (see the sketch below).
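Concretely, one pass over the inbox boils down to the loop below (function names are those in `main.py` from this compare; `read_emails()` wraps exactly this logic):
```python
from googleapiclient.discovery import build
from main import get_credentials, fetch_unread_messages, process_email

creds = get_credentials()                        # token.json, or first-run OAuth browser flow
service = build('gmail', 'v1', credentials=creds)
for message in fetch_unread_messages(service):   # unread messages in the INBOX
    process_email(service, message)              # filter by sender, find the PDF link,
                                                 # and call download_pdf() to save it
```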
## Requirements
- Python 3.7+
- Google API credentials for Gmail API access
- Gmail API access enabled in Google Cloud Console
## Installation
1. **Clone the Repository**:
```bash
git clone https://github.com/yourusername/amznMailConverter.git
cd amznMailConverter
```
2. **Install Dependencies**:
```bash
pip install -r requirements.txt
```
The `requirements.txt` should include:
```plaintext
google-auth
google-auth-oauthlib
google-auth-httplib2
google-api-python-client
requests
beautifulsoup4
python-daemon # Only if using the daemon library
```
3. **Set Up Google API Credentials**:
- Create a project in the [Google Cloud Console](https://console.cloud.google.com/).
- Enable the Gmail API.
- Download the OAuth 2.0 Client Credentials JSON file, rename it to `credentials.json`, and place it in the project root directory.
4. **Authorize Access**:
- On first run, the script will prompt you to authorize Gmail API access. Follow the instructions in the browser to grant permissions.
- After authorization, a `token.json` file will be generated to store your access token (see the sketch below).
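For reference, the first run performs the standard installed-app OAuth flow. A minimal sketch, mirroring `get_credentials()` in `main.py` and assuming `credentials.json` sits in the project root:
```python
from google_auth_oauthlib.flow import InstalledAppFlow

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly',
          'https://www.googleapis.com/auth/gmail.modify']

# Opens a browser window for consent and returns authorized credentials.
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
creds = flow.run_local_server(port=0)

# Cache the token so later runs skip the browser step.
with open('token.json', 'w') as token:
    token.write(creds.to_json())
```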
## Usage
To run the script as a daemon, use one of the following methods:
### 1. Running the Daemon Script Directly (Recommended for Simplicity)
```bash
python3 daemon.py
```
The script loops in the foreground (use systemd, `nohup`, or a terminal multiplexer to keep it running in the background), checking for new emails every minute.
### 2. Using Systemd (Linux)
1. **Create a Systemd Service**:
- Create a file `/etc/systemd/system/email_reader.service`:
```ini
[Unit]
Description=Email PDF Downloader Daemon

[Service]
ExecStart=/usr/bin/python3 /path/to/daemon.py
Restart=always
User=your_username
WorkingDirectory=/path/to/project

[Install]
WantedBy=multi-user.target
```
2. **Enable and Start the Service**:
```bash
sudo systemctl daemon-reload
sudo systemctl enable email_reader.service
sudo systemctl start email_reader.service
```
3. **Check Status**:
```bash
sudo systemctl status email_reader.service
```
## Configuration
- **SCOPES**: OAuth scopes for Gmail API access.
- **SEARCH_QUERY**: Query to filter emails by sender ("Amazon Kindle Support").
- **SCRIPT_DIR**: Base directory of the script; downloaded PDFs are saved to the `data/` subdirectory inside it.
- **Interval**: The script checks for new emails every minute (60 seconds). You can adjust this interval in the `run_daemon` function in `daemon.py` (see the sketch below).
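As a sketch, making the interval configurable could look like the following (the `CHECK_INTERVAL_SECONDS` constant is illustrative; the current `daemon.py` hard-codes 60 seconds):
```python
import time
import logging
from main import read_emails

CHECK_INTERVAL_SECONDS = 300  # illustrative: poll every 5 minutes instead of every 60 seconds

def run_daemon():
    while True:
        try:
            read_emails()
        except Exception as e:
            logging.error(f"An error occurred: {e}")
        time.sleep(CHECK_INTERVAL_SECONDS)

if __name__ == "__main__":
    run_daemon()
```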
## Logging
Log output (success messages, errors) is printed to the console, or captured by the systemd journal when running as a systemd service.
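When running under systemd, you can follow these logs with `journalctl`, using the unit name from the setup above:
```bash
sudo journalctl -u email_reader.service -f
```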
## License
This project is licensed under the MIT License.

daemon.py (new file, 15 lines)

@@ -0,0 +1,15 @@
import time
import logging
from main import read_emails

def run_daemon():
    while True:
        try:
            read_emails()
            logging.info("Finished checking emails. Sleeping for 1 minute.")
        except Exception as e:
            logging.error(f"An error occurred: {e}")
        time.sleep(60)  # Sleep for 1 minute between checks

if __name__ == "__main__":
    run_daemon()

daemon.service (new file, 11 lines)

@@ -0,0 +1,11 @@
[Unit]
Description=Amazon Mail Converter Daemon

[Service]
ExecStart=/usr/bin/python3 /path/to/your_script.py
Restart=always
User=your_username
WorkingDirectory=/path/to/project

[Install]
WantedBy=multi-user.target
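To use this unit file, one plausible install sequence is the following (the installed unit name is your choice; the README uses `email_reader.service`):
```bash
sudo cp daemon.service /etc/systemd/system/email_reader.service
sudo systemctl daemon-reload
sudo systemctl enable --now email_reader.service
```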

main.py (178 changed lines)

@@ -1,116 +1,110 @@
from gettext import find
import os.path
import os
import base64
import json
import re
import time
import requests
import logging
from urllib.parse import parse_qs, urlparse
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import logging
import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO)

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly','https://www.googleapis.com/auth/gmail.modify']
search = 'Amazon Kindle Support'
script_dir = os.path.dirname(os.path.abspath(__file__))
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.modify']
SEARCH_QUERY = 'Amazon Kindle Support'
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

def readEmails():
    """Shows basic usage of the Gmail API.
    Lists the user's Gmail labels.
    """
def get_credentials():
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                # your creds file here. Please create json file as here https://cloud.google.com/docs/authentication/getting-started
                '/home/maru/Dev/git/amznMailConverter/my_cred_file.json', SCOPES)
            flow = InstalledAppFlow.from_client_secrets_file('/path/to/your/credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return creds

def fetch_unread_messages(service, max_results=10):
    try:
        # Call the Gmail API
        service = build('gmail', 'v1', credentials=creds)
        results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread").execute()
        messages = results.get('messages',[])
        if not messages:
            print('No new messages.')
        else:
            message_count = 0
            for message in messages:
                msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
                email_data = msg['payload']['headers']
                for values in email_data:
                    name = values['name']
                    if name == 'From':
                        from_name= values['value']
                        #print(f'{search} - {from_name}')
                        if from_name.find(search) != -1:
                            if 'payload' in msg and 'parts' in msg['payload']:
                                mail_body = ''
                                for part in msg['payload']['parts']:
                                    try:
                                        data = part['body']["data"]
                                        byte_code = base64.urlsafe_b64decode(data)
                                        text = byte_code.decode("utf-8")
                                        mail_body += str(text)
                                        # mark the message as read (optional)
                                        #msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()
                                    except BaseException as error:
                                        pass
                                # Parse the HTML with BeautifulSoup
                                soup = BeautifulSoup(mail_body, "html.parser")
                                # Find the "PDF herunterladen" link
                                link = soup.find("a", string="PDF herunterladen")
                                # Extract the href attribute
                                if link:
                                    pdf_link = link.get("href")
                                    # Download the PDF file
                                    response = requests.get(pdf_link)
                                    # Parse the URL to extract the actual file URL
                                    parsed_url = urlparse(pdf_link)
                                    query_params = parse_qs(parsed_url.query)
                                    actual_file_url = query_params.get('U', [None])[0]  # 'U' is the parameter holding the actual file URL
                                    # Extract the file name from the actual file URL
                                    file_name = os.path.basename(urlparse(actual_file_url).path) if actual_file_url else "downloaded_file.pdf"
                                    if response.status_code == 200:
                                        # Define the file path for saving
                                        data_dir = os.path.join(script_dir, "data")
                                        file_path = os.path.join(data_dir, file_name)
                                        # Save the file
                                        with open(file_path, "wb") as file:
                                            file.write(response.content)
                                        print(f"File downloaded and saved to {file_path}")
                                    else:
                                        print("Failed to download the file. Status code:", response.status_code)
                                else:
                                    pass  # print(msg['payload']['body'])
        results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread", maxResults=max_results).execute()
        return results.get('messages', [])
    except Exception as error:
        print(f'An error occurred: {error}')
        logging.error(f"Error fetching unread messages: {error}")
        return []

def download_pdf(pdf_link):
    # Parse URL and get the actual file URL
    parsed_url = urlparse(pdf_link)
    query_params = parse_qs(parsed_url.query)
    actual_file_url = query_params.get('U', [None])[0]
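    # Example with a hypothetical link: for
    #   https://www.amazon.com/gp/f.html?U=https%3A%2F%2Fexample.com%2Fbook.pdf
    # parse_qs(parsed_url.query) returns {'U': ['https://example.com/book.pdf']},
    # so actual_file_url is the decoded direct download URL.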
    if not actual_file_url:
        logging.error("No valid file URL found in PDF link.")
        return
readEmails()
    # Extract the file name
    file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
    data_dir = os.path.join(SCRIPT_DIR, "data")
    file_path = os.path.join(data_dir, file_name)
    # Check if file exists
    if os.path.exists(file_path):
        logging.info(f"{file_name} already exists. Skipping download.")
        return
    # Download and save the file
    try:
        response = requests.get(actual_file_url)
        if response.status_code == 200:
            os.makedirs(data_dir, exist_ok=True)
            with open(file_path, "wb") as file:
                file.write(response.content)
            logging.info(f"File downloaded and saved to {file_path}")
        else:
            logging.error(f"Failed to download the file. Status code: {response.status_code}")
    except Exception as e:
        logging.error(f"An error occurred during file download: {e}")

def process_email(service, message):
    try:
        msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
        headers = msg['payload']['headers']
        if any(SEARCH_QUERY in header.get('value', '') for header in headers if header['name'] == 'From'):
            mail_body = ''
            for part in msg.get('payload', {}).get('parts', []):
                try:
                    data = part['body']["data"]
                    byte_code = base64.urlsafe_b64decode(data)
                    mail_body += byte_code.decode("utf-8")
                except Exception:
                    continue
            # Parse HTML and find the PDF link
            soup = BeautifulSoup(mail_body, "html.parser")
            link = soup.find("a", string="PDF herunterladen")
            if link:
                pdf_link = link.get("href")
                download_pdf(pdf_link)
            else:
                logging.info("No 'PDF herunterladen' link found in this email.")
    except Exception as error:
        logging.error(f"An error occurred while processing email: {error}")

def read_emails():
    creds = get_credentials()
    service = build('gmail', 'v1', credentials=creds)
    messages = fetch_unread_messages(service)
    if not messages:
        logging.info("No new messages.")
    else:
        for message in messages:
            process_email(service, message)

if __name__ == "__main__":
    read_emails()

web.service (new file, 11 lines)

@@ -0,0 +1,11 @@
[Unit]
Description=Amazon Mail Converter Daemon

[Service]
ExecStart=/usr/bin/python3 /path/to/your_script.py
Restart=always
User=your_username
WorkingDirectory=/path/to/project

[Install]
WantedBy=multi-user.target