# amznMailConverter/main.py
# 2024-11-08 18:34:58 +01:00
# 116 lines, 5.7 KiB, Python

from gettext import find
import os.path
import base64
import json
import re
import time
from urllib.parse import parse_qs, urlparse
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import logging
import requests
from bs4 import BeautifulSoup
# OAuth scopes: readonly for fetching mail; modify is requested for the
# (currently commented-out) mark-as-read call inside readEmails().
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly','https://www.googleapis.com/auth/gmail.modify']
# Sender substring that identifies the Kindle mails to process.
search = 'Amazon Kindle Support'
# Absolute directory of this script; downloads are saved under <script_dir>/data.
script_dir = os.path.dirname(os.path.abspath(__file__))
def readEmails():
    """Download Kindle PDFs linked in unread Gmail messages.

    Authenticates against the Gmail API (token cached in token.json),
    scans unread INBOX messages for mails whose From header contains
    ``search`` ('Amazon Kindle Support'), extracts the "PDF herunterladen"
    link from the HTML body and saves the PDF into ``<script_dir>/data``.

    Side effects: may open a browser for the OAuth flow, writes token.json,
    and writes downloaded files to disk. Errors are printed, not raised.
    """
    creds = None
    # token.json stores the user's access and refresh tokens; it is created
    # automatically when the authorization flow completes for the first time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                # your creds file here. Please create json file as here
                # https://cloud.google.com/docs/authentication/getting-started
                '/home/maru/Dev/git/amznMailConverter/my_cred_file.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    try:
        # Call the Gmail API
        service = build('gmail', 'v1', credentials=creds)
        results = service.users().messages().list(
            userId='me', labelIds=['INBOX'], q="is:unread").execute()
        messages = results.get('messages', [])
        if not messages:
            print('No new messages.')
            return
        for message in messages:
            msg = service.users().messages().get(
                userId='me', id=message['id'], format='full').execute()
            for header in msg['payload']['headers']:
                # Only act on matching From headers; guard clauses keep
                # nesting flat and avoid the original's possibly-unbound
                # from_name reference.
                if header['name'] != 'From':
                    continue
                from_name = header['value']
                if search not in from_name:
                    continue
                if 'payload' not in msg or 'parts' not in msg['payload']:
                    continue
                mail_body = ''
                for part in msg['payload']['parts']:
                    try:
                        data = part['body']["data"]
                    except KeyError:
                        # Part carries no inline body (e.g. an attachment
                        # stub) — skip it. Narrowed from the original
                        # blanket `except BaseException: pass`.
                        continue
                    mail_body += base64.urlsafe_b64decode(data).decode("utf-8")
                    # mark the message as read (optional)
                    # msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()
                # Parse the HTML with BeautifulSoup
                soup = BeautifulSoup(mail_body, "html.parser")
                # Find the "PDF herunterladen" (German: "download PDF") link
                link = soup.find("a", string="PDF herunterladen")
                if not link:
                    continue
                pdf_link = link.get("href")
                # Download the PDF file; a timeout keeps a dead link from
                # hanging the whole run.
                response = requests.get(pdf_link, timeout=60)
                # The link is a redirect URL; query parameter 'U' holds the
                # actual file URL, from which a sensible name is derived.
                parsed_url = urlparse(pdf_link)
                query_params = parse_qs(parsed_url.query)
                actual_file_url = query_params.get('U', [None])[0]
                file_name = (os.path.basename(urlparse(actual_file_url).path)
                             if actual_file_url else "downloaded_file.pdf")
                if response.status_code == 200:
                    data_dir = os.path.join(script_dir, "data")
                    # Bug fix: create the target directory if it does not
                    # exist yet; otherwise open() raises FileNotFoundError.
                    os.makedirs(data_dir, exist_ok=True)
                    file_path = os.path.join(data_dir, file_name)
                    with open(file_path, "wb") as file:
                        file.write(response.content)
                    print(f"File downloaded and saved to {file_path}")
                else:
                    print("Failed to download the file. Status code:", response.status_code)
    except Exception as error:
        print(f'An error occurred: {error}')
readEmails()