diff --git a/.gitignore b/.gitignore index d4238ce..69ada1a 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,6 @@ cython_debug/ # Built Visual Studio Code Extensions *.vsix -*.json \ No newline at end of file +*.json +*.pdf +data/* \ No newline at end of file diff --git a/main.py b/main.py index e1be6a5..45d21cd 100644 --- a/main.py +++ b/main.py @@ -1,16 +1,22 @@ +from gettext import find import os.path import base64 import json import re import time +from urllib.parse import parse_qs, urlparse from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build import logging import requests +from bs4 import BeautifulSoup + SCOPES = ['https://www.googleapis.com/auth/gmail.readonly','https://www.googleapis.com/auth/gmail.modify'] +search = 'Amazon Kindle Support' +script_dir = os.path.dirname(os.path.abspath(__file__)) def readEmails(): """Shows basic usage of the Gmail API. @@ -29,7 +35,7 @@ def readEmails(): else: flow = InstalledAppFlow.from_client_secrets_file( # your creds file here. Please create json file as here https://cloud.google.com/docs/authentication/getting-started - 'my_cred_file.json', SCOPES) + '/home/maru/Dev/git/amznMailConverter/my_cred_file.json', SCOPES) creds = flow.run_local_server(port=0) # Save the credentials for the next run with open('token.json', 'w') as token: @@ -38,29 +44,73 @@ def readEmails(): # Call the Gmail API service = build('gmail', 'v1', credentials=creds) results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread").execute() - messages = results.get('messages',[]); + messages = results.get('messages',[]) if not messages: print('No new messages.') else: message_count = 0 for message in messages: - msg = service.users().messages().get(userId='me', id=message['id']).execute() + msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute() email_data = msg['payload']['headers'] + for values in email_data: name = values['name'] if name == 'From': - from_name= values['value'] - for part in msg['payload']['parts']: - try: - data = part['body']["data"] - byte_code = base64.urlsafe_b64decode(data) + from_name= values['value'] + #print(f'{search} - {from_name}') + if from_name.find(search) != -1: + if 'payload' in msg and 'parts' in msg['payload']: + mail_body = '' + for part in msg['payload']['parts']: + try: + data = part['body']["data"] + byte_code = base64.urlsafe_b64decode(data) - text = byte_code.decode("utf-8") - print ("This is the message: "+ str(text)) + text = byte_code.decode("utf-8") + mail_body += str(text) - # mark the message as read (optional) - msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute() - except BaseException as error: - pass + # mark the message as read (optional) + #msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute() + except BaseException as error: + pass + # Parse the HTML with BeautifulSoup + soup = BeautifulSoup(mail_body, "html.parser") + + # Find the "PDF herunterladen" link + link = soup.find("a", string="PDF herunterladen") + + # Extract the href attribute + if link: + pdf_link = link.get("href") + # Download the PDF file + response = requests.get(pdf_link) + + # Parse the URL to extract the actual file URL + parsed_url = urlparse(pdf_link) + query_params = parse_qs(parsed_url.query) + actual_file_url = query_params.get('U', [None])[0] # 'U' is the parameter holding the actual file URL + + # Extract the file name from the actual file URL + file_name = os.path.basename(urlparse(actual_file_url).path) if actual_file_url else "downloaded_file.pdf" + + + if response.status_code == 200: + # Define the file path for saving + data_dir = os.path.join(script_dir, "data") + file_path = os.path.join(data_dir, file_name) + + # Save the file + with open(file_path, "wb") as file: + file.write(response.content) + + print(f"File downloaded and saved to {file_path}") + else: + print("Failed to download the file. Status code:", response.status_code) + + else: + pass # print(msg['payload']['body']) except Exception as error: - print(f'An error occurred: {error}') \ No newline at end of file + print(f'An error occurred: {error}') + + +readEmails() \ No newline at end of file diff --git a/web.py b/web.py index bf15963..3df67f9 100644 --- a/web.py +++ b/web.py @@ -75,6 +75,6 @@ class PDFServer: # Usage if __name__ == "__main__": - directory = '/path/to/your/directory' + directory = '/home/maru/Dev/git/amznMailConverter/data/' server = PDFServer(directory, port=8000) server.start_server() \ No newline at end of file