import base64
import os
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.modify']
search = 'Amazon Kindle Support'
script_dir = os.path.dirname(os.path.abspath(__file__))


def readEmails():
    """Read unread Gmail messages from Amazon Kindle Support and download the linked PDFs."""
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # Your credentials file here. Create the JSON file as described at
            # https://cloud.google.com/docs/authentication/getting-started
            flow = InstalledAppFlow.from_client_secrets_file(
                '/home/maru/Dev/git/amznMailConverter/my_cred_file.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        # Call the Gmail API and list all unread messages in the inbox
        service = build('gmail', 'v1', credentials=creds)
        results = service.users().messages().list(userId='me', labelIds=['INBOX'], q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print('No new messages.')
            return

        for message in messages:
            msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()

            # Look up the sender in the message headers and only process matching mails
            for values in msg['payload']['headers']:
                if values['name'] != 'From':
                    continue
                from_name = values['value']
                #print(f'{search} - {from_name}')
                if search not in from_name:
                    continue
                if 'parts' not in msg['payload']:
                    continue

                # Concatenate the decoded body parts of the message
                mail_body = ''
                for part in msg['payload']['parts']:
                    try:
                        data = part['body']['data']
                        mail_body += base64.urlsafe_b64decode(data).decode('utf-8')
                        # mark the message as read (optional)
                        #msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()
                    except (KeyError, UnicodeDecodeError):
                        # Skip parts without decodable body data
                        continue

                # Parse the HTML and find the "PDF herunterladen" ("Download PDF") link
                soup = BeautifulSoup(mail_body, 'html.parser')
                link = soup.find('a', string='PDF herunterladen')
                if not link:
                    # print(msg['payload']['body'])
                    continue

                # Download the PDF file
                pdf_link = link.get('href')
                response = requests.get(pdf_link)

                # The link is a redirect URL; the 'U' query parameter holds the actual file URL
                parsed_url = urlparse(pdf_link)
                query_params = parse_qs(parsed_url.query)
                actual_file_url = query_params.get('U', [None])[0]

                # Derive the file name from the actual file URL
                file_name = os.path.basename(urlparse(actual_file_url).path) if actual_file_url else 'downloaded_file.pdf'

                if response.status_code == 200:
                    # Save the file into the data/ directory next to this script
                    data_dir = os.path.join(script_dir, 'data')
                    os.makedirs(data_dir, exist_ok=True)
                    file_path = os.path.join(data_dir, file_name)
                    with open(file_path, 'wb') as file:
                        file.write(response.content)
                    print(f'File downloaded and saved to {file_path}')
                else:
                    print('Failed to download the file. Status code:', response.status_code)
    except Exception as error:
        print(f'An error occurred: {error}')


readEmails()