#!/scrapy/venvs/amznMailConverter/bin/python
"""Download PDFs linked in Amazon Kindle Support emails from a Gmail inbox.

Authenticates against the Gmail API, lists unread inbox messages, and for
every mail whose ``From`` header mentions Amazon Kindle Support extracts the
"PDF herunterladen" link from the HTML body and saves the referenced PDF
into a local ``data/`` directory next to this script.
"""

import base64
import logging
import os
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Configure logging
logging.basicConfig(level=logging.INFO)

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly',
          'https://www.googleapis.com/auth/gmail.modify']
SEARCH_QUERY = 'Amazon Kindle Support'
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Timeout (seconds) for outbound HTTP downloads; without it a stalled
# connection would hang the whole run indefinitely.
DOWNLOAD_TIMEOUT = 30


def get_credentials():
    """Return valid Gmail API credentials, refreshing or re-authorizing as needed.

    Cached credentials live in ``token.json`` next to this script; the OAuth
    client secrets are expected in ``credentials.json``. Refreshed or newly
    obtained credentials are written back to ``token.json`` for the next run.
    """
    token_path = os.path.join(SCRIPT_DIR, 'token.json')
    creds = None
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                os.path.join(SCRIPT_DIR, 'credentials.json'), SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the (possibly refreshed) credentials.
        with open(token_path, 'w') as token:
            token.write(creds.to_json())
    return creds


def fetch_unread_messages(service, max_results=10):
    """Return up to *max_results* unread inbox message stubs.

    Each stub is the id/threadId dict the Gmail list endpoint returns; an
    empty list is returned both when there is no unread mail and when the
    API call fails (the error is logged).
    """
    try:
        results = service.users().messages().list(
            userId='me', labelIds=['INBOX'], q="is:unread",
            maxResults=max_results).execute()
        return results.get('messages', [])
    except Exception as error:  # boundary: keep the script alive on API errors
        logging.error(f"Error fetching unread messages: {error}")
        return []


def download_pdf(pdf_link):
    """Download the PDF behind an Amazon redirect link into ``data/``.

    The link in the mail is a tracking redirect whose ``U`` query parameter
    carries the real file URL. Files that already exist locally are not
    re-downloaded.
    """
    # Parse URL and get the actual file URL
    parsed_url = urlparse(pdf_link)
    query_params = parse_qs(parsed_url.query)
    actual_file_url = query_params.get('U', [None])[0]

    if not actual_file_url:
        logging.error("No valid file URL found in PDF link.")
        return

    # Extract the file name; fall back to a fixed name when the URL path
    # has no basename.
    file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
    data_dir = os.path.join(SCRIPT_DIR, "data")
    file_path = os.path.join(data_dir, file_name)

    # Check if file exists
    if os.path.exists(file_path):
        logging.info(f"{file_name} already exists. Skipping download.")
        return

    # Download and save the file
    try:
        # timeout= prevents a stalled server from blocking the script forever.
        response = requests.get(actual_file_url, timeout=DOWNLOAD_TIMEOUT)
        if response.status_code == 200:
            os.makedirs(data_dir, exist_ok=True)
            with open(file_path, "wb") as file:
                file.write(response.content)
            logging.info(f"File downloaded and saved to {file_path}")
        else:
            logging.error(f"Failed to download the file. Status code: {response.status_code}")
    except Exception as e:
        logging.error(f"An error occurred during file download: {e}")


def process_email(service, message):
    """Fetch one message; if it is from Amazon Kindle Support, download its PDF.

    Decodes every decodable top-level MIME part, searches the combined HTML
    for an anchor labeled "PDF herunterladen", and hands its href to
    :func:`download_pdf`. All errors are logged, never raised.
    """
    try:
        msg = service.users().messages().get(
            userId='me', id=message['id'], format='full').execute()
        headers = msg['payload']['headers']

        if any(SEARCH_QUERY in header.get('value', '')
               for header in headers if header['name'] == 'From'):
            # Concatenate the base64url-decoded bodies of all top-level parts.
            # NOTE(review): nested multipart messages are not walked — assumes
            # the HTML body sits at the top level; TODO confirm for all mails.
            mail_body = ''
            for part in msg.get('payload', {}).get('parts', []):
                try:
                    data = part['body']["data"]
                    byte_code = base64.urlsafe_b64decode(data)
                    mail_body += byte_code.decode("utf-8")
                except Exception:
                    continue  # part without body data (e.g. attachment stub)

            # Parse HTML and find the PDF link
            soup = BeautifulSoup(mail_body, "html.parser")
            link = soup.find("a", string="PDF herunterladen")
            # Guard against an <a> tag that lacks an href: urlparse(None)
            # would raise TypeError and produce a misleading error log.
            if link and link.get("href"):
                download_pdf(link.get("href"))
            else:
                logging.info("No 'PDF herunterladen' link found in this email.")
    except Exception as error:
        logging.error(f"An error occurred while processing email: {error}")


def read_emails():
    """Entry point: authenticate, list unread messages, and process each one."""
    creds = get_credentials()
    service = build('gmail', 'v1', credentials=creds)
    messages = fetch_unread_messages(service)
    if not messages:
        logging.info("No new messages.")
    else:
        for message in messages:
            process_email(service, message)


if __name__ == "__main__":
    read_emails()