added file appendix -1
This commit is contained in:
parent 9a769907b4
commit 63e6f2aea9
@@ -1,178 +0,0 @@
import os
import base64
import requests
import logging
import hashlib
import re
from urllib.parse import parse_qs, urlparse
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO)

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.modify']
SEARCH_QUERY = 'Amazon Kindle Support'
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

def get_credentials():
    creds = None
    token_path = os.getenv("TOKEN_PATH", SCRIPT_DIR+'/token.json')
    credentials_path = os.getenv("CREDENTIALS_PATH", SCRIPT_DIR+'/credentials.json')

    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if not creds or not creds.valid:
        try:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
                creds = flow.run_local_server(port=0)
            with open(token_path, 'w') as token:
                token.write(creds.to_json())
        except Exception as e:
            logging.error(f"Failed to obtain credentials: {e}")
            return None
    return creds

def fetch_unread_messages(service, max_results=10):
    try:
        results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread", maxResults=max_results).execute()
        return results.get('messages', [])
    except Exception as error:
        logging.error(f"Error fetching unread messages: {error}")
        return []

def calculate_file_hash(file_path):
    """Calculate SHA-256 hash of a file."""
    sha256_hash = hashlib.sha256()
    try:
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
    except Exception as e:
        logging.error(f"Error reading file {file_path} for hashing: {e}")
        return None
    return sha256_hash.hexdigest()

def calculate_content_hash(content):
    """Calculate SHA-256 hash of content."""
    sha256_hash = hashlib.sha256()
    sha256_hash.update(content)
    return sha256_hash.hexdigest()

def get_existing_versions(file_path):
    """Get all existing versions of a file (with suffixes) in the same directory."""
    base, ext = os.path.splitext(file_path)
    dir_path = os.path.dirname(file_path)
    base_name = os.path.basename(base)

    existing_files = []
    # The data directory may not exist on the first run; treat that as "no
    # versions yet" instead of letting os.listdir raise FileNotFoundError.
    if not os.path.isdir(dir_path):
        return existing_files
    for filename in os.listdir(dir_path):
        if filename.startswith(base_name) and filename.endswith(ext):
            # Match files with suffix pattern `-1`, `-2`, etc., or the base file itself
            if re.match(rf"{re.escape(base_name)}(-\d+)?{re.escape(ext)}$", filename):
                existing_files.append(os.path.join(dir_path, filename))
    return existing_files

def get_unique_file_path(file_path):
    """Generate a unique file path by adding a suffix if the file already exists."""
    base, ext = os.path.splitext(file_path)
    counter = 1
    new_file_path = f"{base}-{counter}{ext}"
    while os.path.exists(new_file_path):
        counter += 1
        new_file_path = f"{base}-{counter}{ext}"
    return new_file_path

def download_pdf(pdf_link):
    parsed_url = urlparse(pdf_link)
    query_params = parse_qs(parsed_url.query)
    actual_file_url = query_params.get('U', [None])[0]
    if not actual_file_url:
        logging.error("No valid file URL found in PDF link.")
        return

    file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
    data_dir = os.path.join(SCRIPT_DIR, "data")
    file_path = os.path.join(data_dir, file_name)

    try:
        response = requests.get(actual_file_url, timeout=10)
        if response.status_code == 200:
            new_content_hash = calculate_content_hash(response.content)

            # Check all existing versions of the file
            existing_files = get_existing_versions(file_path)
            for existing_file in existing_files:
                existing_file_hash = calculate_file_hash(existing_file)
                if existing_file_hash == new_content_hash:
                    logging.info(f"An identical file already exists as {existing_file}. Skipping download.")
                    return

            # No identical file found, save as a new version
            os.makedirs(data_dir, exist_ok=True)
            if os.path.exists(file_path):
                # If base file exists, find a unique file name with suffix
                file_path = get_unique_file_path(file_path)
            with open(file_path, "wb") as file:
                file.write(response.content)
            logging.info(f"File downloaded and saved to {file_path}")
        else:
            logging.error(f"Failed to download the file. Status code: {response.status_code}")
    except requests.exceptions.Timeout:
        logging.error("Request timed out while downloading PDF.")
    except requests.exceptions.RequestException as e:
        logging.error(f"An error occurred during file download: {e}")

def process_email(service, message):
    try:
        msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
        headers = msg['payload']['headers']
        if any(SEARCH_QUERY in header.get('value', '') for header in headers if header['name'] == 'From'):
            mail_body = ''
            for part in msg.get('payload', {}).get('parts', []):
                try:
                    data = part['body']["data"]
                    byte_code = base64.urlsafe_b64decode(data)
                    mail_body += byte_code.decode("utf-8")
                except Exception:
                    continue

            soup = BeautifulSoup(mail_body, "html.parser")
            link = soup.find("a", string="PDF herunterladen")
            if link:
                pdf_link = link.get("href")
                download_pdf(pdf_link)

                # Note: messages().delete requires the full https://mail.google.com/
                # scope; with gmail.modify alone, messages().trash is the permitted call.
                service.users().messages().delete(userId='me', id=message['id']).execute()
                logging.info(f"Email with ID {message['id']} successfully deleted after PDF download.")
            else:
                logging.info("No 'PDF herunterladen' link found in this email.")
    except Exception as error:
        logging.error(f"An error occurred while processing email ID {message['id']}: {error}")

def read_emails():
    creds = get_credentials()
    if not creds:
        logging.error("No valid credentials found.")
        return

    try:
        service = build('gmail', 'v1', credentials=creds)
        messages = fetch_unread_messages(service)

        if not messages:
            logging.info("No new messages.")
        else:
            for message in messages:
                process_email(service, message)
    except Exception as e:
        logging.error(f"Failed to initialize Gmail service: {e}")

if __name__ == "__main__":
    read_emails()
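
The core mechanism the appendix script adds is hash-based de-duplication combined with `-1`, `-2`, ... version suffixes. A minimal, self-contained sketch of that behaviour, using an invented file name instead of the script's Gmail plumbing:

import hashlib
import os
import re
import tempfile

with tempfile.TemporaryDirectory() as d:
    # Pretend an earlier run already saved invoice.pdf.
    with open(os.path.join(d, "invoice.pdf"), "wb") as f:
        f.write(b"version one")

    new_data = b"version two"
    new_hash = hashlib.sha256(new_data).hexdigest()

    # Compare the new content against every existing version (invoice.pdf, invoice-1.pdf, ...).
    versions = [v for v in os.listdir(d) if re.match(r"invoice(-\d+)?\.pdf$", v)]
    if any(hashlib.sha256(open(os.path.join(d, v), "rb").read()).hexdigest() == new_hash
           for v in versions):
        print("identical file already exists, skipping")
    else:
        # Different content: take the first free suffixed name, as get_unique_file_path does.
        counter = 1
        while os.path.exists(os.path.join(d, f"invoice-{counter}.pdf")):
            counter += 1
        with open(os.path.join(d, f"invoice-{counter}.pdf"), "wb") as f:
            f.write(new_data)
        print(f"saved as invoice-{counter}.pdf")  # prints: saved as invoice-1.pdf
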
@@ -2,6 +2,8 @@ import os
 import base64
 import requests
 import logging
+import hashlib
+import re
 from urllib.parse import parse_qs, urlparse
 from google.auth.transport.requests import Request
 from google.oauth2.credentials import Credentials
@@ -18,16 +20,23 @@ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 
 def get_credentials():
     creds = None
-    if os.path.exists(SCRIPT_DIR+'/token.json'):
-        creds = Credentials.from_authorized_user_file(SCRIPT_DIR+'/token.json', SCOPES)
+    token_path = os.getenv("TOKEN_PATH", SCRIPT_DIR+'/token.json')
+    credentials_path = os.getenv("CREDENTIALS_PATH", SCRIPT_DIR+'/credentials.json')
+
+    if os.path.exists(token_path):
+        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
     if not creds or not creds.valid:
+        try:
             if creds and creds.expired and creds.refresh_token:
                 creds.refresh(Request())
             else:
-            flow = InstalledAppFlow.from_client_secrets_file(SCRIPT_DIR+'/credentials.json', SCOPES)
+                flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
                 creds = flow.run_local_server(port=0)
-        with open(SCRIPT_DIR+'/token.json', 'w') as token:
+            with open(token_path, 'w') as token:
                 token.write(creds.to_json())
+        except Exception as e:
+            logging.error(f"Failed to obtain credentials: {e}")
+            return None
     return creds
 
 def fetch_unread_messages(service, max_results=10):
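
The two os.getenv calls added above let a deployment override where the OAuth files live while keeping the old side-by-side default. A short sketch of the fallback semantics, with placeholder paths:

import os

# When TOKEN_PATH is unset, os.getenv returns the second argument, so the
# script keeps reading token.json next to itself; /run/secrets is a made-up example.
os.environ.pop("TOKEN_PATH", None)
print(os.getenv("TOKEN_PATH", "./token.json"))    # ./token.json

os.environ["TOKEN_PATH"] = "/run/secrets/gmail-token.json"
print(os.getenv("TOKEN_PATH", "./token.json"))    # /run/secrets/gmail-token.json
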
@@ -38,8 +47,49 @@ def fetch_unread_messages(service, max_results=10):
         logging.error(f"Error fetching unread messages: {error}")
         return []
 
+def calculate_file_hash(file_path):
+    """Calculate SHA-256 hash of a file."""
+    sha256_hash = hashlib.sha256()
+    try:
+        with open(file_path, "rb") as f:
+            for byte_block in iter(lambda: f.read(4096), b""):
+                sha256_hash.update(byte_block)
+    except Exception as e:
+        logging.error(f"Error reading file {file_path} for hashing: {e}")
+        return None
+    return sha256_hash.hexdigest()
+
+def calculate_content_hash(content):
+    """Calculate SHA-256 hash of content."""
+    sha256_hash = hashlib.sha256()
+    sha256_hash.update(content)
+    return sha256_hash.hexdigest()
+
+def get_existing_versions(file_path):
+    """Get all existing versions of a file (with suffixes) in the same directory."""
+    base, ext = os.path.splitext(file_path)
+    dir_path = os.path.dirname(file_path)
+    base_name = os.path.basename(base)
+
+    existing_files = []
+    # The data directory may not exist on the first run; treat that as "no
+    # versions yet" instead of letting os.listdir raise FileNotFoundError.
+    if not os.path.isdir(dir_path):
+        return existing_files
+    for filename in os.listdir(dir_path):
+        if filename.startswith(base_name) and filename.endswith(ext):
+            # Match files with suffix pattern `-1`, `-2`, etc., or the base file itself
+            if re.match(rf"{re.escape(base_name)}(-\d+)?{re.escape(ext)}$", filename):
+                existing_files.append(os.path.join(dir_path, filename))
+    return existing_files
+
+def get_unique_file_path(file_path):
+    """Generate a unique file path by adding a suffix if the file already exists."""
+    base, ext = os.path.splitext(file_path)
+    counter = 1
+    new_file_path = f"{base}-{counter}{ext}"
+    while os.path.exists(new_file_path):
+        counter += 1
+        new_file_path = f"{base}-{counter}{ext}"
+    return new_file_path
+
 def download_pdf(pdf_link):
     # Parse URL and get the actual file URL
     parsed_url = urlparse(pdf_link)
     query_params = parse_qs(parsed_url.query)
     actual_file_url = query_params.get('U', [None])[0]
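
For illustration, the suffix regex introduced in get_existing_versions can be exercised on its own; the file names are invented:

import re

base_name, ext = "invoice", ".pdf"
pattern = rf"{re.escape(base_name)}(-\d+)?{re.escape(ext)}$"

for name in ["invoice.pdf", "invoice-3.pdf", "invoice-draft.pdf", "invoice.pdf.bak"]:
    # Only the base name and purely numeric -N suffixes count as versions.
    print(name, bool(re.match(pattern, name)))
# invoice.pdf True, invoice-3.pdf True, invoice-draft.pdf False, invoice.pdf.bak False
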
@@ -47,27 +97,36 @@ def download_pdf(pdf_link):
         logging.error("No valid file URL found in PDF link.")
         return
 
     # Extract the file name
     file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
     data_dir = os.path.join(SCRIPT_DIR, "data")
     file_path = os.path.join(data_dir, file_name)
 
-    # Check if file exists
-    if os.path.exists(file_path):
-        logging.info(f"{file_name} already exists. Skipping download.")
+    try:
+        response = requests.get(actual_file_url, timeout=10)
+        if response.status_code == 200:
+            new_content_hash = calculate_content_hash(response.content)
+
+            # Check all existing versions of the file
+            existing_files = get_existing_versions(file_path)
+            for existing_file in existing_files:
+                existing_file_hash = calculate_file_hash(existing_file)
+                if existing_file_hash == new_content_hash:
+                    logging.info(f"An identical file already exists as {existing_file}. Skipping download.")
+                    return
+
-    # Download and save the file
-    try:
-        response = requests.get(actual_file_url)
-        if response.status_code == 200:
+            # No identical file found, save as a new version
             os.makedirs(data_dir, exist_ok=True)
+            if os.path.exists(file_path):
+                # If base file exists, find a unique file name with suffix
+                file_path = get_unique_file_path(file_path)
             with open(file_path, "wb") as file:
                 file.write(response.content)
             logging.info(f"File downloaded and saved to {file_path}")
         else:
             logging.error(f"Failed to download the file. Status code: {response.status_code}")
-    except Exception as e:
+    except requests.exceptions.Timeout:
+        logging.error("Request timed out while downloading PDF.")
+    except requests.exceptions.RequestException as e:
         logging.error(f"An error occurred during file download: {e}")
 
 def process_email(service, message):
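
The 'U' handling in download_pdf assumes the mail links are redirect-style URLs that carry the real file URL percent-encoded in a U query parameter. A sketch with a fabricated link (parse_qs decodes the percent-encoding):

from urllib.parse import parse_qs, urlparse

pdf_link = "https://tracker.example.com/r?U=https%3A%2F%2Ffiles.example.com%2Fdocs%2Finvoice.pdf&id=42"
actual_file_url = parse_qs(urlparse(pdf_link).query).get('U', [None])[0]
print(actual_file_url)  # https://files.example.com/docs/invoice.pdf
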
@@ -84,23 +143,26 @@ def process_email(service, message):
                 except Exception:
                     continue
 
             # Parse HTML and find the PDF link
             soup = BeautifulSoup(mail_body, "html.parser")
             link = soup.find("a", string="PDF herunterladen")
             if link:
                 pdf_link = link.get("href")
                 download_pdf(pdf_link)
 
                 # After successful download, delete the email
                 service.users().messages().delete(userId='me', id=message['id']).execute()
                 logging.info(f"Email with ID {message['id']} successfully deleted after PDF download.")
             else:
                 logging.info("No 'PDF herunterladen' link found in this email.")
     except Exception as error:
-        logging.error(f"An error occurred while processing email: {error}")
+        logging.error(f"An error occurred while processing email ID {message['id']}: {error}")
 
 def read_emails():
     creds = get_credentials()
     if not creds:
         logging.error("No valid credentials found.")
         return
 
+    try:
         service = build('gmail', 'v1', credentials=creds)
         messages = fetch_unread_messages(service)
 
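
The link extraction in process_email matches the anchor by its exact text, the German "PDF herunterladen" ("Download PDF"). A sketch on synthetic HTML:

from bs4 import BeautifulSoup

html = '<p>Ihre Rechnung: <a href="https://example.com/r?U=...">PDF herunterladen</a></p>'
soup = BeautifulSoup(html, "html.parser")
link = soup.find("a", string="PDF herunterladen")
print(link.get("href") if link else "no matching link")
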
@@ -109,6 +171,8 @@ def read_emails():
         else:
             for message in messages:
                 process_email(service, message)
+    except Exception as e:
+        logging.error(f"Failed to initialize Gmail service: {e}")
 
 if __name__ == "__main__":
     read_emails()