Compare commits
No commits in common. "70dc17dd751a830c65014f6a1917b2e8bb0601e9" and "6c3942f100ea9317115db9162842d5363bdb238b" have entirely different histories.
70dc17dd75
...
6c3942f100
103
README.md
103
README.md
@ -1,102 +1,3 @@
|
|||||||
|
# amznMailConverter
|
||||||
|
|
||||||
# Amazon Kindle Email PDF Downloader
|
Checks your Gmail inbox for emails sent from Kindle, downloads the linked PDF, and stores it.
|
||||||
|
|
||||||
This project is a Python daemon that connects to your Gmail account, searches for unread emails from "Amazon Kindle Support" containing PDF download links, and automatically downloads the PDFs to a specified local directory if they don’t already exist. The script runs as a background service and checks for new emails at a specified interval.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
- Automatically reads unread emails from Gmail.
|
|
||||||
- Searches for emails from "Amazon Kindle Support" with PDF download links.
|
|
||||||
- Parses the email to extract the actual PDF download link.
|
|
||||||
- Downloads the PDF file and saves it to a local `data` directory.
|
|
||||||
- Runs as a daemon, checking for new emails periodically.
|
|
||||||
|
|
||||||
## Requirements
|
|
||||||
- Python 3.7+
|
|
||||||
- Google API credentials for Gmail API access
|
|
||||||
- Gmail API access enabled in Google Cloud Console
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
1. **Clone the Repository**:
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/yourusername/amznMailConverter.git
|
|
||||||
cd amznMailConverter
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Install Dependencies**:
|
|
||||||
```bash
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
The `requirements.txt` should include:
|
|
||||||
```plaintext
|
|
||||||
google-auth
|
|
||||||
google-auth-oauthlib
|
|
||||||
google-auth-httplib2
|
|
||||||
google-api-python-client
|
|
||||||
requests
|
|
||||||
beautifulsoup4
|
|
||||||
python-daemon # Only if using the daemon library
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Set Up Google API Credentials**:
|
|
||||||
- Create a project in the [Google Cloud Console](https://console.cloud.google.com/).
|
|
||||||
- Enable the Gmail API.
|
|
||||||
- Download the OAuth 2.0 Client Credentials JSON file, rename it to `credentials.json`, and place it in the project root directory.
|
|
||||||
|
|
||||||
4. **Authorize Access**:
|
|
||||||
- On first run, the script will prompt you to authorize Gmail API access. Follow the instructions in the browser to grant permissions.
|
|
||||||
- After authorization, a `token.json` file will be generated to store your access token.
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
To run the script as a daemon, use one of the following methods:
|
|
||||||
|
|
||||||
### 1. Using Python Daemon Library (Recommended for Simplicity)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python3 email_pdf_downloader.py
|
|
||||||
```
|
|
||||||
|
|
||||||
The script will run in the background, checking for new emails every 5 minutes.
|
|
||||||
|
|
||||||
### 2. Using Systemd (Linux)
|
|
||||||
|
|
||||||
1. **Create a Systemd Service**:
|
|
||||||
- Create a file `/etc/systemd/system/email_reader.service`:
|
|
||||||
```ini
|
|
||||||
[Unit]
|
|
||||||
Description=Email PDF Downloader Daemon
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
ExecStart=/usr/bin/python3 /path/to/email_pdf_downloader.py
|
|
||||||
Restart=always
|
|
||||||
User=your_username
|
|
||||||
WorkingDirectory=/path/to/project
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Enable and Start the Service**:
|
|
||||||
```bash
|
|
||||||
sudo systemctl daemon-reload
|
|
||||||
sudo systemctl enable email_reader.service
|
|
||||||
sudo systemctl start email_reader.service
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Check Status**:
|
|
||||||
```bash
|
|
||||||
sudo systemctl status email_reader.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
- **SCOPES**: OAuth scopes for Gmail API access.
|
|
||||||
- **SEARCH_QUERY**: Query to filter emails by sender ("Amazon Kindle Support").
|
|
||||||
- **SCRIPT_DIR**: Base directory where files are saved.
|
|
||||||
- **Interval**: The script is set to check for new emails every 5 minutes (300 seconds). You can adjust this interval in the `run_daemon` function in `email_pdf_downloader.py`.
|
|
||||||
|
|
||||||
## Logging
|
|
||||||
Logging information (success messages, errors) is printed to the console or saved to `systemd` logs when running as a systemd service.
|
|
||||||
|
|
||||||
## License
|
|
||||||
This project is licensed under the MIT License.
|
|
||||||
15
daemon.py
15
daemon.py
@ -1,15 +0,0 @@
|
|||||||
import logging
import time

from main import read_emails

# Seconds to wait between two consecutive inbox checks.
CHECK_INTERVAL_SECONDS = 60


def run_daemon(interval=CHECK_INTERVAL_SECONDS):
    """Check for new emails forever, pausing between checks.

    Each pass calls ``read_emails()``. An ``Exception`` raised by a pass is
    logged and the loop carries on, so a single failed check does not stop
    the daemon.

    Args:
        interval: Seconds to sleep after every pass. Defaults to
            ``CHECK_INTERVAL_SECONDS`` (60).
    """
    while True:
        try:
            read_emails()
            logging.info(
                "Finished checking emails. Sleeping for %s seconds.", interval
            )
        except Exception as e:
            # logging.exception keeps the original message and adds the
            # traceback, which a long-running background process needs for
            # diagnosing failures after the fact.
            logging.exception("An error occurred: %s", e)
        # Sleep on both the success and the failure path so a persistent
        # error cannot turn the loop into a busy retry.
        time.sleep(interval)


if __name__ == "__main__":
    run_daemon()
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=Amazon Mail Converter Daemon
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
ExecStart=/usr/bin/python3 /path/to/your_script.py
|
|
||||||
Restart=always
|
|
||||||
User=your_username
|
|
||||||
WorkingDirectory=/path/to/your_script.py
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
172
main.py
172
main.py
@ -1,110 +1,116 @@
|
|||||||
import os
|
from gettext import find
|
||||||
|
import os.path
|
||||||
import base64
|
import base64
|
||||||
import requests
|
import json
|
||||||
import logging
|
import re
|
||||||
|
import time
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
from google.auth.transport.requests import Request
|
from google.auth.transport.requests import Request
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||||
from googleapiclient.discovery import build
|
from googleapiclient.discovery import build
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.modify']
|
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly','https://www.googleapis.com/auth/gmail.modify']
|
||||||
SEARCH_QUERY = 'Amazon Kindle Support'
|
search = 'Amazon Kindle Support'
|
||||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
def get_credentials():
|
def readEmails():
|
||||||
|
"""Shows basic usage of the Gmail API.
|
||||||
|
Lists the user's Gmail labels.
|
||||||
|
"""
|
||||||
creds = None
|
creds = None
|
||||||
|
# The file token.json stores the user's access and refresh tokens, and is
|
||||||
|
# created automatically when the authorization flow completes for the first
|
||||||
|
# time.
|
||||||
if os.path.exists('token.json'):
|
if os.path.exists('token.json'):
|
||||||
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
|
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
|
||||||
|
# If there are no (valid) credentials available, let the user log in.
|
||||||
if not creds or not creds.valid:
|
if not creds or not creds.valid:
|
||||||
if creds and creds.expired and creds.refresh_token:
|
if creds and creds.expired and creds.refresh_token:
|
||||||
creds.refresh(Request())
|
creds.refresh(Request())
|
||||||
else:
|
else:
|
||||||
flow = InstalledAppFlow.from_client_secrets_file('/path/to/your/credentials.json', SCOPES)
|
flow = InstalledAppFlow.from_client_secrets_file(
|
||||||
|
# your creds file here. Please create json file as here https://cloud.google.com/docs/authentication/getting-started
|
||||||
|
'/home/maru/Dev/git/amznMailConverter/my_cred_file.json', SCOPES)
|
||||||
creds = flow.run_local_server(port=0)
|
creds = flow.run_local_server(port=0)
|
||||||
|
# Save the credentials for the next run
|
||||||
with open('token.json', 'w') as token:
|
with open('token.json', 'w') as token:
|
||||||
token.write(creds.to_json())
|
token.write(creds.to_json())
|
||||||
return creds
|
|
||||||
|
|
||||||
def fetch_unread_messages(service, max_results=10):
|
|
||||||
try:
|
try:
|
||||||
results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread", maxResults=max_results).execute()
|
# Call the Gmail API
|
||||||
return results.get('messages', [])
|
service = build('gmail', 'v1', credentials=creds)
|
||||||
except Exception as error:
|
results = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread").execute()
|
||||||
logging.error(f"Error fetching unread messages: {error}")
|
messages = results.get('messages',[])
|
||||||
return []
|
if not messages:
|
||||||
|
print('No new messages.')
|
||||||
def download_pdf(pdf_link):
|
|
||||||
# Parse URL and get the actual file URL
|
|
||||||
parsed_url = urlparse(pdf_link)
|
|
||||||
query_params = parse_qs(parsed_url.query)
|
|
||||||
actual_file_url = query_params.get('U', [None])[0]
|
|
||||||
if not actual_file_url:
|
|
||||||
logging.error("No valid file URL found in PDF link.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Extract the file name
|
|
||||||
file_name = os.path.basename(urlparse(actual_file_url).path) or "downloaded_file.pdf"
|
|
||||||
data_dir = os.path.join(SCRIPT_DIR, "data")
|
|
||||||
file_path = os.path.join(data_dir, file_name)
|
|
||||||
|
|
||||||
# Check if file exists
|
|
||||||
if os.path.exists(file_path):
|
|
||||||
logging.info(f"{file_name} already exists. Skipping download.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Download and save the file
|
|
||||||
try:
|
|
||||||
response = requests.get(actual_file_url)
|
|
||||||
if response.status_code == 200:
|
|
||||||
os.makedirs(data_dir, exist_ok=True)
|
|
||||||
with open(file_path, "wb") as file:
|
|
||||||
file.write(response.content)
|
|
||||||
logging.info(f"File downloaded and saved to {file_path}")
|
|
||||||
else:
|
else:
|
||||||
logging.error(f"Failed to download the file. Status code: {response.status_code}")
|
message_count = 0
|
||||||
except Exception as e:
|
for message in messages:
|
||||||
logging.error(f"An error occurred during file download: {e}")
|
msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
|
||||||
|
email_data = msg['payload']['headers']
|
||||||
|
|
||||||
def process_email(service, message):
|
for values in email_data:
|
||||||
try:
|
name = values['name']
|
||||||
msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
|
if name == 'From':
|
||||||
headers = msg['payload']['headers']
|
from_name= values['value']
|
||||||
if any(SEARCH_QUERY in header.get('value', '') for header in headers if header['name'] == 'From'):
|
#print(f'{search} - {from_name}')
|
||||||
mail_body = ''
|
if from_name.find(search) != -1:
|
||||||
for part in msg.get('payload', {}).get('parts', []):
|
if 'payload' in msg and 'parts' in msg['payload']:
|
||||||
try:
|
mail_body = ''
|
||||||
data = part['body']["data"]
|
for part in msg['payload']['parts']:
|
||||||
byte_code = base64.urlsafe_b64decode(data)
|
try:
|
||||||
mail_body += byte_code.decode("utf-8")
|
data = part['body']["data"]
|
||||||
except Exception:
|
byte_code = base64.urlsafe_b64decode(data)
|
||||||
continue
|
|
||||||
|
|
||||||
# Parse HTML and find the PDF link
|
text = byte_code.decode("utf-8")
|
||||||
soup = BeautifulSoup(mail_body, "html.parser")
|
mail_body += str(text)
|
||||||
link = soup.find("a", string="PDF herunterladen")
|
|
||||||
if link:
|
# mark the message as read (optional)
|
||||||
pdf_link = link.get("href")
|
#msg = service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()
|
||||||
download_pdf(pdf_link)
|
except BaseException as error:
|
||||||
else:
|
pass
|
||||||
logging.info("No 'PDF herunterladen' link found in this email.")
|
# Parse the HTML with BeautifulSoup
|
||||||
|
soup = BeautifulSoup(mail_body, "html.parser")
|
||||||
|
|
||||||
|
# Find the "PDF herunterladen" link
|
||||||
|
link = soup.find("a", string="PDF herunterladen")
|
||||||
|
|
||||||
|
# Extract the href attribute
|
||||||
|
if link:
|
||||||
|
pdf_link = link.get("href")
|
||||||
|
# Download the PDF file
|
||||||
|
response = requests.get(pdf_link)
|
||||||
|
|
||||||
|
# Parse the URL to extract the actual file URL
|
||||||
|
parsed_url = urlparse(pdf_link)
|
||||||
|
query_params = parse_qs(parsed_url.query)
|
||||||
|
actual_file_url = query_params.get('U', [None])[0] # 'U' is the parameter holding the actual file URL
|
||||||
|
|
||||||
|
# Extract the file name from the actual file URL
|
||||||
|
file_name = os.path.basename(urlparse(actual_file_url).path) if actual_file_url else "downloaded_file.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
# Define the file path for saving
|
||||||
|
data_dir = os.path.join(script_dir, "data")
|
||||||
|
file_path = os.path.join(data_dir, file_name)
|
||||||
|
|
||||||
|
# Save the file
|
||||||
|
with open(file_path, "wb") as file:
|
||||||
|
file.write(response.content)
|
||||||
|
|
||||||
|
print(f"File downloaded and saved to {file_path}")
|
||||||
|
else:
|
||||||
|
print("Failed to download the file. Status code:", response.status_code)
|
||||||
|
|
||||||
|
else:
|
||||||
|
pass # print(msg['payload']['body'])
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logging.error(f"An error occurred while processing email: {error}")
|
print(f'An error occurred: {error}')
|
||||||
|
|
||||||
def read_emails():
|
|
||||||
creds = get_credentials()
|
|
||||||
service = build('gmail', 'v1', credentials=creds)
|
|
||||||
messages = fetch_unread_messages(service)
|
|
||||||
|
|
||||||
if not messages:
|
readEmails()
|
||||||
logging.info("No new messages.")
|
|
||||||
else:
|
|
||||||
for message in messages:
|
|
||||||
process_email(service, message)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
read_emails()
|
|
||||||
11
web.service
11
web.service
@ -1,11 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=Amazon Mail Converter Daemon
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
ExecStart=/usr/bin/python3 /path/to/your_script.py
|
|
||||||
Restart=always
|
|
||||||
User=your_username
|
|
||||||
WorkingDirectory=/path/to/your_script.py
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
Loading…
Reference in New Issue
Block a user