Table of Contents
- Why Automate With Python?
- 1. Bulk File Organiser
- 2. Website Uptime Monitor
- 3. Duplicate File Finder
- 4. Automated Email Sender
- 5. Web Scraper with Rotating Headers
- 6. PDF Report Generator
- 7. Environment Variable Auditor
- 8. Directory Tree Snapshot
- 9. REST API Batch Requester
- 10. Log File Analyser
- Making Scripts Production-Ready
Why Automate With Python?
Every developer has a list of tedious manual tasks: cleaning up downloads folders, checking if a website is still up, sending the same email with slightly different data, parsing a log file to find errors. These tasks are not hard — they are just repetitive. And repetitive tasks are exactly what Python is built for.
Python’s standard library covers file I/O, HTTP requests, email, CSV parsing, and JSON handling without installing a single package. Add a few popular libraries and you can generate PDFs, scrape websites, schedule tasks, and automate cloud APIs — all in less code than you would expect.
These ten scripts are practical, immediately useful, and written to be readable and modifiable rather than clever and opaque. Copy them, adapt them, and make them your own.
1. Bulk File Organiser
Downloads folders turn into chaos over time. This script automatically sorts files into subdirectories by extension.
#!/usr/bin/env python3
"""Organise files in a directory by extension."""
import shutil
from pathlib import Path
RULES = {
"Images": [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"],
"Documents": [".pdf", ".docx", ".doc", ".txt", ".xlsx", ".csv"],
"Videos": [".mp4", ".mov", ".avi", ".mkv"],
"Audio": [".mp3", ".wav", ".flac", ".aac"],
"Archives": [".zip", ".tar", ".gz", ".rar", ".7z"],
"Code": [".py", ".js", ".ts", ".html", ".css", ".json"],
}
def organise(source_dir: str) -> None:
source = Path(source_dir)
ext_map = {ext: folder for folder, exts in RULES.items() for ext in exts}
for item in source.iterdir():
if not item.is_file():
continue
ext = item.suffix.lower()
folder_name = ext_map.get(ext, "Other")
dest = source / folder_name
dest.mkdir(exist_ok=True)
shutil.move(str(item), dest / item.name)
print(f" Moved {item.name} → {folder_name}/")
if __name__ == "__main__":
import sys
target = sys.argv[1] if len(sys.argv) > 1 else str(Path.home() / "Downloads")
print(f"Organising: {target}")
organise(target)
print("Done.")
Run it:
python organise.py ~/Downloads
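Because the script moves files immediately, a preview mode is worth having before pointing it at a folder you care about. A minimal standalone sketch (the trimmed RULES dict is illustrative) that prints planned moves without touching anything:
import sys
from pathlib import Path

# Trimmed version of the RULES mapping above, kept short for the sketch.
RULES = {"Images": [".jpg", ".png"], "Documents": [".pdf", ".txt"]}

def preview(source_dir: str) -> None:
    """Print what organise() would do, without calling shutil.move."""
    ext_map = {ext: folder for folder, exts in RULES.items() for ext in exts}
    for item in Path(source_dir).iterdir():
        if item.is_file():
            folder = ext_map.get(item.suffix.lower(), "Other")
            print(f"would move {item.name} -> {folder}/")

if __name__ == "__main__":
    preview(sys.argv[1] if len(sys.argv) > 1 else ".")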
2. Website Uptime Monitor
Get notified when a site goes down. This script checks a list of URLs at a fixed interval and reports each one's status; wiring in an email or Slack alert on failure is a small extension covered below.
#!/usr/bin/env python3
"""Monitor website uptime and alert on failure."""
import time
import requests
from datetime import datetime
SITES = [
"https://missquibble.com",
"https://github.com",
"https://your-api.example.com/health",
]
CHECK_INTERVAL = 60 # seconds
TIMEOUT = 10 # request timeout
def check_site(url: str) -> tuple[bool, int | None, int]:
try:
start = time.time()
resp = requests.get(url, timeout=TIMEOUT)
elapsed = round((time.time() - start) * 1000)
return resp.status_code < 400, resp.status_code, elapsed
except requests.RequestException:
return False, None, 0
def monitor() -> None:
print(f"Monitoring {len(SITES)} sites. Ctrl+C to stop.\n")
while True:
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
for url in SITES:
ok, code, ms = check_site(url)
status = "✓ UP " if ok else "✗ DOWN"
            code_str = str(code) if code else "ERROR"
print(f"[{ts}] {status} | {code_str} | {ms:>5}ms | {url}")
print()
time.sleep(CHECK_INTERVAL)
if __name__ == "__main__":
monitor()
Install dependency: pip install requests
Extend this by sending an email (see script 4) or posting to a Slack webhook when ok is False.
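A Slack alert, for instance, is a single POST to an incoming-webhook URL. A minimal sketch, assuming you have created a webhook and exported its URL as SLACK_WEBHOOK_URL (a variable name chosen here for illustration):
import os
import requests

def alert_slack(message: str) -> None:
    """Post an alert to a Slack incoming webhook (sketch)."""
    webhook = os.environ["SLACK_WEBHOOK_URL"]  # assumed to be set in your environment
    requests.post(webhook, json={"text": message}, timeout=10)

# In monitor(), after check_site():
# if not ok:
#     alert_slack(f"{url} is DOWN ({code_str})")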
3. Duplicate File Finder
Hard drives fill up with duplicate photos, downloaded files, and copied documents. This script finds them by comparing SHA-256 hashes — not just filenames.
#!/usr/bin/env python3
"""Find duplicate files by content hash."""
import hashlib
from pathlib import Path
from collections import defaultdict
def sha256(path: Path, chunk: int = 65536) -> str:
h = hashlib.sha256()
with open(path, "rb") as f:
while data := f.read(chunk):
h.update(data)
return h.hexdigest()
def find_duplicates(root: str) -> dict[str, list[Path]]:
hashes: dict[str, list[Path]] = defaultdict(list)
for path in Path(root).rglob("*"):
if path.is_file():
try:
hashes[sha256(path)].append(path)
except (PermissionError, OSError):
pass
return {h: paths for h, paths in hashes.items() if len(paths) > 1}
if __name__ == "__main__":
import sys
root = sys.argv[1] if len(sys.argv) > 1 else "."
dupes = find_duplicates(root)
if not dupes:
print("No duplicates found.")
else:
total = sum(len(v) - 1 for v in dupes.values())
print(f"Found {len(dupes)} duplicate groups ({total} redundant files):\n")
for h, paths in dupes.items():
print(f" Hash: {h[:16]}…")
for p in paths:
print(f" {p}")
print()
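Hashing is the slow part on large trees. Since files of different sizes can never be identical, you can group by size first and hash only the candidate groups. A sketch of that pre-filter:
from collections import defaultdict
from pathlib import Path

def size_groups(root: str) -> list[list[Path]]:
    """Group files by byte size; only groups with 2+ members need hashing."""
    by_size: dict[int, list[Path]] = defaultdict(list)
    for path in Path(root).rglob("*"):
        if path.is_file():
            try:
                by_size[path.stat().st_size].append(path)
            except OSError:
                pass
    return [paths for paths in by_size.values() if len(paths) > 1]

# Then run sha256() from the script above on these candidate groups only.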
4. Automated Email Sender
Send personalised emails from a CSV list — useful for newsletters, invoices, or notifications.
#!/usr/bin/env python3
"""Send personalised emails from a CSV list."""
import csv
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import os
SMTP_HOST = "smtp.gmail.com"
SMTP_PORT = 587
FROM_ADDR = os.environ["EMAIL_ADDRESS"]
FROM_PASS = os.environ["EMAIL_PASSWORD"] # use an app password, not your real password
def send_email(to: str, subject: str, body: str) -> bool:
msg = MIMEMultipart("alternative")
msg["From"] = FROM_ADDR
msg["To"] = to
msg["Subject"] = subject
msg.attach(MIMEText(body, "html"))
try:
with smtplib.SMTP(SMTP_HOST, SMTP_PORT) as server:
server.starttls()
server.login(FROM_ADDR, FROM_PASS)
server.send_message(msg)
return True
except Exception as e:
print(f" Failed to send to {to}: {e}")
return False
def send_campaign(csv_path: str) -> None:
with open(csv_path, newline="") as f:
for row in csv.DictReader(f):
subject = f"Hello {row['name']}!"
body = f"<p>Hi {row['name']},</p><p>Your message here.</p>"
ok = send_email(row["email"], subject, body)
print(f" {'✓' if ok else '✗'} {row['email']}")
if __name__ == "__main__":
import sys
csv_path = sys.argv[1] if len(sys.argv) > 1 else "contacts.csv"
send_campaign(csv_path)
CSV format (contacts.csv):
name,email
Alice,alice@example.com
Bob,bob@example.com
Set EMAIL_ADDRESS and EMAIL_PASSWORD as environment variables — never hardcode credentials.
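To attach a file, an invoice PDF for example, add a MIMEApplication part before sending. A minimal sketch; the filename is illustrative:
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart

def attach_file(msg: MIMEMultipart, filename: str) -> None:
    """Attach a binary file to an outgoing message (sketch)."""
    with open(filename, "rb") as f:
        part = MIMEApplication(f.read(), Name=filename)
    part["Content-Disposition"] = f'attachment; filename="{filename}"'
    msg.attach(part)

# In send_email(), after the MIMEText attach:
# attach_file(msg, "invoice.pdf")  # hypothetical file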
5. Web Scraper with Rotating Headers
A resilient scraper that rotates user-agent strings and handles pagination.
#!/usr/bin/env python3
"""Scrape a paginated website with rotating headers."""
import random
import time
import requests
from bs4 import BeautifulSoup
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 Safari/605.1.15",
"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
]
def get_page(url: str) -> BeautifulSoup | None:
headers = {"User-Agent": random.choice(USER_AGENTS)}
try:
resp = requests.get(url, headers=headers, timeout=15)
resp.raise_for_status()
return BeautifulSoup(resp.text, "html.parser")
except requests.RequestException as e:
print(f"Error fetching {url}: {e}")
return None
def scrape(base_url: str, pages: int = 5) -> list[dict]:
results = []
for page in range(1, pages + 1):
url = f"{base_url}?page={page}"
soup = get_page(url)
if not soup:
break
# Adapt these selectors to your target site
for article in soup.select("article.post"):
results.append({
"title": article.select_one("h2").get_text(strip=True),
"url": article.select_one("a")["href"],
"date": article.select_one("time").get("datetime", ""),
})
print(f" Page {page}: {len(results)} items so far")
time.sleep(random.uniform(1.5, 3.5)) # be polite — don't hammer the server
return results
if __name__ == "__main__":
data = scrape("https://example.com/blog", pages=3)
print(f"\nScraped {len(data)} items:")
for item in data[:5]:
print(f" {item['title'][:60]} — {item['url']}")
Install: pip install requests beautifulsoup4
Only scrape sites that permit it in their robots.txt and terms of service.
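The standard library can do the robots.txt check for you. A minimal sketch using urllib.robotparser:
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

def allowed(url: str, user_agent: str = "*") -> bool:
    """Return True if the site's robots.txt permits fetching this URL."""
    parts = urlparse(url)
    rp = RobotFileParser()
    rp.set_url(f"{parts.scheme}://{parts.netloc}/robots.txt")
    rp.read()
    return rp.can_fetch(user_agent, url)

# In scrape(), before calling get_page():
# if not allowed(url):
#     break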
6. PDF Report Generator
Generate a formatted PDF report programmatically — useful for invoices, summaries, or automated reports.
#!/usr/bin/env python3
"""Generate a PDF report from data."""
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from datetime import date
def generate_report(filename: str, title: str, rows: list[list]) -> None:
doc = SimpleDocTemplate(filename, pagesize=A4)
styles = getSampleStyleSheet()
story = []
# Title
story.append(Paragraph(title, styles["Title"]))
story.append(Paragraph(f"Generated: {date.today()}", styles["Normal"]))
story.append(Spacer(1, 20))
# Table
headers = [["#", "Item", "Status", "Value"]]
data = headers + rows
table = Table(data, colWidths=[30, 250, 100, 80])
table.setStyle(TableStyle([
("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#0d1117")),
("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#00ff88")),
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
("FONTSIZE", (0, 0), (-1, 0), 11),
("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f9f9f9")]),
("GRID", (0, 0), (-1, -1), 0.5, colors.lightgrey),
("TOPPADDING", (0, 0), (-1, -1), 8),
("BOTTOMPADDING", (0, 0), (-1, -1), 8),
]))
story.append(table)
doc.build(story)
print(f"Report saved: {filename}")
if __name__ == "__main__":
sample_data = [
[1, "SSL Certificate Check", "PASS", "A+"],
[2, "Security Headers", "WARN", "B"],
[3, "Open Ports Scan", "PASS", "Clean"],
[4, "Dependency Audit", "FAIL", "3 CVEs"],
]
generate_report("security_report.pdf", "Security Audit Report", sample_data)
Install: pip install reportlab
7. Environment Variable Auditor
Check a project’s .env file for common security mistakes before pushing to version control.
#!/usr/bin/env python3
"""Audit .env files for common security issues."""
import re
from pathlib import Path
WEAK_VALUES = {"changeme", "password", "secret", "12345", "test", "example", "your_key_here", ""}
SENSITIVE_KEYS = re.compile(r"(password|secret|key|token|api|auth|cred|private)", re.I)
def audit(env_path: str = ".env") -> None:
path = Path(env_path)
if not path.exists():
print(f"No .env file found at {env_path}")
return
issues = []
for i, line in enumerate(path.read_text().splitlines(), 1):
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, _, value = line.partition("=")
key = key.strip()
value = value.strip().strip('"').strip("'")
if value.lower() in WEAK_VALUES:
issues.append((i, key, "⚠ Weak or empty value"))
if len(value) < 16 and SENSITIVE_KEYS.search(key):
issues.append((i, key, "⚠ Sensitive key with short value"))
if value.startswith("http://") and "localhost" not in value:
issues.append((i, key, "⚠ Non-HTTPS URL"))
if issues:
print(f"Found {len(issues)} issue(s) in {env_path}:\n")
for lineno, key, msg in issues:
print(f" Line {lineno:>3} | {key:<30} | {msg}")
else:
print(f"✓ No obvious issues found in {env_path}")
if __name__ == "__main__":
import sys
audit(sys.argv[1] if len(sys.argv) > 1 else ".env")
8. Directory Tree Snapshot
Capture a snapshot of a directory structure to a text file — useful for documentation and change tracking.
#!/usr/bin/env python3
"""Capture a directory tree snapshot."""
from pathlib import Path
IGNORE = {".git", "__pycache__", "node_modules", ".venv", ".DS_Store"}
def tree(path: Path, prefix: str = "", output: list[str] | None = None) -> list[str]:
if output is None:
output = [str(path)]
children = sorted(
[p for p in path.iterdir() if p.name not in IGNORE],
key=lambda p: (p.is_file(), p.name.lower())
)
for i, child in enumerate(children):
connector = "└── " if i == len(children) - 1 else "├── "
output.append(prefix + connector + child.name)
if child.is_dir():
extension = " " if i == len(children) - 1 else "│ "
tree(child, prefix + extension, output)
return output
if __name__ == "__main__":
import sys
from datetime import datetime
root = Path(sys.argv[1] if len(sys.argv) > 1 else ".")
lines = tree(root)
out = "\n".join(lines)
outfile = f"tree_{root.name}_{datetime.now():%Y%m%d_%H%M%S}.txt"
Path(outfile).write_text(out)
print(out)
print(f"\nSaved to {outfile}")
9. REST API Batch Requester
Process a list of IDs or items through a REST API with error handling, retries, and rate limit awareness.
#!/usr/bin/env python3
"""Batch process items through a REST API."""
import time
import requests
from typing import Any
API_BASE = "https://jsonplaceholder.typicode.com"
DELAY = 0.2 # seconds between requests
MAX_RETRY = 3
def fetch(endpoint: str, session: requests.Session) -> dict | None:
for attempt in range(1, MAX_RETRY + 1):
try:
resp = session.get(f"{API_BASE}{endpoint}", timeout=10)
if resp.status_code == 429:
wait = int(resp.headers.get("Retry-After", 5))
print(f" Rate limited — waiting {wait}s")
time.sleep(wait)
continue
resp.raise_for_status()
return resp.json()
except requests.RequestException as e:
print(f" Attempt {attempt}/{MAX_RETRY} failed: {e}")
if attempt < MAX_RETRY:
time.sleep(2 ** attempt) # exponential backoff
return None
def batch_fetch(ids: list[int]) -> list[dict[str, Any]]:
results = []
with requests.Session() as session:
session.headers.update({"Accept": "application/json"})
for item_id in ids:
data = fetch(f"/posts/{item_id}", session)
if data:
results.append(data)
print(f" ✓ Fetched post {item_id}: {data.get('title', '')[:50]}")
else:
print(f" ✗ Failed post {item_id}")
time.sleep(DELAY)
return results
if __name__ == "__main__":
results = batch_fetch(list(range(1, 11)))
print(f"\nFetched {len(results)} items successfully.")
10. Log File Analyser
Parse application or server logs to extract error frequencies, top IPs, and slow requests.
#!/usr/bin/env python3
"""Analyse a web server access log."""
import re
from pathlib import Path
from collections import Counter
# Common Log Format regex
LOG_PATTERN = re.compile(
r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] '
r'"(?P<method>\S+) (?P<path>\S+) \S+" '
r'(?P<status>\d{3}) (?P<size>\S+)'
)
def analyse(log_path: str, top_n: int = 10) -> None:
lines = Path(log_path).read_text(errors="ignore").splitlines()
ips = Counter()
statuses = Counter()
paths = Counter()
errors = []
for line in lines:
m = LOG_PATTERN.match(line)
if not m:
continue
d = m.groupdict()
ips[d["ip"]] += 1
statuses[d["status"]] += 1
paths[d["path"]] += 1
if d["status"].startswith(("4", "5")):
errors.append(f'{d["status"]} {d["method"]} {d["path"]}')
print(f"=== Log Analysis: {log_path} ===\n")
print(f"Total requests: {len(lines)}\n")
print(f"Top {top_n} IPs:")
for ip, count in ips.most_common(top_n):
print(f" {count:>6} {ip}")
print(f"\nStatus code breakdown:")
for code, count in sorted(statuses.items()):
print(f" {code} {count:>6}")
print(f"\nTop {top_n} paths:")
for path, count in paths.most_common(top_n):
print(f" {count:>6} {path}")
print(f"\nLast 10 errors:")
for err in errors[-10:]:
print(f" {err}")
if __name__ == "__main__":
import sys
analyse(sys.argv[1] if len(sys.argv) > 1 else "access.log")
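Rotated logs are often gzip-compressed. Reading .gz files transparently is a small change; a sketch:
import gzip
from pathlib import Path

def read_log(log_path: str) -> list[str]:
    """Read a plain or gzip-compressed log file into lines."""
    if log_path.endswith(".gz"):
        with gzip.open(log_path, "rt", errors="ignore") as f:
            return f.read().splitlines()
    return Path(log_path).read_text(errors="ignore").splitlines()

# In analyse(), replace the read_text() line with:
# lines = read_log(log_path)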
Making Scripts Production-Ready
The scripts above work as-is, but a few additions make them production-quality:
Add argument parsing:
import argparse
parser = argparse.ArgumentParser(description="My script")
parser.add_argument("path", help="Target path")
parser.add_argument("--dry-run", action="store_true")
args = parser.parse_args()
Add logging instead of print:
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
logger.info("Starting process")
Schedule with cron (Linux) or Task Scheduler (Windows):
# Run uptime monitor every 5 minutes
*/5 * * * * /usr/bin/python3 /home/user/monitor.py >> /var/log/monitor.log 2>&1
Pin your dependencies for reproducible installs:
pip freeze > requirements.txt
pip install -r requirements.txt
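Run pip freeze inside a virtual environment so requirements.txt captures only what the script actually needs:
python -m venv .venv
source .venv/bin/activate   # Windows: .venv\Scripts\activate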
The best automation is the automation you actually run. Start with one script that solves a real problem you have today. Once it works, you will start seeing automation opportunities everywhere.