Compare commits

...

2 Commits

Author SHA1 Message Date
b351cbddfb Re-organized by putting the Debian-specific scripts into their own folder so I can better accommodate my general, non-Debian scripts 2025-06-18 11:45:10 -06:00
bfdffd156a
Create thank-you-receipt-address-block-parser.py
First try at a Python parser to extract address blocks from historic thank-you receipts.
2025-06-18 11:41:21 -06:00
5 changed files with 65 additions and 92 deletions

View File

@ -1,33 +0,0 @@
#!/bin/bash
# Install resolvconf, enable its service, and write the 8.8.8.8 / 8.8.4.4
# nameserver entries into /etc/resolvconf/resolv.conf.d/head.
#
# Must be run as root (or through sudo).

# Stop at the first failing step so the closing "completed successfully"
# message is only printed when every step actually worked.
set -euo pipefail

# Ensure the script is run as root
if [ "$EUID" -ne 0 ]; then
    echo "Please run as root or use sudo"
    exit 1
fi

# Install resolvconf.
# Kept as two separate commands: inside an `a && b` list, `set -e` does not
# abort when `a` fails, so a failed `apt update` would go unnoticed.
apt update
apt install -y resolvconf

# Enable and start resolvconf service
systemctl enable resolvconf
systemctl start resolvconf

# Check service status.
# Informational only: `systemctl status` returns non-zero for units that are
# not active, which must not abort the rest of the setup.
systemctl status resolvconf --no-pager || true

# Update resolv.conf head file
cat <<EOF > /etc/resolvconf/resolv.conf.d/head
nameserver 8.8.8.8
nameserver 8.8.4.4
EOF

# Apply changes
resolvconf --enable-updates
resolvconf -u

# Confirm changes
echo "Updated resolv.conf file:"
cat /etc/resolv.conf
echo "resolvconf setup completed successfully."

View File

@ -1,35 +0,0 @@
#!/bin/bash
# Copy every top-level item in SOURCE_DIR that was modified since the previous
# run into DEST_DIR, then remember when this run started so the next run
# continues from there.

# Configuration
SOURCE_DIR="/mnt/storage/Downloads"
DEST_DIR="/home/steve/new_books"
TIMESTAMP_FILE="/home/steve/.last_book_upload_time"

# Resolve full path of this script so we can exclude it
SCRIPT_PATH="$(readlink -f "$0")"

# Ensure destination exists
mkdir -p "$DEST_DIR"

# If no timestamp file exists, create one with a default time
if [ ! -f "$TIMESTAMP_FILE" ]; then
    echo "First run. Creating timestamp file."
    date -d "1 day ago" +"%Y-%m-%d %H:%M:%S" > "$TIMESTAMP_FILE"
fi

# Read the last run time
LAST_RUN=$(cat "$TIMESTAMP_FILE")
echo "Syncing items modified since: $LAST_RUN"

# Capture the start of this run BEFORE scanning. Writing the time at the end
# of the run instead would skip anything modified while the scan or the
# copies were still in progress.
RUN_START=$(date +"%Y-%m-%d %H:%M:%S")

# Find modified items (top-level only), excluding this script.
# The loop reads from process substitution rather than a pipe so that
# SYNC_FAILED is set in this shell and not in a pipeline subshell.
SYNC_FAILED=0
while IFS= read -r -d '' ITEM; do
    ITEM_PATH="$(readlink -f "$ITEM")"
    if [ "$ITEM_PATH" != "$SCRIPT_PATH" ]; then
        if ! rsync -rP "$ITEM" "$DEST_DIR/"; then
            echo "Failed to sync: $ITEM" >&2
            SYNC_FAILED=1
        fi
    fi
done < <(find "$SOURCE_DIR" -mindepth 1 -maxdepth 1 -newermt "$LAST_RUN" -print0)

# Leave the old timestamp in place when anything failed so the next run
# retries the same items.
if [ "$SYNC_FAILED" -ne 0 ]; then
    echo "One or more items failed to sync; timestamp not updated." >&2
    exit 1
fi

# Update timestamp after sync
echo "$RUN_START" > "$TIMESTAMP_FILE"

View File

@ -1,2 +0,0 @@
#!/bin/bash
# Remove regular files matching find's `-atime +1` test from the diff/tmp
# directory of one Docker overlay2 entry.
#
# TMP selects the directory under /var/lib/docker/overlay2
# (presumably the overlay layer ID -- confirm against how this is invoked).

# Fail with a clear message instead of searching a malformed path when TMP is
# unset or empty.
: "${TMP:?TMP must name a directory under /var/lib/docker/overlay2}"

# The path is quoted so unusual characters in TMP cannot split it; `-exec ... +`
# hands files to rm in batches instead of starting one rm per file.
find "/var/lib/docker/overlay2/${TMP}/diff/tmp" -type f -atime +1 -exec rm -f {} +

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""
Extract address blocks from donation-receipt PDFs produced by Paperless-NGX.
Usage:
python extract_addresses.py receipts.pdf [-o addresses.csv]
Requires:
pip install pdfplumber
"""
from pathlib import Path
import csv
import re
import sys
import pdfplumber
# ⚙️ --- settings -------------------------------------------------------------
# Matches a three-line address block; one capture group per line:
#   1. name line, 2. street line, 3. "City, ST 12345" or "City, ST 12345-6789".
# NOTE: with re.VERBOSE, whitespace outside a character class is ignored, so
# the required space after the house number is spelled "[ ]". A bare space
# there is silently dropped and lets lines such as "06/18/2025" pass as a
# street line.
ADDRESS_RE = re.compile(
    r"""^(.+?)\n                                        # line 1  person / org name
        (\d{1,6}[ ].+?)\n                               # line 2  street / unit (digits, one space, rest)
        ([A-Za-z .'-]+,\s?[A-Z]{2}\s\d{5}(?:-\d{4})?)$  # line 3  City, ST ZIP
    """,
    re.MULTILINE | re.VERBOSE,
)
# -----------------------------------------------------------------------------
def extract_blocks(pdf_path: Path):
    """Yield at most one (name, street, city_state_zip) tuple per page.

    Each page of *pdf_path* is scanned with ADDRESS_RE and only the first
    match on the page is reported; pages without a match yield nothing.
    """
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Fall back to an empty string when the page yields no text.
            raw_text = page.extract_text() or ""
            # Collapse runs of spaces/tabs to a single space before matching
            # (Paperless sometimes inserts double spaces).
            collapsed = re.sub(r"[ \t]{2,}", " ", raw_text)
            # search() returns the leftmost match, i.e. the first address
            # on the page.
            first_hit = ADDRESS_RE.search(collapsed)
            if first_hit is not None:
                yield first_hit.groups()
def main():
    """Command-line entry point.

    Invocation: ``extract_addresses.py file.pdf [-o out.csv]``.

    Extracts the address blocks from the given PDF. With ``-o`` they are
    written to a CSV file with a header row; otherwise they are printed to
    stdout, each block followed by a line of 40 dashes.

    Exits with status 1 and a usage message on stderr when the PDF path is
    missing or when ``-o`` is given without a value.
    """
    usage = "Usage: extract_addresses.py file.pdf [-o out.csv]"
    args = sys.argv[1:]

    out_csv = None
    if "-o" in args:
        flag_at = args.index("-o")
        # A trailing "-o" has no value to read; report it as a usage error
        # rather than failing with IndexError.
        if flag_at + 1 >= len(args):
            print(usage, file=sys.stderr)
            sys.exit(1)
        out_csv = Path(args[flag_at + 1]).expanduser()
        # Drop the flag and its value so the PDF path is found wherever the
        # option was placed on the command line.
        del args[flag_at:flag_at + 2]

    if not args:
        print(usage, file=sys.stderr)
        sys.exit(1)
    pdf_file = Path(args[0]).expanduser()

    blocks = list(extract_blocks(pdf_file))

    # --- output ---------------------------------------------------------------
    if out_csv is not None:
        # Explicit UTF-8 so names with non-ASCII characters are written the
        # same way regardless of the platform's default encoding.
        with out_csv.open("w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["Name", "Street", "CityStateZip"])
            writer.writerows(blocks)
        print(f"Wrote {len(blocks)} addresses to {out_csv}")
    else:
        for name, street, city in blocks:
            print(name)
            print(street)
            print(city)
            print("-" * 40)


if __name__ == "__main__":
    main()

View File

@ -1,22 +0,0 @@
#!/bin/bash
# Run the YACReaderLibraryServer "update-library" command for LIBRARY_PATH
# inside the CONTAINER_NAME container, provided that container is running.

# Settings
CONTAINER_NAME="yacreader"
LIBRARY_PATH="/comics"
COMMAND="YACReaderLibraryServer update-library"

# Guard: stop unless `docker ps` lists a container with exactly this name.
if ! docker ps --format "{{.Names}}" | grep -q "^$CONTAINER_NAME$"; then
    echo "Error: Container '$CONTAINER_NAME' is not running."
    exit 1
fi

echo "Container '$CONTAINER_NAME' is running. Executing the update command..."

# $COMMAND is left unquoted on purpose so it splits into the program name
# and its sub-command.
if ! docker exec "$CONTAINER_NAME" $COMMAND "$LIBRARY_PATH"; then
    echo "An error occurred while updating the library."
    exit 1
fi

echo "Library update completed successfully."