invoice_renamer/RenameInvoice.ps1
Steve Dogiakos 50606414e8
Finally working script!
Match 'Bill to' anywhere in the text and then capture the next non-empty line to use for renaming. It's not perfect, but it does work on all the files this time.
2025-03-19 11:14:14 -06:00

45 lines
1.6 KiB
PowerShell

# Ensure 'pdftotext.exe' is available in your PATH or specify the full path here
$pdftotextPath = "pdftotext.exe"
# Get all PDF files in the script's current directory
$pdfFiles = Get-ChildItem -Path $PSScriptRoot -Filter *.pdf
foreach ($pdfFile in $pdfFiles) {
# Temporary text file for extracting content
$tempTxt = "$env:TEMP\$(New-Guid).txt"
# Extract text content from PDF
& $pdftotextPath -layout $pdfFile.FullName $tempTxt
# Read extracted content as a single string
$content = Get-Content $tempTxt -Raw
# Robust regex method: finds "Bill to" and captures the next meaningful line even if Bill to is not alone on line
if ($content -match "Bill to.*?(?:\r?\n|\r|\n)\s*(.+)") {
$billToName = $matches[1].Trim()
# Clean name from invalid filename characters
$cleanName = ($billToName -replace '[\\/:*?"<>|]', '').Trim()
# Construct safe filename
$newFileName = "$cleanName 2025 calendar ad invoice.pdf"
$destPath = Join-Path -Path $pdfFile.DirectoryName -ChildPath $newFileName
# Avoid conflicts
if (-not (Test-Path $destPath)) {
try {
Rename-Item -Path $pdfFile.FullName -NewName $newFileName -Verbose -ErrorAction Stop
} catch {
Write-Warning "Error renaming '$($pdfFile.Name)': $_"
}
} else {
Write-Warning "File '$newFileName' already exists. Skipping rename."
}
} else {
Write-Warning "Could not find 'Bill to' in file '$($pdfFile.Name)'. Skipping file."
}
# Cleanup temporary file
Remove-Item $tempTxt -Force
}