-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRemoveFirstPageAndTrack.ps1
56 lines (43 loc) · 1.91 KB
/
RemoveFirstPageAndTrack.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Folder that holds the PDFs to process; the script also runs from here
$workingDir = "G:\My Drive\Medical\barmenia\Einreichungen"
# CSV file that collects the text extracted from each PDF
$csvFile = $workingDir + "\extracted_data.csv"
Set-Location $workingDir
# Write the header row once, only when the CSV is not there yet
if (!(Test-Path -Path $csvFile)) {
    Out-File -FilePath $csvFile -InputObject "FileName,ExtractedText"
}
# Extract the first page of a PDF, read its text, and append one row
# (source path, extracted text) to the CSV file named by $csvFile.
#
# Parameters:
#   pdfFile - path of the PDF to read; passed to pdf24.exe as -source.
#
# Reads the script-level variable $csvFile. The first page is written to a
# uniquely named temporary file in the system temp directory, so it cannot be
# picked up by a "*.pdf" scan of the working folder; it is removed afterwards
# even when a step fails.
#
# NOTE(review): the pdf24.exe sub-commands and switches below are kept exactly
# as the script had them - confirm they match the installed PDF24 build.
function Extract-FirstPage-Text {
    param (
        [string]$pdfFile
    )
    $tempName = "first_page_{0}.pdf" -f [guid]::NewGuid().ToString("N")
    $firstPagePdf = [System.IO.Path]::Combine([System.IO.Path]::GetTempPath(), $tempName)
    try {
        # Step 1: copy page 1 of the source into the temporary PDF
        & "C:\Program Files\PDF24\pdf24.exe" extract-pages -from 1 -to 1 -source $pdfFile -target $firstPagePdf
        if (Test-Path -LiteralPath $firstPagePdf) {
            # Step 2: pull the text out of that single page
            $extractedText = & "C:\Program Files\PDF24\pdf24.exe" extract-text -source $firstPagePdf
        }
        else {
            Write-Warning "First page could not be extracted from '$pdfFile'; recording an empty text."
            $extractedText = ''
        }
        # External command output arrives as an array of lines; join with a
        # space, which is what string interpolation of the array did before.
        $flatText = @($extractedText) -join ' '
        # Quote every field and double embedded quotes so commas or quotes in
        # the path or the text cannot shift the CSV columns.
        $fields = @($pdfFile, $flatText) | ForEach-Object { '"' + ($_ -replace '"', '""') + '"' }
        ($fields -join ',') | Out-File -Append -FilePath $csvFile
    }
    finally {
        # Only delete the temporary file when it was actually created
        if (Test-Path -LiteralPath $firstPagePdf) {
            Remove-Item -LiteralPath $firstPagePdf
        }
    }
}
# Remove the first page of a PDF and write the remaining pages next to the
# original as "<name>_done.pdf". The original file is left in place.
#
# Parameters:
#   pdfFile - path of the PDF to process; passed to pdf24.exe as -source.
#
# NOTE(review): the pdf24.exe delete-pages sub-command and its switches are
# kept exactly as the script had them - confirm they match the installed
# PDF24 build.
function Remove-FirstPage-And-Save {
    param (
        [string]$pdfFile
    )
    if ([string]::IsNullOrWhiteSpace($pdfFile)) {
        Write-Warning "Remove-FirstPage-And-Save: no PDF path given; nothing to do."
        return
    }
    # Build "<folder>\<name>_done.pdf" by hand. Path.ChangeExtension would
    # treat "_done.pdf" as an extension and insert a period, producing
    # "<name>._done.pdf" instead.
    $folder = [System.IO.Path]::GetDirectoryName($pdfFile)
    $stem = [System.IO.Path]::GetFileNameWithoutExtension($pdfFile)
    $newFile = [System.IO.Path]::Combine($folder, $stem + "_done.pdf")
    # Use PDF24 command-line to delete the first page and save the rest
    & "C:\Program Files\PDF24\pdf24.exe" delete-pages -from 1 -to 1 -source $pdfFile -target $newFile
    # Optionally, delete the original file (uncomment if needed)
    # Remove-Item -Path $pdfFile
}
# Process all PDFs in the directory.
# The list is collected up front so files created while processing are not
# enumerated, and the script's own artifacts are skipped: outputs ending in
# "_done.pdf" (otherwise every re-run would strip another page from them) and
# the temporary "first_page.pdf". -File keeps directories out of the list.
$pendingPdfs = @(
    Get-ChildItem -Path $workingDir -Filter *.pdf -File |
        Where-Object { $_.Name -notlike '*_done.pdf' -and $_.Name -ne 'first_page.pdf' }
)
foreach ($entry in $pendingPdfs) {
    $pdfFile = $entry.FullName
    # Record the first page's text before producing the trimmed copy
    Extract-FirstPage-Text -pdfFile $pdfFile
    Remove-FirstPage-And-Save -pdfFile $pdfFile
}
Write-Host "Processing complete."