-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf_merger_ST_VPW.py
More file actions
193 lines (159 loc) · 7.3 KB
/
pdf_merger_ST_VPW.py
File metadata and controls
193 lines (159 loc) · 7.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# pdf_merger_ST_VPW.py
import streamlit as st
import os
from PyPDF2 import PdfMerger, PdfReader
from io import BytesIO
from streamlit_sortables import sort_items
# --- Page configuration and introductory header ---
# The page settings are applied before any other Streamlit element is drawn.
_page_settings = {
    "page_title": "PDF Merger App",
    "page_icon": "📄",
    "layout": "centered",
}
st.set_page_config(**_page_settings)

st.title("🔐 PDF Merger App")

# Short usage hint rendered as Markdown directly under the title.
_intro_text = (
    "\n"
    "Upload PDF files and drag-and-drop to set the merge order.\n"
    "**If a file is password-protected, enter the password below.**\n"
)
st.markdown(_intro_text)
# --- Core Merging Logic ---
def merge_pdfs_streamlit(ordered_file_data, password_map, output_filename="merged_output.pdf"):
    """
    Merge uploaded PDF file objects, in order, into one in-memory PDF.

    Encrypted inputs are decrypted with the password registered for them in
    ``password_map``. Inputs that are not PDFs, cannot be read, or cannot be
    decrypted are reported through the Streamlit UI and skipped; they do not
    abort the merge.

    Args:
        ordered_file_data (list): Dictionaries with a ``'label'`` (file name)
            and a ``'file_object'`` (object exposing ``type``, ``seek`` and
            ``read``), already arranged in the desired merge order.
        password_map (dict): Map of file name -> password for protected files.
        output_filename (str): Not used by this function; kept so callers
            that pass it keep working.

    Returns:
        tuple: ``(True, BytesIO)`` with the buffer rewound to the start of the
        merged PDF, or ``(False, str)`` carrying an error message.
    """
    if not ordered_file_data:
        return False, "Please upload and order at least one PDF file."

    merger = PdfMerger()
    successful_merges = 0

    for item in ordered_file_data:
        file = item['file_object']
        file_name = item['label']

        if file.type != "application/pdf":
            st.warning(f"Skipped file '{file_name}' because it is not a valid PDF.")
            continue

        try:
            # Rewind first: the same upload object may have been read on an
            # earlier run of the script.
            file.seek(0)
            # Hand PyPDF2 its own in-memory copy of the upload.
            file_buffer = BytesIO(file.read())
            pdf_reader = PdfReader(file_buffer)

            # --- PASSWORD CHECK AND DECRYPTION ---
            if pdf_reader.is_encrypted:
                password = password_map.get(file_name)
                if not password:
                    st.error(f"🔒 **{file_name}** is password-protected. Please provide the password above to include it in the merge. Skipping file.")
                    continue

                # decrypt() yields 0 when the password is rejected and a
                # positive value (1 = user password, 2 = owner password) when
                # it matches. Any non-positive result is treated as a failure
                # so a wrong password is reported as such instead of surfacing
                # later as a generic read error from merger.append().
                decryption_result = pdf_reader.decrypt(password)
                if decryption_result <= 0:
                    st.error(f"❌ Failed to decrypt **{file_name}**. Password incorrect or file corrupted. Skipping file.")
                    continue
                st.info(f"🔑 Decrypted **{file_name}** successfully.")
            # --- END PASSWORD CHECK ---

            merger.append(pdf_reader)
            successful_merges += 1
        except Exception as e:
            # Best-effort merge: one unreadable file must not abort the rest.
            st.error(f"Skipped file '{file_name}' due to a critical read error: {e}")

    if successful_merges == 0:
        merger.close()
        return False, "No valid PDF files were successfully processed."

    # In-memory buffer that receives the merged PDF.
    output_buffer = BytesIO()
    try:
        merger.write(output_buffer)
        output_buffer.seek(0)
        return True, output_buffer
    except Exception as e:
        return False, f"Error writing merged file: {e}"
    finally:
        merger.close()
# --- Streamlit UI Implementation ---
# Top-level script: Streamlit re-runs it from the top on every interaction.

# 1. Session State Management
if 'uploaded_files_map' not in st.session_state:
    st.session_state.uploaded_files_map = {}

# 2. File Uploader
uploaded_files = st.file_uploader(
    "1. Choose PDF files",
    type="pdf",
    accept_multiple_files=True
)

# Keep the session-state map (file name -> uploaded file) in step with the
# uploader widget.
if uploaded_files:
    # A different set of names means a new batch: start from a clean map.
    current_names = {f.name for f in uploaded_files}
    if current_names != set(st.session_state.uploaded_files_map):
        st.session_state.uploaded_files_map = {}
    for file in uploaded_files:
        st.session_state.uploaded_files_map[file.name] = file
else:
    # The uploader is empty (nothing uploaded yet, or every file was removed):
    # drop stale entries so removed files are no longer listed or merged.
    st.session_state.uploaded_files_map = {}

# --- PASSWORD INPUT SECTION ---
if st.session_state.uploaded_files_map:
    files_map = st.session_state.uploaded_files_map

    st.subheader("2. Password Input (If Needed)")
    password_map = {}
    # One password field per uploaded file; the file name makes the widget key
    # unique. Blank entries are left out of the map.
    for name in sorted(files_map):
        password = st.text_input(
            f"Password for **{name}** (Leave blank if not protected)",
            type="password",
            key=f"password_{name}"
        )
        if password:
            password_map[name] = password

    # 3. Drag-and-Drop Reordering
    st.subheader("3. Drag-and-Drop to Reorder")
    st.info("Drag the items to set the desired merge sequence (top-to-bottom).")
    initial_items = list(files_map)
    reordered_names = sort_items(
        items=initial_items,
        key="pdf_list_key"
    )
    # Defensive: the sortable component uses a fixed key, so guard against it
    # handing back names that are no longer uploaded (which would otherwise
    # raise KeyError when the file objects are looked up below).
    reordered_names = [name for name in (reordered_names or []) if name in files_map]

    # 4. Finalize and Merge
    st.subheader("4. Finalize and Merge")
    default_name = "merged_documents.pdf"
    if reordered_names:
        # Strip only the trailing extension (any letter case); str.replace
        # would also remove '.pdf' occurring in the middle of the name.
        base_name = os.path.splitext(reordered_names[0])[0]
        default_name = f"{base_name}_merged.pdf"
    output_name = st.text_input(
        "Enter output file name (e.g., final_report.pdf)",
        value=default_name,
        help="The output file will **not** be password-protected."
    )

    # 5. Execute Merge Button
    if st.button("✨ Execute Merge"):
        if not reordered_names:
            st.warning("Please upload files or ensure the list is not empty.")
        elif not output_name.lower().endswith('.pdf'):
            st.error("The output filename must end with **.pdf**")
        else:
            # Rebuild the file list in the order chosen by the user.
            ordered_file_data = [
                {'label': name, 'file_object': files_map[name]}
                for name in reordered_names
            ]
            with st.spinner('Processing and Merging your PDF files...'):
                success, result = merge_pdfs_streamlit(ordered_file_data, password_map, output_name)
            if success:
                st.success(f"✅ Success! Merged {len(ordered_file_data)} PDF files. The final file is decrypted.")
                st.download_button(
                    label="⬇️ Download Merged PDF",
                    data=result,
                    file_name=output_name,
                    mime="application/pdf"
                )
                st.balloons()
            else:
                st.error(f"❌ Merge failed: {result}")
else:
    st.info("Upload your PDF files to begin the merging process.")