@@ -51,6 +51,7 @@ def from_file(
51
51
filename : str ,
52
52
fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
53
53
pdf_image_dpi : int = 200 ,
54
+ password : Optional [str ] = None ,
54
55
** kwargs ,
55
56
) -> DocumentLayout :
56
57
"""Creates a DocumentLayout from a pdf file."""
@@ -62,6 +63,7 @@ def from_file(
62
63
pdf_image_dpi ,
63
64
output_folder = temp_dir ,
64
65
path_only = True ,
66
+ password = password ,
65
67
)
66
68
image_paths = cast (List [str ], _image_paths )
67
69
number_of_pages = len (image_paths )
@@ -133,6 +135,7 @@ def __init__(
133
135
document_filename : Optional [Union [str , PurePath ]] = None ,
134
136
detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
135
137
element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
138
+ password : Optional [str ] = None ,
136
139
):
137
140
if detection_model is not None and element_extraction_model is not None :
138
141
raise ValueError ("Only one of detection_model and extraction_model should be passed." )
@@ -148,6 +151,7 @@ def __init__(
148
151
self .element_extraction_model = element_extraction_model
149
152
self .elements : Collection [LayoutElement ] = []
150
153
self .elements_array : LayoutElements | None = None
154
+ self .password = password
151
155
# NOTE(alan): Dropped LocationlessLayoutElement that was created for chipper - chipper has
152
156
# locations now and if we need to support LayoutElements without bounding boxes we can make
153
157
# the bbox property optional
@@ -325,6 +329,7 @@ def from_image(
325
329
def process_data_with_model (
326
330
data : BinaryIO ,
327
331
model_name : Optional [str ],
332
+ password : Optional [str ] = None ,
328
333
** kwargs : Any ,
329
334
) -> DocumentLayout :
330
335
"""Process PDF as file-like object `data` into a `DocumentLayout`.
@@ -339,6 +344,7 @@ def process_data_with_model(
339
344
layout = process_file_with_model (
340
345
file_path ,
341
346
model_name ,
347
+ password = password ,
342
348
** kwargs ,
343
349
)
344
350
@@ -351,6 +357,7 @@ def process_file_with_model(
351
357
is_image : bool = False ,
352
358
fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
353
359
pdf_image_dpi : int = 200 ,
360
+ password : Optional [str ] = None ,
354
361
** kwargs : Any ,
355
362
) -> DocumentLayout :
356
363
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -379,6 +386,7 @@ def process_file_with_model(
379
386
element_extraction_model = element_extraction_model ,
380
387
fixed_layouts = fixed_layouts ,
381
388
pdf_image_dpi = pdf_image_dpi ,
389
+ password = password ,
382
390
** kwargs ,
383
391
)
384
392
)
@@ -390,6 +398,7 @@ def convert_pdf_to_image(
390
398
dpi : int = 200 ,
391
399
output_folder : Optional [Union [str , PurePath ]] = None ,
392
400
path_only : bool = False ,
401
+ password : Optional [str ] = None ,
393
402
) -> Union [List [Image .Image ], List [str ]]:
394
403
"""Get the image renderings of the pdf pages using pdf2image"""
395
404
@@ -402,12 +411,14 @@ def convert_pdf_to_image(
402
411
dpi = dpi ,
403
412
output_folder = output_folder ,
404
413
paths_only = path_only ,
414
+ userpw = password or "" ,
405
415
)
406
416
else :
407
417
images = pdf2image .convert_from_path (
408
418
filename ,
409
419
dpi = dpi ,
410
420
paths_only = path_only ,
421
+ userpw = password or "" ,
411
422
)
412
423
413
424
return images
0 commit comments