|
25 | 25 | import sigProfilerPlotting
|
26 | 26 | import os, sys
|
27 | 27 | from pypdf import PdfWriter, PdfReader
|
28 |
| -import fitz |
| 28 | +from pdf2image import convert_from_path |
29 | 29 | import time
|
30 | 30 | from pathlib import Path
|
31 | 31 |
|
@@ -60,21 +60,32 @@ def get_storage_dir(volume=None):
|
60 | 60 |
|
61 | 61 |
|
def convert_PDF_to_PNG(input_file_name, output_directory, page_names):
    """
    Render every page of a PDF to its own PNG file, named after page_names.

    Requires the 'pdf2image' Python package and the 'poppler' binary.

    Parameters:
    - input_file_name (str): Path to the input PDF file.
    - output_directory (str): Directory where the PNGs will be saved. It is
      created (including parents) if it does not exist yet.
    - page_names (List[str]): One name per PDF page, without extension, in
      page order. Page i is written to <output_directory>/<page_names[i]>.png.

    Raises:
    - RuntimeError: If the PDF cannot be converted to images.
    - ValueError: If the number of PDF pages differs from len(page_names).
    """
    # Convert PDF pages to PIL images (all pages are rendered up front).
    try:
        images = convert_from_path(input_file_name, dpi=300)
    except Exception as e:
        # Chain explicitly so the underlying pdf2image/poppler failure stays
        # attached to the error the caller sees.
        raise RuntimeError(f"Error converting PDF to images: {e}") from e

    if len(images) != len(page_names):
        raise ValueError(
            "Error: The number of samples and number of plots do not match."
        )

    # Create the directory only after the input has been validated, so a
    # failed call does not leave an empty directory behind. exist_ok avoids
    # the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_directory, exist_ok=True)

    for image, sample_name in zip(images, page_names):
        # f-string rather than "+" so non-string sample names (e.g. integer
        # IDs) do not raise TypeError.
        output_path = os.path.join(output_directory, f"{sample_name}.png")
        image.save(output_path, "PNG")
        # Release the rendered page as soon as it is on disk.
        image.close()
78 | 89 |
|
79 | 90 |
|
80 | 91 | # Create sample reconstruction plots
|
@@ -263,6 +274,7 @@ def spa_analyze(
|
263 | 274 | genome_build = A string. The reference genome build. List of supported genomes: "GRCh37", "GRCh38", "mm9", "mm10" and "rn6". The default value is "GRCh37". If the selected genome is not in the supported list, the default genome will be used.
|
264 | 275 | verbose = Boolean. Prints statements. Default value is False.
|
265 | 276 | exome = Boolean. Defines if the exome renormalized signatures will be used. The default value is False.
|
| 277 | + sample_reconstruction_plots (str): Select output format for sample reconstruction plots. Valid options are {'pdf', 'png', 'both', 'none'}. Default is 'none'. |
266 | 278 |
|
267 | 279 | Values:
|
268 | 280 | The files below will be generated in the output folder.
|
@@ -1061,7 +1073,8 @@ def spa_analyze(
|
1061 | 1073 | recon_output_types = ["png", "pdf", "both"]
|
1062 | 1074 | # Generate sample reconstruction plots
|
1063 | 1075 | if (
|
1064 |
| - sample_reconstruction_plots in recon_output_types |
| 1076 | + isinstance(sample_reconstruction_plots, str) |
| 1077 | + and sample_reconstruction_plots.lower() in recon_output_types |
1065 | 1078 | and mutation_type == "96"
|
1066 | 1079 | and signature_database is None
|
1067 | 1080 | ):
|
|
0 commit comments