Skip to content

Commit 1e19c24

Browse files
authored
Merge pull request #167 from AlexandrovLab/replace-fitz
v0.2.2: Refactor reconstruction plot generation to support PNG output…
2 parents ab28001 + aad864b commit 1e19c24

File tree

8 files changed

+73
-19
lines changed

8 files changed

+73
-19
lines changed

.travis.yml

+5-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ cache:
1010
branch:
1111
- master
1212

13+
addons:
14+
apt:
15+
packages:
16+
- poppler-utils
17+
1318
before_install:
1419
- pip install --upgrade pip setuptools packaging
1520
- if ! [ -f ./src/GRCh37.tar.gz ]; then wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi
@@ -21,7 +26,5 @@ before_script:
2126
- python3 install_genome.py $TRAVIS_BUILD_DIR/src/
2227

2328
script:
24-
# run unit tests
2529
- pytest tests
26-
# run integration tests
2730
- python3 test.py

CHANGELOG.md

+12
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66

77
## [Unreleased]
88

9+
## [0.2.2] - 2025-05-02
10+
11+
### Changed
12+
- Switched the PDF-to-PNG conversion backend from `PyMuPDF` to `pdf2image` for Conda compatibility and improved portability.
13+
- Added new CLI parameter: `--sample_reconstruction_plots` with options `'none'` (default), `'pdf'`, `'png'`, and `'both'`.
14+
- Updated `spa_analyze` and CLI dispatch logic to support format-based sample reconstruction plot output.
15+
- Default behavior now skips sample reconstruction plots unless explicitly requested.
16+
- Removed the `PyMuPDF` (`fitz`) dependency; added a system-requirement note for `poppler` in `setup.py` and the README.
17+
18+
### Added
19+
- Added a `pyproject.toml` file that declares the build system (`setuptools` build backend) for the project.
20+
921
## [0.2.1] - 2025-04-29
1022

1123
### Fixed

README.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ $ python
3434
from SigProfilerMatrixGenerator import install as genInstall
3535
genInstall.install('GRCh37')
3636
```
37+
38+
If you plan to use `sample_reconstruction_plots='png'` or `'both'`, the external `poppler` binary is required, because the `pdf2image` package depends on it for PDF-to-PNG conversion. You can install it as follows:
39+
40+
- For Conda-based environments:
41+
`conda install -c conda-forge poppler`
42+
3743
## <a name="running"></a> Running
3844

3945
Assignment of known mutational signatures to individual samples is performed using the `cosmic_fit` function. Input samples are provided using the `samples` parameter in the form of mutation calling files (VCFs, MAFs, or simple text files), segmentation files or mutational matrices. COSMIC mutational signatures v3.4 are used as the default reference signatures, although previous COSMIC versions and custom signature databases are also supported using the `cosmic_version` and `signature_database` parameters. Results will be found in the folder specified in the `output` parameter.
@@ -66,7 +72,7 @@ Analyze.cosmic_fit(samples, output, input_type="matrix", context_type="96",
6672
| export_probabilities | Boolean | Defines if the probability matrix per mutational context for all samples is created. The default value is True. |
6773
| export_probabilities_per_mutation | Boolean | Defines if the probability matrices per mutation for all samples are created. Only available when `input_type` is "vcf". The default value is False. |
6874
| make_plots | Boolean | Toggle on and off for making and saving plots. The default value is True. |
69-
| sample_reconstruction_plots | String | Select the output format for sample reconstruction plots. Valid inputs are {'pdf', 'png', 'both', None}. The default value is None. |
75+
| sample_reconstruction_plots | String | Select the output format for sample reconstruction plots. Valid inputs are {'pdf', 'png', 'both', 'none'}. The default value is 'none'. If set to 'png' or 'both', the external binary `poppler` must be installed. Install via `conda install -c conda-forge poppler` or `brew install poppler` on macOS. |
7076
| verbose | Boolean | Prints detailed statements. The default value is False. |
7177
| volume | String | Path to SigProfilerAssignment volumes. Used for Docker/Singularity. Environmental variable "SIGPROFILERASSIGNMENT_VOLUME" takes precedence. Default value is None. |
7278

SigProfilerAssignment/controllers/cli_controller.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,19 @@ def parse_arguments_common(args: List[str], description: str) -> argparse.Namesp
135135
default=None,
136136
help="User specified directory for saving/loading template files. Note: The environment variable SIGPROFILERASSIGNMENT_VOLUME takes precedence over this parameter.",
137137
)
138+
parser.add_argument(
139+
"--sample_reconstruction_plots",
140+
type=str.lower,
141+
choices=["none", "pdf", "both", "png"],
142+
default="none",
143+
help=(
144+
"Output format for sample reconstruction plots. "
145+
"Options: 'none' (default, disables plotting), "
146+
"'pdf' (generate only PDF), "
147+
"'both' (PDF + PNG), or "
148+
"'png' (PNG only, PDF removed)."
149+
)
150+
)
138151

139152
return parser.parse_args(args)
140153

@@ -226,5 +239,5 @@ def dispatch_cosmic_fit(self, user_args: List[str]) -> None:
226239
context_type=parsed_args.context_type,
227240
export_probabilities=parsed_args.export_probabilities,
228241
export_probabilities_per_mutation=parsed_args.export_probabilities_per_mutation,
229-
sample_reconstruction_plots=False,
242+
sample_reconstruction_plots=parsed_args.sample_reconstruction_plots,
230243
)

SigProfilerAssignment/decomposition.py

+25-12
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import sigProfilerPlotting
2626
import os, sys
2727
from pypdf import PdfWriter, PdfReader
28-
import fitz
28+
from pdf2image import convert_from_path
2929
import time
3030
from pathlib import Path
3131

@@ -60,21 +60,32 @@ def get_storage_dir(volume=None):
6060

6161

6262
def convert_PDF_to_PNG(input_file_name, output_directory, page_names):
63-
pdf_doc = fitz.open(input_file_name)
64-
zoom = 3
65-
magnify = fitz.Matrix(zoom, zoom)
63+
"""
64+
Converts each page of the PDF to a PNG image with names from page_names.
65+
Requires the 'pdf2image' Python package and the 'poppler' binary.
66+
67+
Parameters:
68+
- input_file_name (str): Path to the input PDF file.
69+
- output_directory (str): Directory where PNGs will be saved.
70+
- page_names (List[str]): List of names (without extensions) to name PNGs.
71+
"""
72+
if not os.path.exists(output_directory):
73+
os.makedirs(output_directory)
74+
75+
# Convert PDF pages to PIL images
76+
try:
77+
images = convert_from_path(input_file_name, dpi=300)
78+
except Exception as e:
79+
raise RuntimeError(f"Error converting PDF to images: {e}")
6680

67-
if pdf_doc.page_count != len(page_names):
81+
if len(images) != len(page_names):
6882
raise ValueError(
6983
"Error: The number of samples and number of plots do not match."
7084
)
71-
if not os.path.exists(output_directory):
72-
os.makedirs(output_directory)
7385

74-
for sample_name, page in zip(page_names, pdf_doc):
75-
pix = page.get_pixmap(matrix=magnify)
76-
out_file_name = os.path.join(output_directory, sample_name + ".png")
77-
pix.save(out_file_name)
86+
for image, sample_name in zip(images, page_names):
87+
output_path = os.path.join(output_directory, sample_name + ".png")
88+
image.save(output_path, "PNG")
7889

7990

8091
# Create sample reconstruction plots
@@ -263,6 +274,7 @@ def spa_analyze(
263274
genome_build = A string. The reference genome build. List of supported genomes: "GRCh37", "GRCh38", "mm9", "mm10" and "rn6". The default value is "GRCh37". If the selected genome is not in the supported list, the default genome will be used.
264275
verbose = Boolean. Prints statements. Default value is False.
265276
exome = Boolean. Defines if the exome renormalized signatures will be used. The default value is False.
277+
sample_reconstruction_plots = A string. Selects the output format for sample reconstruction plots. Valid options are {'pdf', 'png', 'both', 'none'}. The default value is 'none'.
266278
267279
Values:
268280
The files below will be generated in the output folder.
@@ -1061,7 +1073,8 @@ def spa_analyze(
10611073
recon_output_types = ["png", "pdf", "both"]
10621074
# Generate sample reconstruction plots
10631075
if (
1064-
sample_reconstruction_plots in recon_output_types
1076+
isinstance(sample_reconstruction_plots, str)
1077+
and sample_reconstruction_plots.lower() in recon_output_types
10651078
and mutation_type == "96"
10661079
and signature_database is None
10671080
):

pyproject.toml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[build-system]
2+
requires = ["setuptools>=61", "wheel", "build"]
3+
build-backend = "setuptools.build_meta"

setup.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if os.path.exists("dist"):
77
shutil.rmtree("dist")
88

9-
VERSION = "0.2.1"
9+
VERSION = "0.2.2"
1010

1111

1212
def write_version_py(filename="SigProfilerAssignment/version.py"):
@@ -15,7 +15,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"):
1515
# THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY
1616
short_version = '%(version)s'
1717
version = '%(version)s'
18-
Update = 'v0.2.1: Fix bug in CLI returning non-zero exit code'
18+
Update = 'v0.2.2: Fix bug in CLI returning non-zero exit code'
1919
2020
2121
"""
@@ -41,7 +41,8 @@ def write_version_py(filename="SigProfilerAssignment/version.py"):
4141
"reportlab>=3.5.42",
4242
"pypdf>=5.0.0",
4343
"alive_progress>=2.4.1",
44-
"PyMuPDF>=1.21.0", # required for package "fitz"
44+
"pdf2image>=1.16.0", # replacing PyMuPDF
45+
# Note: 'poppler' is required as a system dependency for pdf2image
4546
]
4647

4748
write_version_py()

tests/test_cli.py

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ def test_argument_parsing():
4242
"True",
4343
"--exome",
4444
"True",
45+
"--sample_reconstruction_plots",
46+
"png",
4547
],
4648
"Test argument parsing",
4749
)
@@ -56,6 +58,7 @@ def test_argument_parsing():
5658
assert args.export_probabilities == False
5759
assert args.export_probabilities_per_mutation == True
5860
assert args.exome == True
61+
assert args.sample_reconstruction_plots == "png"
5962

6063

6164
def test_boolean_conversion():

0 commit comments

Comments
 (0)