Skip to content

Add ruff to pre-commit checks #464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ trim_trailing_whitespace = true
indent_size = 4
indent_style = space

[*.{md,yml,yaml,html,css,scss,js}]
[*.{md,yml,yaml,html,css,scss,js,toml}]
indent_size = 2

# These files are edited and tested upstream in nf-core/modules
Expand Down
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,12 @@ repos:
hooks:
- id: editorconfig-checker
alias: ec

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.6
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
types_or: [python, pyi, jupyter]
53 changes: 53 additions & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
line-length = 120
extend-include = [ "*.ipynb" ]
exclude = [
"modules/nf-core/*",
"subworkflows/nf-core/*",
]

format.docstring-code-format = true
lint.select = [
"B", # flake8-bugbear
"BLE", # flake8-blind-except
"C4", # flake8-comprehensions
"D", # pydocstyle
"E", # Error detected by Pycodestyle
"F", # Errors detected by Pyflakes
"I", # isort
"RUF100", # Report unused noqa directives
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # Warning detected by Pycodestyle
]
lint.ignore = [
# Errors from function calls in argument defaults. These are fine when the result is immutable.
"B008",
# Missing docstring in public module
"D100",
# Missing docstring in public package
"D104",
# __magic__ methods are often self-explanatory, allow missing docstrings
"D105",
# Missing docstring in __init__
"D107",
## Disable one in each pair of mutually incompatible rules
# We don’t want a blank line before a class docstring
"D203",
# We want docstrings to start immediately after the opening triple quote
"D213",
# first line should end with a period [Bug: doesn't work with single-line docstrings]
"D400",
# First line should be in imperative mood; try rephrasing
"D401",
# line too long -> we accept long comment lines; formatter gets rid of long code lines
"E501",
# Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
"E731",
# allow I, O, l as variable names -> I is the identity matrix
"E741",
# We can live without docstrings in public functions in scripts
"D103"
]
lint.per-file-ignores."*/templates/*" = [
"F821" # Undefined name (due to nextflow variables)
]
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix simpleaf protocol name for 10xv4 ([#452](https://github.com/nf-core/scrnaseq/pull/452))
- Fix the workflow for cellranger-arc alignment and add new test with 10x multiome dataset ([#441](https://github.com/nf-core/scrnaseq/pull/441))

### Chore

- Add ruff pre-commit check as linter/autoformatter for python scripts ([#464](https://github.com/nf-core/scrnaseq/pull/464))

## v4.0.0 - 2025-03-10

- Move `txp2gene` to `reference_genome_options` in schema as it is required by `kb_python` and `alevin` ([434](https://github.com/nf-core/scrnaseq/pull/434))
Expand Down
8 changes: 5 additions & 3 deletions bin/check_cellrangermulti.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import sys


def parse_samplesheet(samplesheet_path):
# Define required headers
required_headers = ["sample", "multiplexed_sample_id", "description"]
Expand All @@ -14,7 +15,7 @@ def parse_samplesheet(samplesheet_path):
os.makedirs(cmo_output_dir, exist_ok=True)
os.makedirs(frna_output_dir, exist_ok=True)

with open(samplesheet_path, 'r') as csvfile:
with open(samplesheet_path) as csvfile:
reader = csv.DictReader(csvfile)
headers = reader.fieldnames

Expand All @@ -33,7 +34,7 @@ def parse_samplesheet(samplesheet_path):
# Process CMOs
if "cmo_ids" in headers and row["cmo_ids"]:
cmo_filename = os.path.join(cmo_output_dir, f"{sample}_cmo.csv")
with open(cmo_filename, 'a', newline='') as cmo_file:
with open(cmo_filename, "a", newline="") as cmo_file:
cmo_writer = csv.writer(cmo_file)
if not os.path.exists(cmo_filename) or os.stat(cmo_filename).st_size == 0:
cmo_writer.writerow(["sample_id", "cmo_ids", "description"])
Expand All @@ -42,7 +43,7 @@ def parse_samplesheet(samplesheet_path):
# Process FRNAs
if "probe_barcode_ids" in headers and row["probe_barcode_ids"]:
frna_filename = os.path.join(frna_output_dir, f"{sample}_frna.csv")
with open(frna_filename, 'a', newline='') as frna_file:
with open(frna_filename, "a", newline="") as frna_file:
frna_writer = csv.writer(frna_file)
if not os.path.exists(frna_filename) or os.stat(frna_filename).st_size == 0:
frna_writer.writerow(["sample_id", "probe_barcode_ids", "description"])
Expand All @@ -51,6 +52,7 @@ def parse_samplesheet(samplesheet_path):
print("Parsing completed successfully.")
return True


if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python script.py <path_to_samplesheet>")
Expand Down
22 changes: 10 additions & 12 deletions bin/filter_gtf_for_genes_in_genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
# Script originally written by Pranathi Vemuri (github.com/pranathivemuri)
# modified by Harshil Patel (github.com/drpatelh)

from __future__ import print_function
import argparse
import logging
from itertools import groupby
import argparse

# Create a logger
logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s")
Expand All @@ -18,11 +17,12 @@ def is_header(line):


def extract_fasta_seq_names(fasta_name):
"""
modified from Brent Pedersen
"""Get sequence names from FASTA files.

Modified from Brent Pedersen
Correct Way To Parse A Fasta File In Python
given a fasta file. yield tuples of header, sequence
from https://www.biostars.org/p/710/
from https://www.biostars.org/p/710/.
"""
# first open the file outside
fh = open(fasta_name)
Expand All @@ -31,7 +31,7 @@ def extract_fasta_seq_names(fasta_name):
# we know they alternate.
faiter = (x[1] for x in groupby(fh, is_header))

for i, header in enumerate(faiter):
for _i, header in enumerate(faiter):
line = next(header)
if is_header(line):
# drop the ">"
Expand All @@ -41,9 +41,9 @@ def extract_fasta_seq_names(fasta_name):

def extract_genes_in_genome(fasta, gtf_in, gtf_out):
seq_names_in_genome = set(extract_fasta_seq_names(fasta))
logger.info("Extracted chromosome sequence names from : %s" % fasta)
logger.info(f"Extracted chromosome sequence names from : {fasta}")
logger.info("All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome)))
seq_names_in_gtf = set([])
seq_names_in_gtf = set()

n_total_lines = 0
n_lines_in_genome = 0
Expand All @@ -56,12 +56,10 @@ def extract_genes_in_genome(fasta, gtf_in, gtf_out):
if seq_name_gtf in seq_names_in_genome:
n_lines_in_genome += 1
f.write(line)
logger.info(
"Extracted %d / %d lines from %s matching sequences in %s" % (n_lines_in_genome, n_total_lines, gtf_in, fasta)
)
logger.info(f"Extracted {n_lines_in_genome} / {n_total_lines} lines from {gtf_in} matching sequences in {fasta}")
logger.info("All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf)))

logger.info("Wrote matching lines to %s" % gtf_out)
logger.info(f"Wrote matching lines to {gtf_out}")


if __name__ == "__main__":
Expand Down
1 change: 0 additions & 1 deletion bin/generate_lib_csv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
import argparse
import os

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.")
Expand Down
7 changes: 4 additions & 3 deletions bin/t2g.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys, argparse
import argparse
import sys


def create_transcript_list(input, use_name=True, use_version=False):
Expand Down Expand Up @@ -77,9 +78,9 @@ def create_transcript_list(input, use_name=True, use_version=False):
def print_output(output, r, use_name=True):
for tid in r:
if use_name:
output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1]))
output.write(f"{tid}\t{r[tid][0]}\t{r[tid][1]}\n")
else:
output.write("%s\t%s\n" % (tid, r[tid][0]))
output.write(f"{tid}\t{r[tid][0]}\n")


if __name__ == "__main__":
Expand Down
14 changes: 11 additions & 3 deletions modules/local/templates/concat_h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@

os.environ["NUMBA_CACHE_DIR"] = "."

import scanpy as sc, anndata as ad, pandas as pd
from pathlib import Path
import platform
from pathlib import Path

import anndata as ad
import pandas as pd
import scanpy as sc


def read_samplesheet(samplesheet):
Expand All @@ -21,13 +24,17 @@ def read_samplesheet(samplesheet):

return df


def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.

Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.

Returns:
str: A string formatted as YAML.

"""
yaml_str = ""
for key, value in data.items():
Expand All @@ -38,6 +45,7 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str


def dump_versions():
versions = {
"${task.process}": {
Expand All @@ -49,8 +57,8 @@ def dump_versions():
with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))

if __name__ == "__main__":

if __name__ == "__main__":
# Open samplesheet as dataframe
df_samplesheet = read_samplesheet("${samplesheet}")

Expand Down
27 changes: 16 additions & 11 deletions modules/local/templates/mtx_to_h5ad_cellranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@

os.environ["NUMBA_CACHE_DIR"] = "."

import scanpy as sc
import pandas as pd
import argparse
import anndata
from anndata import AnnData
import platform
import glob
import platform

import anndata
import pandas as pd
import scanpy as sc


def _mtx_to_adata(
input: str,
sample: str,
):

adata = sc.read_10x_h5(input)
adata.var["gene_symbols"] = adata.var_names
adata.var.set_index("gene_ids", inplace=True)
Expand All @@ -31,11 +30,14 @@ def _mtx_to_adata(

def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.

Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.

Returns:
str: A string formatted as YAML.

"""
yaml_str = ""
for key, value in data.items():
Expand All @@ -46,6 +48,7 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str


def dump_versions():
versions = {
"${task.process}": {
Expand All @@ -59,6 +62,7 @@ def dump_versions():
with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))


def input_to_adata(
input_data: str,
output: str,
Expand All @@ -71,8 +75,8 @@ def input_to_adata(

# standard format
# index are gene IDs and symbols are a column
adata.var['gene_versions'] = adata.var.index
adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values
adata.var["gene_versions"] = adata.var.index
adata.var.index = adata.var["gene_versions"].str.split(".").str[0].values
adata.var_names_make_unique()

# write results
Expand All @@ -84,6 +88,7 @@ def input_to_adata(

return adata


#
# Run main script
#
Expand All @@ -93,7 +98,7 @@ def input_to_adata(

# input_type comes from NF module
adata = input_to_adata(
input_data=glob.glob("*${meta.input_type}_feature_bc_matrix.h5")[0], # cellrangermulti has 'sample_' as prefix
input_data=glob.glob("*${meta.input_type}_feature_bc_matrix.h5")[0], # cellrangermulti has 'sample_' as prefix
output="${meta.id}_${meta.input_type}_matrix.h5ad",
sample="${meta.id}"
sample="${meta.id}",
)
Loading
Loading