bigbio · ypriverol · Mar 21, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/mokume/io/feature.py b/mokume/io/feature.py
diff --git a/mokume/pipeline/stages.py b/mokume/pipeline/stages.py
@@ -161,18 +161,13 @@ def load_for_directlfq(self) -> pd.DataFrame:
             feature.enrich_with_sdrf(self.config.input.sdrf)
 
         # Build query with filters
-        where_clause = filter_builder.build_where_clause()
-        query = f"""
-            SELECT
-                pg_accessions,
-                sequence,
-                sample_accession,
-                intensity
-            FROM parquet_db
-            WHERE {where_clause}
-        """
+        where_clause, where_params = filter_builder.build_where_clause()
+        query = "".join([
+            "SELECT pg_accessions, sequence, sample_accession, intensity",
+            " FROM parquet_db WHERE ", where_clause,
+        ])
 
-        df = feature.parquet_db.sql(query).df()
+        df = feature.parquet_db.execute(query, where_params).df()
 
         # Parse protein accessions
         # Extract first element from pg_accessions list, then parse UniProt ID

diff --git a/mokume/quantification/__init__.py b/mokume/quantification/__init__.py
@@ -50,6 +50,7 @@ def __getattr__(name):
     """Lazy import for optional dependencies."""
     if name == "DirectLFQQuantification":
         from mokume.quantification.directlfq import DirectLFQQuantification
+
         return DirectLFQQuantification
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
@@ -70,7 +71,6 @@ def get_quantification_method(method: str, **kwargs) -> ProteinQuantificationMet
         For MaxLFQ:
             - min_peptides: int (default 2)
             - n_jobs: int (default -1, all cores)
-            - use_variance_guided: bool (default True)
 
         For TopN:
             - n: int (default 3, can also be parsed from method name)
@@ -120,13 +120,13 @@ def get_quantification_method(method: str, **kwargs) -> ProteinQuantificationMet
     elif method_lower == "maxlfq":
         return MaxLFQQuantification(
             min_peptides=kwargs.get("min_peptides", 2),
-            n_jobs=kwargs.get("n_jobs", -1),
-            use_variance_guided=kwargs.get("use_variance_guided", True),
+            threads=kwargs.get("n_jobs", -1),
             verbose=kwargs.get("verbose", 0),
         )
 
     elif method_lower == "directlfq":
         from mokume.quantification.directlfq import DirectLFQQuantification
+
         return DirectLFQQuantification(
             min_nonan=kwargs.get("min_nonan", 1),
             num_cores=kwargs.get("num_cores", None),
@@ -139,8 +139,7 @@ def get_quantification_method(method: str, **kwargs) -> ProteinQuantificationMet
     else:
         available = "topN (e.g., top3, top5, top10), maxlfq, directlfq, all/sum"
         raise ValueError(
-            f"Unknown quantification method: {method}. "
-            f"Available methods: {available}"
+            f"Unknown quantification method: {method}. Available methods: {available}"
         )
 
 

diff --git a/mokume/quantification/ratio.py b/mokume/quantification/ratio.py
@@ -301,13 +301,12 @@ def load_psm_data(
         Long-format PSM data with columns: ProteinName, PeptideCanonical,
         PrecursorCharge, SampleID, Fraction, Intensity.
     """
-    # Build SQL filters
+    # Build SQL filters (where_clause built after is_decoy detection below)
     filter_builder = SQLFilterBuilder(
         remove_contaminants=remove_contaminants,
         min_peptide_length=min_aa,
         require_unique=True,
     )
-    where_clause = filter_builder.build_where_clause()
 
     # Load SDRF for fraction info
     sdrf_df = pd.read_csv(sdrf_path, sep="\t")
@@ -335,32 +334,53 @@ def _strip_raw_ext(name: str) -> str:
         ]
         is_new_qpx = "charge" in cols or "run_file_name" in cols
 
-        # Predefined query templates (no user-controlled data)
-        _QUERY_NEW_QPX = (
-            "SELECT pg_accessions, sequence,"
-            " charge as precursor_charge,"
-            " run_file_name as run_file_name,"
-            " unnest.label as label,"
-            " unnest.intensity as intensity"
-            " FROM read_parquet(?) AS parquet_raw, UNNEST(intensities) as unnest"
-            " WHERE unnest.intensity IS NOT NULL AND "
-        )
-        _QUERY_OLD_QPX = (
-            "SELECT pg_accessions, sequence,"
-            " precursor_charge as precursor_charge,"
-            " unnest.sample_accession as sample_accession,"
-            " reference_file_name as run_file_name,"
-            " unnest.channel as label,"
-            " unnest.intensity as intensity"
-            " FROM read_parquet(?) AS parquet_raw, UNNEST(intensities) as unnest"
-            " WHERE unnest.intensity IS NOT NULL AND "
+        # Set has_is_decoy before building WHERE clause so DECOY filter is optimal
+        if "is_decoy" in cols:
+            filter_builder.has_is_decoy = True
+        where_clause, where_params = filter_builder.build_where_clause()
+
+        # Detect if pg_accessions is list<struct{accession,...}> (new QPX)
+        pg_is_struct = False
+        if "pg_accessions" in cols:
+            try:
+                type_str = conn.execute(
+                    "SELECT typeof(pg_accessions) FROM read_parquet(?) LIMIT 1",
+                    [parquet_path],
+                ).fetchone()[0].lower()
+                pg_is_struct = "struct" in type_str
+            except Exception as exc:
+                logger.debug("Could not detect pg_accessions type: %s", exc)
+        pg_col = (
+            "list_transform(pg_accessions, x -> x.accession) as pg_accessions"
+            if pg_is_struct
+            else "pg_accessions"
         )
 
+        # Predefined query templates; pg_col is one of two fixed literals (no user-controlled data)
+        _QUERY_NEW_QPX = "".join([
+            "SELECT ", pg_col, ", sequence,",
+            " charge as precursor_charge,",
+            " run_file_name as run_file_name,",
+            " unnest.label as label,",
+            " unnest.intensity as intensity",
+            " FROM read_parquet(?) AS parquet_raw, UNNEST(intensities) as unnest",
+            " WHERE unnest.intensity IS NOT NULL AND ",
+        ])
+        _QUERY_OLD_QPX = "".join([
+            "SELECT ", pg_col, ", sequence,",
+            " precursor_charge as precursor_charge,",
+            " unnest.sample_accession as sample_accession,",
+            " reference_file_name as run_file_name,",
+            " unnest.channel as label,",
+            " unnest.intensity as intensity",
+            " FROM read_parquet(?) AS parquet_raw, UNNEST(intensities) as unnest",
+            " WHERE unnest.intensity IS NOT NULL AND ",
+        ])
+
         base_query = _QUERY_NEW_QPX if is_new_qpx else _QUERY_OLD_QPX
-        # where_clause is built by SQLFilterBuilder from validated config only
         query = "".join((base_query, where_clause))
 
-        df = conn.execute(query, [parquet_path]).df()
+        df = conn.execute(query, [parquet_path] + where_params).df()
     finally:
         conn.close()