@@ -320,8 +320,12 @@ def match_barcodes_with_orientation(target_bc1, target_bc2, sample_rows,
320320 matched_rows: List of matching sample rows (empty if no match, ambiguous, or high N fraction)
321321 orientation_info: dict with keys:
322322 - 'barcode_2_revcomp': bool (True if reverse complement was needed)
323- - 'matched_bc1': str (the samplesheet barcode_1 value that matched)
324- - 'matched_bc2': str (the samplesheet barcode_2 value that matched)
323+ - 'matched_bc1': str. On successful match (no 'skipped_reason'), this is
324+ the samplesheet barcode_1 value. When 'skipped_reason' is set, this
325+ is the normalized barcode_1 observed in the FASTQ header.
326+ - 'matched_bc2': str. On successful match (no 'skipped_reason'), this is
327+ the samplesheet barcode_2 value. When 'skipped_reason' is set, this
328+ is the normalized barcode_2 observed in the FASTQ header.
325329 - 'skipped_reason': str (if empty results, why - 'high_n_fraction', 'ambiguous', or 'no_match')
326330 """
327331 log = logging .getLogger (__name__ )
@@ -860,19 +864,24 @@ def consensus_barcode_from_fastq(fastq_path, num_reads=10):
860864
861865 Returns:
862866 tuple: (barcode_1, barcode_2_or_None) normalized consensus barcodes,
863- or (None, None) if file is empty or has no parseable barcodes.
867+ or (None, None) if file is empty (no reads).
868+
869+ Raises:
870+ ValueError: If the FASTQ has reads but none have parseable barcode headers.
864871 """
865872 log = logging .getLogger (__name__ )
866873
867874 bc1_list = []
868875 bc2_list = []
876+ headers_seen = 0
869877
870878 with util_file .open_or_gzopen (fastq_path , 'rt' ) as f :
871879 reads_found = 0
872880 for line_num , line in enumerate (f ):
873881 # FASTQ format: header is every 4th line starting at line 0
874882 if line_num % 4 != 0 :
875883 continue
884+ headers_seen += 1
876885 if reads_found >= num_reads :
877886 break
878887
@@ -890,14 +899,27 @@ def consensus_barcode_from_fastq(fastq_path, num_reads=10):
890899 reads_found += 1
891900
892901 if not bc1_list :
902+ if headers_seen > 0 :
903+ raise ValueError (
904+ f"FASTQ file has reads but no parseable barcode headers in first "
905+ f"{ headers_seen } reads: { fastq_path } "
906+ )
893907 return None , None
894908
895909 def _position_consensus (bases_at_position ):
896- """Return majority base at a position, ignoring N."""
910+ """Return majority base at a position, ignoring N.
911+
912+ Requires strict majority (>50% of non-N bases). Returns N if
913+ there is no clear winner (e.g., a tie).
914+ """
897915 non_n = [b for b in bases_at_position if b != 'N' ]
898916 if not non_n :
899917 return 'N'
900- return Counter (non_n ).most_common (1 )[0 ][0 ]
918+ counts = Counter (non_n )
919+ top_base , top_count = counts .most_common (1 )[0 ]
920+ if top_count * 2 <= len (non_n ):
921+ return 'N' # no strict majority (tie or split)
922+ return top_base
901923
902924 # Build consensus for bc1
903925 bc1_len = len (bc1_list [0 ])
@@ -1025,6 +1047,10 @@ def splitcode_demux_fastqs(
10251047 raise FileNotFoundError (f"R2 FASTQ not found: { fastq_r2 } " )
10261048 if not os .path .exists (samplesheet ):
10271049 raise FileNotFoundError (f"Samplesheet not found: { samplesheet } " )
1050+ if max_barcode_mismatches < 0 :
1051+ raise ValueError (f"max_barcode_mismatches must be >= 0, got { max_barcode_mismatches } " )
1052+ if num_reads_for_barcode < 1 :
1053+ raise ValueError (f"num_reads_for_barcode must be >= 1, got { num_reads_for_barcode } " )
10281054
10291055 # Parse RunInfo.xml if provided (unless flowcell_id and run_date both overridden)
10301056 runinfo_obj = None
0 commit comments