Skip to content

Commit f84f702

Browse files
committed
catch rare product None cases #415
1 parent a8247e7 commit f84f702

1 file changed

Lines changed: 6 additions & 5 deletions

File tree

bakta/features/cds.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -560,21 +560,22 @@ def revise_special_cases_annotated(data: dict, cdss: Sequence[dict]):
560560
for cds in seq_cdss:
561561
# look for supposedly truncated genes on rotated sequences
562562
if cds['start'] == 1 and cds['strand'] == bc.STRAND_FORWARD and 'truncated' in cds and cds['start_type'] == 'Edge' and cds['rbs_motif'] is None:
563-
cds_gene_symbol = cds.get('gene', '-')
564-
if RE_CHROM_ROTATION_GENE.fullmatch(cds_gene_symbol) or any(map(lambda x: x is not None, [RE_CHROM_ROTATION_GENE.fullmatch(term) for term in cds['product'].split()])):
563+
cds_gene_symbol = cds.get('gene', '-') or '-'
564+
cds_product = cds.get('product', '-') or '-'
565+
if RE_CHROM_ROTATION_GENE.fullmatch(cds_gene_symbol) or any(map(lambda x: x is not None, [RE_CHROM_ROTATION_GENE.fullmatch(term) for term in cds_product.split()])):
565566
# look for dnaA genes on rotated chromosome starts
566567
cds.pop('truncated')
567568
log.info(
568569
'revise supposedly truncated dnaA gene on rotated chromosome start: seq=%s, start=%i, stop=%i, strand=%s, gene=%s, product=%s, nt=[%s..%s], aa=[%s..%s]',
569-
cds['sequence'], cds['start'], cds['stop'], cds['strand'], cds_gene_symbol, cds['product'], cds['nt'][:10], cds['nt'][-10:], cds['aa'][:10], cds['aa'][-10:]
570+
cds['sequence'], cds['start'], cds['stop'], cds['strand'], cds_gene_symbol, cds_product, cds['nt'][:10], cds['nt'][-10:], cds['aa'][:10], cds['aa'][-10:]
570571
)
571572
break
572-
elif RE_PLASMID_ROTATION_GENE.fullmatch(cds_gene_symbol) or any(map(lambda x: x is not None, [RE_PLASMID_ROTATION_GENE.fullmatch(term) for term in cds['product'].split()])):
573+
elif RE_PLASMID_ROTATION_GENE.fullmatch(cds_gene_symbol) or any(map(lambda x: x is not None, [RE_PLASMID_ROTATION_GENE.fullmatch(term) for term in cds_product.split()])):
573574
# look for repABC|parAB genes on rotated plasmid starts
574575
cds.pop('truncated')
575576
log.info(
576577
'revise supposedly truncated repABC/parAB gene on rotated plasmid start: seq=%s, start=%i, stop=%i, strand=%s, gene=%s, product=%s, nt=[%s..%s], aa=[%s..%s]',
577-
cds['sequence'], cds['start'], cds['stop'], cds['strand'], cds_gene_symbol, cds['product'], cds['nt'][:10], cds['nt'][-10:], cds['aa'][:10], cds['aa'][-10:]
578+
cds['sequence'], cds['start'], cds['stop'], cds['strand'], cds_gene_symbol, cds_product, cds['nt'][:10], cds['nt'][-10:], cds['aa'][:10], cds['aa'][-10:]
578579
)
579580
break
580581

0 commit comments

Comments
 (0)