Skip to content

Commit 3eba8a3

Browse files
committed
implement terminator export
1 parent 7931333 commit 3eba8a3

4 files changed

Lines changed: 25 additions & 4 deletions

File tree

bakta/io/fasta.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def write_ffn(features: Sequence[dict], ffn_path: Path):
97 97
log.info('write feature nucleotide sequences: path=%s', ffn_path)
98 98
with ffn_path.open('wt') as fh:
99 99
for feat in features:
100-
if(feat['type'] in [bc.FEATURE_T_RNA, bc.FEATURE_TM_RNA, bc.FEATURE_R_RNA, bc.FEATURE_NC_RNA, bc.FEATURE_NC_RNA_REGION, bc.FEATURE_CRISPR, bc.FEATURE_CDS, bc.FEATURE_SORF, bc.FEATURE_ORIC, bc.FEATURE_ORIV, bc.FEATURE_ORIT]):
100+
if(feat['type'] in [bc.FEATURE_T_RNA, bc.FEATURE_TM_RNA, bc.FEATURE_R_RNA, bc.FEATURE_NC_RNA, bc.FEATURE_NC_RNA_REGION, bc.FEATURE_CRISPR, bc.FEATURE_CDS, bc.FEATURE_SORF, bc.FEATURE_TERMINATOR, bc.FEATURE_ORIC, bc.FEATURE_ORIV, bc.FEATURE_ORIT]):
101 101
identifier = feat['locus'] if 'locus' in feat else feat['id']
102 102
if(feat.get('product', '') != ''):
103 103
fh.write(f">{identifier} {feat['product']}\n{feat['nt']}\n")

bakta/io/gff.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -315,6 +315,19 @@ def write_features(data: dict, features_by_sequence: Dict[str, dict], gff3_path:
315 315
fh.write(f"{seq_id}\tBakta\t{so.SO_CDS.name}\t{start}\t{stop}\t.\t{feat['strand']}\t0\t{annotations}\n")
316 316
if(bc.FEATURE_SIGNAL_PEPTIDE in feat):
317 317
write_signal_peptide(fh, feat)
318+
elif(feat['type'] == bc.FEATURE_TERMINATOR):
319+
annotations = {
320+
'ID': feat['id'],
321+
'Name': feat['product'],
322+
'product': feat['product'],
323+
'Dbxref': feat['db_xrefs']
324+
}
325+
if(cfg.compliant):
326+
annotations['inference'] = 'profile:TranstermHP:2.0'
327+
annotations['Dbxref'], annotations['Note'] = insdc.revise_dbxref_insdc(feat['db_xrefs']) # remove INSDC invalid DbXrefs
328+
annotations[bc.INSDC_FEATURE_REGULATORY_CLASS] = insdc.select_regulatory_class(feat)
329+
annotations = encode_annotations(annotations)
330+
fh.write(f"{seq_id}\tTranstermHP\t{so.SO_REGULATORY_REGION.name}\t{start}\t{stop}\t.\t{feat['strand']}\t.\t{annotations}\n")
318 331
elif(feat['type'] == bc.FEATURE_GAP):
319 332
annotations = {
320 333
'ID': feat['id'],

bakta/io/insdc.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,12 @@ def build_biopython_sequence_list(data: dict, features: Sequence[dict]):
215 215
insdc_feature_type = bc.INSDC_FEATURE_REGULATORY
216 216
qualifiers['note'].append(feature['product'])
217 217
qualifiers.pop('product', None)
218+
elif(feature['type'] == bc.FEATURE_TERMINATOR):
219+
qualifiers['inference'] = 'profile:TranstermHP:2.0'
220+
qualifiers[bc.INSDC_FEATURE_REGULATORY_CLASS] = select_regulatory_class(feature)
221+
insdc_feature_type = bc.INSDC_FEATURE_REGULATORY
222+
qualifiers['note'].append(feature['product'])
223+
qualifiers.pop('product', None)
218 224
elif(feature['type'] == bc.FEATURE_CRISPR):
219 225
qualifiers[bc.INSDC_FEATURE_REPEAT_FAMILY] = 'CRISPR'
220 226
qualifiers[bc.INSDC_FEATURE_REPEAT_TYPE] = 'direct'
@@ -335,7 +341,9 @@ def select_regulatory_class(feature: dict) -> str:
335 341
feature_class = so.SO(feature_class[0], feature_class[1])
336 342
feature['class'] = feature_class
337 343

338-
if(feature_class.id == so.SO_CIS_REG_ATTENUATOR.id):
344+
if(feature_class.id == so.SO_CIS_REG_TERMINATOR.id):
345+
return bc.INSDC_FEATURE_REGULATORY_CLASS_TERMINATOR
346+
elif(feature_class.id == so.SO_CIS_REG_ATTENUATOR.id):
339 347
return bc.INSDC_FEATURE_REGULATORY_CLASS_ATTENUATOR
340 348
elif(feature_class.id == so.SO_CIS_REG_RIBOSWITCH.id):
341 349
return bc.INSDC_FEATURE_REGULATORY_CLASS_RIBOSWITCH

bakta/io/tsv.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,8 +133,8 @@ def write_feature_inferences(sequences: Sequence[dict], features_by_sequence: Di
133 133
])
134 134
)
135 135
fh.write('\n')
136-
elif(feat['type'] in [bc.FEATURE_T_RNA, bc.FEATURE_R_RNA, bc.FEATURE_NC_RNA, bc.FEATURE_NC_RNA_REGION]):
137-
accession = '-' if feat['type'] == bc.FEATURE_T_RNA else [xref for xref in feat['db_xrefs'] if bc.DB_XREF_RFAM in xref][0]
136+
elif(feat['type'] in [bc.FEATURE_T_RNA, bc.FEATURE_R_RNA, bc.FEATURE_NC_RNA, bc.FEATURE_NC_RNA_REGION, bc.FEATURE_TERMINATOR]):
137+
accession = '-' if feat['type'] in [bc.FEATURE_T_RNA, bc.FEATURE_TERMINATOR] else [xref for xref in feat['db_xrefs'] if bc.DB_XREF_RFAM in xref][0]
138 138
fh.write('\t'.join(
139 139
[
140 140
feat['sequence'] if 'sequence' in feat else feat['contig'], # <1.10.0 compatibility

0 commit comments

Comments
 (0)