@@ -452,6 +452,15 @@ def alt_samples(record):
452452 return carriers
453453
454454
455+ def format_svtype (record ):
456+ value = record.info.get ('SVTYPE' , '.' )
457+ if isinstance (value , (tuple , list )):
458+ if len (value ) == 0 :
459+ return '.'
460+ return str (value [0 ])
461+ return str (value )
462+
463+
455464def format_af (record ):
456465 if 'AF' in record.info :
457466 value = record.info ['AF' ]
@@ -493,6 +502,7 @@ for record in comp_vcf.fetch():
493502 'comp_id' : comp_id ,
494503 'sr_id' : record.id if record.id is not None else '.' ,
495504 'sr_af' : format_af (record ),
505+ 'sr_svtype' : format_svtype (record ),
496506 'samples' : alt_samples (record )
497507 }
498508 base_key = (record.chrom , base_id )
@@ -549,17 +559,18 @@ with open(out_tsv, 'w') as out:
549559 for match in candidate_matches :
550560 if require_sample_overlap and len (base_carriers.intersection (match ['samples' ])) == 0 :
551561 continue
552- kept.append ((match ['sr_id '], match ['sr_af ']))
562+ kept.append ((match ['sr_id '], match ['sr_af '], match [' sr_svtype '] ))
553563
554564 if not kept :
555565 continue
556566
557- kept = sorted (set (kept ), key = lambda x : (x [0 ], x [1 ]))
567+ kept = sorted (set (kept ), key = lambda x : (x [0 ], x [1 ], x [ 2 ] ))
558568 alt_value = ',' .join (record.alts ) if record.alts is not None else '.'
559569 sr_ids = ',' .join (item [0 ] for item in kept )
560570 sr_afs = ',' .join (item [1 ] for item in kept )
571+ sr_svtypes = ',' .join (item [2 ] for item in kept )
561572 lr_id = record.id if record.id is not None else '.'
562- out.write (f "{record.chrom}\t{record.pos}\t{record.ref}\t{alt_value}\t{lr_id}\t{sr_ids}\t{sr_afs}\n" )
573+ out.write (f "{record.chrom}\t{record.pos}\t{record.ref}\t{alt_value}\t{lr_id}\t{sr_ids}\t{sr_afs}\t{sr_svtypes}\ n" )
563574base_vcf.close ()
564575EOF
565576 >>>
@@ -652,12 +663,17 @@ out_vcf = sys.argv[3]
652663annotations = {}
653664with open (anno_tsv , 'r' ) as handle :
654665 for line in handle :
655- chrom , pos , ref , alt , lr_id , sr_ids , sr_afs = line.rstrip ('\n' ).split ('\t' )
656- annotations [(chrom , pos , ref , alt , lr_id )] = (tuple (sr_ids.split (',' )), tuple (sr_afs.split (',' )))
666+ chrom , pos , ref , alt , lr_id , sr_ids , sr_afs , sr_svtypes = line.rstrip ('\n' ).split ('\t' )
667+ annotations [(chrom , pos , ref , alt , lr_id )] = (
668+ tuple (sr_ids.split (',' )),
669+ tuple (sr_afs.split (',' )),
670+ tuple (sr_svtypes.split (',' ))
671+ )
657672
658673header = vcf_in.header.copy ()
659674header.add_line ('##INFO=<ID=SR_MATCH_IDS,Number=.,Type=String,Description="Matched short-read structural variant IDs from Truvari benchmarking">' )
660675header.add_line ('##INFO=<ID=SR_MATCH_AFS,Number=.,Type=String,Description="Allele frequencies of matched short-read structural variants, parallel to SR_MATCH_IDS">' )
676+ header.add_line ('##INFO=<ID=SR_MATCH_SVTYPES,Number=.,Type=String,Description="SVTYPE values of matched short-read structural variants, parallel to SR_MATCH_IDS">' )
661677header.add_line ('##INFO=<ID=SR_MATCH_COUNT,Number=1,Type=Integer,Description="Number of matched short-read structural variants">' )
662678
663679vcf_out = VariantFile (out_vcf , 'w' , header = header )
@@ -666,9 +682,10 @@ for record in vcf_in:
666682 lr_id = record.id if record.id is not None else '.'
667683 key = (record.chrom , str (record.pos ), record.ref , alt , lr_id )
668684 if key in annotations :
669- sr_ids , sr_afs = annotations [key ]
685+ sr_ids , sr_afs , sr_svtypes = annotations [key ]
670686 record.info ['SR_MATCH_IDS' ] = sr_ids
671687 record.info ['SR_MATCH_AFS' ] = sr_afs
688+ record.info ['SR_MATCH_SVTYPES' ] = sr_svtypes
672689 record.info ['SR_MATCH_COUNT' ] = len (sr_ids )
673690 vcf_out.write (record )
674691
0 commit comments