Skip to content

Commit 7baf550

Browse files
output SR SVTYPEs
1 parent bef7f04 commit 7baf550

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

wdl/SR_LR_comparison.wdl

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,15 @@ def alt_samples(record):
452452
return carriers
453453
454454
455+
def format_svtype(record):
456+
value = record.info.get('SVTYPE', '.')
457+
if isinstance(value, (tuple, list)):
458+
if len(value) == 0:
459+
return '.'
460+
return str(value[0])
461+
return str(value)
462+
463+
455464
def format_af(record):
456465
if 'AF' in record.info:
457466
value = record.info['AF']
@@ -493,6 +502,7 @@ for record in comp_vcf.fetch():
493502
'comp_id': comp_id,
494503
'sr_id': record.id if record.id is not None else '.',
495504
'sr_af': format_af(record),
505+
'sr_svtype': format_svtype(record),
496506
'samples': alt_samples(record)
497507
}
498508
base_key = (record.chrom, base_id)
@@ -549,17 +559,18 @@ with open(out_tsv, 'w') as out:
549559
for match in candidate_matches:
550560
if require_sample_overlap and len(base_carriers.intersection(match['samples'])) == 0:
551561
continue
552-
kept.append((match['sr_id'], match['sr_af']))
562+
kept.append((match['sr_id'], match['sr_af'], match['sr_svtype']))
553563
554564
if not kept:
555565
continue
556566
557-
kept = sorted(set(kept), key=lambda x: (x[0], x[1]))
567+
kept = sorted(set(kept), key=lambda x: (x[0], x[1], x[2]))
558568
alt_value = ','.join(record.alts) if record.alts is not None else '.'
559569
sr_ids = ','.join(item[0] for item in kept)
560570
sr_afs = ','.join(item[1] for item in kept)
571+
sr_svtypes = ','.join(item[2] for item in kept)
561572
lr_id = record.id if record.id is not None else '.'
562-
out.write(f"{record.chrom}\t{record.pos}\t{record.ref}\t{alt_value}\t{lr_id}\t{sr_ids}\t{sr_afs}\n")
573+
out.write(f"{record.chrom}\t{record.pos}\t{record.ref}\t{alt_value}\t{lr_id}\t{sr_ids}\t{sr_afs}\t{sr_svtypes}\n")
563574
base_vcf.close()
564575
EOF
565576
>>>
@@ -652,12 +663,17 @@ out_vcf = sys.argv[3]
652663
annotations = {}
653664
with open(anno_tsv, 'r') as handle:
654665
for line in handle:
655-
chrom, pos, ref, alt, lr_id, sr_ids, sr_afs = line.rstrip('\n').split('\t')
656-
annotations[(chrom, pos, ref, alt, lr_id)] = (tuple(sr_ids.split(',')), tuple(sr_afs.split(',')))
666+
chrom, pos, ref, alt, lr_id, sr_ids, sr_afs, sr_svtypes = line.rstrip('\n').split('\t')
667+
annotations[(chrom, pos, ref, alt, lr_id)] = (
668+
tuple(sr_ids.split(',')),
669+
tuple(sr_afs.split(',')),
670+
tuple(sr_svtypes.split(','))
671+
)
657672
658673
header = vcf_in.header.copy()
659674
header.add_line('##INFO=<ID=SR_MATCH_IDS,Number=.,Type=String,Description="Matched short-read structural variant IDs from Truvari benchmarking">')
660675
header.add_line('##INFO=<ID=SR_MATCH_AFS,Number=.,Type=String,Description="Allele frequencies of matched short-read structural variants, parallel to SR_MATCH_IDS">')
676+
header.add_line('##INFO=<ID=SR_MATCH_SVTYPES,Number=.,Type=String,Description="SVTYPE values of matched short-read structural variants, parallel to SR_MATCH_IDS">')
661677
header.add_line('##INFO=<ID=SR_MATCH_COUNT,Number=1,Type=Integer,Description="Number of matched short-read structural variants">')
662678
663679
vcf_out = VariantFile(out_vcf, 'w', header=header)
@@ -666,9 +682,10 @@ for record in vcf_in:
666682
lr_id = record.id if record.id is not None else '.'
667683
key = (record.chrom, str(record.pos), record.ref, alt, lr_id)
668684
if key in annotations:
669-
sr_ids, sr_afs = annotations[key]
685+
sr_ids, sr_afs, sr_svtypes = annotations[key]
670686
record.info['SR_MATCH_IDS'] = sr_ids
671687
record.info['SR_MATCH_AFS'] = sr_afs
688+
record.info['SR_MATCH_SVTYPES'] = sr_svtypes
672689
record.info['SR_MATCH_COUNT'] = len(sr_ids)
673690
vcf_out.write(record)
674691

0 commit comments

Comments
 (0)