Skip to content

Commit b72113c

Browse files
authored
Merge pull request #184 from dvitale199/related
Related
2 parents 8fb1ff0 + 6492cd5 commit b72113c

File tree

1 file changed

+23
-28
lines changed

1 file changed

+23
-28
lines changed

genotools/qc.py

+23-28
Original file line numberDiff line numberDiff line change
@@ -416,53 +416,51 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
416416
if related_cutoff < 0 or related_cutoff > 1 or duplicated_cutoff < 0 or duplicated_cutoff > 1:
417417
raise ValueError("related_cutoff and duplicated_cutoff should be between 0 and 1.")
418418

419-
grm1 = f"{out_path}_total_grm"
420-
grm2 = f"{out_path}_related_grm"
421-
grm3 = f"{out_path}_duplicated_grm"
419+
king1 = f"{out_path}_related_king"
420+
king2 = f"{out_path}_duplicated_king"
422421

423422
related_pairs = f"{out_path}_pairs"
424423
related_out = f"{related_pairs}.related"
425424
related_pruned_out = f"{out_path}.pruned"
426425

427426
# create pfiles
428-
king_cmd1 = f'{plink2_exec} --pfile {geno_path} --hwe 0.0001 --mac 2 --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {grm1}'
429427
# create table of related pairs
430-
king_cmd2 = f'{plink2_exec} --pfile {grm1} --make-king-table --make-king triangle bin --king-table-filter {related_cutoff} --out {related_pairs}'
428+
king_cmd1 = f'{plink2_exec} --pfile {geno_path} --make-king-table --make-king triangle bin --king-table-filter {related_cutoff} --out {related_pairs}'
431429
# see if any samples are related (includes duplicates)
432-
king_cmd3 = f'{plink2_exec} --pfile {grm1} --king-cutoff {related_pairs} {related_cutoff} --out {grm2}'
430+
king_cmd2 = f'{plink2_exec} --pfile {geno_path} --king-cutoff {related_pairs} {related_cutoff} --out {king1}'
433431
# see if any samples are duplicated (grm cutoff >= 0.354)
434-
king_cmd4 = f'{plink2_exec} --pfile {grm1} --king-cutoff {related_pairs} {duplicated_cutoff} --out {grm3}'
432+
king_cmd3 = f'{plink2_exec} --pfile {geno_path} --king-cutoff {related_pairs} {duplicated_cutoff} --out {king2}'
435433

436-
cmds = [king_cmd1, king_cmd2, king_cmd3, king_cmd4]
434+
cmds = [king_cmd1, king_cmd2, king_cmd3]
437435
for cmd in cmds:
438436
shell_do(cmd)
439437

440-
listOfFiles = [f'{grm1}.log', f'{related_pairs}.log', f'{grm2}.log', f'{grm3}.log']
438+
listOfFiles = [f'{related_pairs}.log', f'{king1}.log', f'{king2}.log']
441439
concat_logs(step, out_path, listOfFiles)
442440

443-
if os.path.isfile(f'{related_pairs}.kin0') and os.path.isfile(f'{grm2}.king.cutoff.out.id') and os.path.isfile(f'{grm3}.king.cutoff.out.id'):
441+
if os.path.isfile(f'{related_pairs}.kin0') and os.path.isfile(f'{king1}.king.cutoff.out.id') and os.path.isfile(f'{king2}.king.cutoff.out.id'):
444442

445443
# create .related related pair sample files
446444
kinship = pd.read_csv(f'{related_pairs}.kin0', sep='\s+')
447445
kinship['REL'] = pd.cut(x=kinship['KINSHIP'], bins=[-np.inf, 0.0884, 0.177, 0.354, np.inf], labels=['unrel', 'second_deg', 'first_deg', 'duplicate'])
448446
kinship.to_csv(f'{related_pairs}.related', index=False)
449447

450448
# create .related and .duplicated single sample files
451-
shutil.copy(f'{grm2}.king.cutoff.out.id',f'{grm2}.related')
452-
related_count = sum(1 for line in open(f'{grm2}.related'))
449+
shutil.copy(f'{king1}.king.cutoff.out.id',f'{king1}.related')
450+
related_count = sum(1 for line in open(f'{king1}.related'))
453451

454-
shutil.copy(f'{grm3}.king.cutoff.out.id',f'{grm3}.duplicated')
455-
duplicated_count = sum(1 for line in open(f'{grm3}.duplicated'))
452+
shutil.copy(f'{king2}.king.cutoff.out.id',f'{king2}.duplicated')
453+
duplicated_count = sum(1 for line in open(f'{king2}.duplicated'))
456454

457455
related_count = related_count - duplicated_count
458-
duplicated = pd.read_csv(f'{grm3}.duplicated', sep = '\s+')
456+
duplicated = pd.read_csv(f'{king2}.duplicated', sep = '\s+')
459457

460458
# concat duplicated sample ids to related sample ids, drop_duplicates(keep='last) because all duplicated would also be considered related
461459
if prune_related and prune_duplicated:
462-
plink_cmd1 = f'{plink2_exec} --pfile {grm1} --remove {grm2}.king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}'
460+
plink_cmd1 = f'{plink2_exec} --pfile {geno_path} --remove {king1}.king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}'
463461
shell_do(plink_cmd1)
464462

465-
related = pd.read_csv(f'{grm2}.related', sep = '\s+')
463+
related = pd.read_csv(f'{king1}.related', sep = '\s+')
466464
grm_pruned = pd.concat([related, duplicated], ignore_index=True)
467465

468466
if '#FID' in grm_pruned:
@@ -475,7 +473,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
475473
process_complete = True
476474

477475
if prune_duplicated and not prune_related:
478-
plink_cmd1 = f'{plink2_exec} --pfile {grm1} --remove {grm3}.king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}'
476+
plink_cmd1 = f'{plink2_exec} --pfile {geno_path} --remove {king2}.king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}'
479477
shell_do(plink_cmd1)
480478

481479
grm_pruned = duplicated
@@ -513,15 +511,12 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
513511
concat_logs(step, out_path, listOfFiles)
514512

515513
# remove intermediate files
516-
os.remove(f'{grm1}.pgen')
517-
os.remove(f'{grm1}.psam')
518-
os.remove(f'{grm1}.pvar')
519-
os.remove(f'{grm2}.king.cutoff.in.id')
520-
os.remove(f'{grm2}.king.cutoff.out.id')
521-
os.remove(f'{grm2}.related')
522-
os.remove(f'{grm3}.duplicated')
523-
os.remove(f'{grm3}.king.cutoff.in.id')
524-
os.remove(f'{grm3}.king.cutoff.out.id')
514+
os.remove(f'{king1}.king.cutoff.in.id')
515+
os.remove(f'{king1}.king.cutoff.out.id')
516+
os.remove(f'{king1}.related')
517+
os.remove(f'{king2}.duplicated')
518+
os.remove(f'{king2}.king.cutoff.in.id')
519+
os.remove(f'{king2}.king.cutoff.out.id')
525520
os.remove(f'{related_pairs}.king.bin')
526521
os.remove(f'{related_pairs}.king.id')
527522
os.remove(f'{related_pairs}.kin0')
@@ -545,7 +540,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
545540
outfiles_dict = {
546541
'pruned_samples': 'Related Pruning Failed',
547542
'related_samples': None,
548-
'plink_out': [grm1, grm2, grm3]
543+
'plink_out': [king1, king2]
549544
}
550545

551546
metrics_dict = {

0 commit comments

Comments
 (0)