@@ -416,53 +416,51 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
416
416
if related_cutoff < 0 or related_cutoff > 1 or duplicated_cutoff < 0 or duplicated_cutoff > 1 :
417
417
raise ValueError ("related_cutoff and duplicated_cutoff should be between 0 and 1." )
418
418
419
- grm1 = f"{ out_path } _total_grm"
420
- grm2 = f"{ out_path } _related_grm"
421
- grm3 = f"{ out_path } _duplicated_grm"
419
+ king1 = f"{ out_path } _related_king"
420
+ king2 = f"{ out_path } _duplicated_king"
422
421
423
422
related_pairs = f"{ out_path } _pairs"
424
423
related_out = f"{ related_pairs } .related"
425
424
related_pruned_out = f"{ out_path } .pruned"
426
425
427
426
# create pfiles
428
- king_cmd1 = f'{ plink2_exec } --pfile { geno_path } --hwe 0.0001 --mac 2 --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { grm1 } '
429
427
# create table of related pairs
430
- king_cmd2 = f'{ plink2_exec } --pfile { grm1 } --make-king-table --make-king triangle bin --king-table-filter { related_cutoff } --out { related_pairs } '
428
+ king_cmd1 = f'{ plink2_exec } --pfile { geno_path } --make-king-table --make-king triangle bin --king-table-filter { related_cutoff } --out { related_pairs } '
431
429
# see if any samples are related (includes duplicates)
432
- king_cmd3 = f'{ plink2_exec } --pfile { grm1 } --king-cutoff { related_pairs } { related_cutoff } --out { grm2 } '
430
+ king_cmd2 = f'{ plink2_exec } --pfile { geno_path } --king-cutoff { related_pairs } { related_cutoff } --out { king1 } '
433
431
# see if any samples are duplicated (KING kinship above duplicated_cutoff, default 0.354)
434
- king_cmd4 = f'{ plink2_exec } --pfile { grm1 } --king-cutoff { related_pairs } { duplicated_cutoff } --out { grm3 } '
432
+ king_cmd3 = f'{ plink2_exec } --pfile { geno_path } --king-cutoff { related_pairs } { duplicated_cutoff } --out { king2 } '
435
433
436
- cmds = [king_cmd1 , king_cmd2 , king_cmd3 , king_cmd4 ]
434
+ cmds = [king_cmd1 , king_cmd2 , king_cmd3 ]
437
435
for cmd in cmds :
438
436
shell_do (cmd )
439
437
440
- listOfFiles = [f'{ grm1 } .log' , f' { related_pairs } .log' , f'{ grm2 } .log' , f'{ grm3 } .log' ]
438
+ listOfFiles = [f'{ related_pairs } .log' , f'{ king1 } .log' , f'{ king2 } .log' ]
441
439
concat_logs (step , out_path , listOfFiles )
442
440
443
- if os .path .isfile (f'{ related_pairs } .kin0' ) and os .path .isfile (f'{ grm2 } .king.cutoff.out.id' ) and os .path .isfile (f'{ grm3 } .king.cutoff.out.id' ):
441
+ if os .path .isfile (f'{ related_pairs } .kin0' ) and os .path .isfile (f'{ king1 } .king.cutoff.out.id' ) and os .path .isfile (f'{ king2 } .king.cutoff.out.id' ):
444
442
445
443
# create .related related pair sample files
446
444
kinship = pd .read_csv (f'{ related_pairs } .kin0' , sep = '\s+' )
447
445
kinship ['REL' ] = pd .cut (x = kinship ['KINSHIP' ], bins = [- np .inf , 0.0884 , 0.177 , 0.354 , np .inf ], labels = ['unrel' , 'second_deg' , 'first_deg' , 'duplicate' ])
448
446
kinship .to_csv (f'{ related_pairs } .related' , index = False )
449
447
450
448
# create .related and .duplicated single sample files
451
- shutil .copy (f'{ grm2 } .king.cutoff.out.id' ,f'{ grm2 } .related' )
452
- related_count = sum (1 for line in open (f'{ grm2 } .related' ))
449
+ shutil .copy (f'{ king1 } .king.cutoff.out.id' ,f'{ king1 } .related' )
450
+ related_count = sum (1 for line in open (f'{ king1 } .related' ))
453
451
454
- shutil .copy (f'{ grm3 } .king.cutoff.out.id' ,f'{ grm3 } .duplicated' )
455
- duplicated_count = sum (1 for line in open (f'{ grm3 } .duplicated' ))
452
+ shutil .copy (f'{ king2 } .king.cutoff.out.id' ,f'{ king2 } .duplicated' )
453
+ duplicated_count = sum (1 for line in open (f'{ king2 } .duplicated' ))
456
454
457
455
related_count = related_count - duplicated_count
458
- duplicated = pd .read_csv (f'{ grm3 } .duplicated' , sep = '\s+' )
456
+ duplicated = pd .read_csv (f'{ king2 } .duplicated' , sep = '\s+' )
459
457
460
458
# concat duplicated sample ids to related sample ids, drop_duplicates(keep='last') because all duplicated samples would also be considered related
461
459
if prune_related and prune_duplicated :
462
- plink_cmd1 = f'{ plink2_exec } --pfile { grm1 } --remove { grm2 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
460
+ plink_cmd1 = f'{ plink2_exec } --pfile { geno_path } --remove { king1 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
463
461
shell_do (plink_cmd1 )
464
462
465
- related = pd .read_csv (f'{ grm2 } .related' , sep = '\s+' )
463
+ related = pd .read_csv (f'{ king1 } .related' , sep = '\s+' )
466
464
grm_pruned = pd .concat ([related , duplicated ], ignore_index = True )
467
465
468
466
if '#FID' in grm_pruned :
@@ -475,7 +473,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
475
473
process_complete = True
476
474
477
475
if prune_duplicated and not prune_related :
478
- plink_cmd1 = f'{ plink2_exec } --pfile { grm1 } --remove { grm3 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
476
+ plink_cmd1 = f'{ plink2_exec } --pfile { geno_path } --remove { king2 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
479
477
shell_do (plink_cmd1 )
480
478
481
479
grm_pruned = duplicated
@@ -513,15 +511,12 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
513
511
concat_logs (step , out_path , listOfFiles )
514
512
515
513
# remove intermediate files
516
- os .remove (f'{ grm1 } .pgen' )
517
- os .remove (f'{ grm1 } .psam' )
518
- os .remove (f'{ grm1 } .pvar' )
519
- os .remove (f'{ grm2 } .king.cutoff.in.id' )
520
- os .remove (f'{ grm2 } .king.cutoff.out.id' )
521
- os .remove (f'{ grm2 } .related' )
522
- os .remove (f'{ grm3 } .duplicated' )
523
- os .remove (f'{ grm3 } .king.cutoff.in.id' )
524
- os .remove (f'{ grm3 } .king.cutoff.out.id' )
514
+ os .remove (f'{ king1 } .king.cutoff.in.id' )
515
+ os .remove (f'{ king1 } .king.cutoff.out.id' )
516
+ os .remove (f'{ king1 } .related' )
517
+ os .remove (f'{ king2 } .duplicated' )
518
+ os .remove (f'{ king2 } .king.cutoff.in.id' )
519
+ os .remove (f'{ king2 } .king.cutoff.out.id' )
525
520
os .remove (f'{ related_pairs } .king.bin' )
526
521
os .remove (f'{ related_pairs } .king.id' )
527
522
os .remove (f'{ related_pairs } .kin0' )
@@ -545,7 +540,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
545
540
outfiles_dict = {
546
541
'pruned_samples' : 'Related Pruning Failed' ,
547
542
'related_samples' : None ,
548
- 'plink_out' : [grm1 , grm2 , grm3 ]
543
+ 'plink_out' : [king1 , king2 ]
549
544
}
550
545
551
546
metrics_dict = {
0 commit comments