|
| 1 | +import numpy as np |
| 2 | + |
| 3 | + |
| 4 | +def calc_switch_error_rate(): |
| 5 | + """ |
| 6 | + Run this function at root directory and input the path to the |
| 7 | + assessed haplotype file. |
| 8 | + The SER and PER_intra calculation satisfies the definition from: |
| 9 | + https://www.cell.com/hgg-advances/fulltext/S2666-2477(25)00082-X |
| 10 | + """ |
| 11 | + nLociAll = 2000 |
| 12 | + true_path = "tests/accuracy_tests/sim_for_alphapeel_accu_test/true-hap_0.5.txt" |
| 13 | + file_name = input("Enter the path of the assessed haplotype file: ") |
| 14 | + new_file = np.loadtxt(file_name, usecols=np.arange(1, nLociAll + 1)) |
| 15 | + true_file = np.loadtxt(true_path, usecols=np.arange(1, nLociAll + 1)) |
| 16 | + |
| 17 | + nInd = 1000 |
| 18 | + switch_error_count = 0 |
| 19 | + phase_error_count = 0 |
| 20 | + uncalled_count = 0 |
| 21 | + wrong_homo_count = 0 |
| 22 | + true_hetero_count = 0 |
| 23 | + homo_count = 0 |
| 24 | + hetero_count = 0 |
| 25 | + for ind in range(nInd): |
| 26 | + hap_p_new = new_file[ind * 2] |
| 27 | + hap_m_new = new_file[ind * 2 + 1] |
| 28 | + hap_p_true = true_file[ind * 2] |
| 29 | + hap_m_true = true_file[ind * 2 + 1] |
| 30 | + |
| 31 | + switched = False |
| 32 | + for loci in range(nLociAll): |
| 33 | + if hap_p_true[loci] == hap_m_true[loci]: |
| 34 | + homo_count += 1 |
| 35 | + else: |
| 36 | + hetero_count += 1 |
| 37 | + if hap_p_new[loci] == 9 or hap_m_new[loci] == 9: |
| 38 | + uncalled_count += 1 |
| 39 | + continue |
| 40 | + if hap_p_true[loci] + hap_m_true[loci] == 1: |
| 41 | + if hap_p_new[loci] == hap_m_new[loci]: |
| 42 | + wrong_homo_count += 1 |
| 43 | + continue |
| 44 | + true_hetero_count += 1 |
| 45 | + if hap_p_new[loci] != hap_p_true[loci]: |
| 46 | + phase_error_count += 1 |
| 47 | + if (hap_p_new[loci] != hap_p_true[loci] and switched is False) or ( |
| 48 | + hap_p_new[loci] == hap_p_true[loci] and switched is True |
| 49 | + ): |
| 50 | + switched = not switched |
| 51 | + switch_error_count += 1 |
| 52 | + |
| 53 | + print(f"Switch error rate: {switch_error_count / (nInd * (nLociAll - 1))}") |
| 54 | + print(f"Phase error (intra) rate: {phase_error_count / (nInd * nLociAll)}") |
| 55 | + print(f"Uncalled rate: {uncalled_count / (nInd * nLociAll)}") |
| 56 | + print(f"Wrong homozygote rate: {wrong_homo_count / (nInd * nLociAll)}") |
| 57 | + print(f"True heterozygote rate: {true_hetero_count / (nInd * nLociAll)}") |
| 58 | + print(f"Homozygote count in true genotype: {homo_count}") |
| 59 | + print(f"Heterozygote count in true genotype: {hetero_count}") |
| 60 | + print(f"Homo to hetero ratio: {homo_count / hetero_count}") |
| 61 | + |
| 62 | + |
| 63 | +def main(): |
| 64 | + calc_switch_error_rate() |
| 65 | + |
| 66 | + |
| 67 | +if __name__ == "__main__": |
| 68 | + main() |
0 commit comments