"""Report how the combined ARO mapping tables break down by ``Cut_Off`` value.

The script concatenates the mapping table of every database listed in
``argnorm.lib.DATABASES`` (keeping only the first database whose name
contains ``'groot'``) and prints, for each of the ``Perfect``, ``Strict``,
``Loose`` and ``Manual`` categories, the row count and its share of all rows.
"""
import pandas as pd

# (key in the ``Cut_Off`` column, wording used in the printed report)
CUT_OFF_LABELS = (
    ('Perfect', 'perfect hits'),
    ('Strict', 'strict hits'),
    ('Loose', 'loose hits'),
    ('Manual', 'manual curation genes'),
)


def select_databases(databases):
    """Return *databases* in order, keeping only the first ``'groot'`` entry.

    Every name that does not contain ``'groot'`` is kept.  Of the names that
    do, only the first one encountered is kept.
    NOTE(review): presumably all groot variants resolve to the same mapping
    table, so loading more than one would double count rows -- confirm.
    """
    selected = []
    groot_added = False
    for db in databases:
        if 'groot' in db:
            if groot_added:
                continue
            groot_added = True
        selected.append(db)
    return selected


def load_combined_mapping_table():
    """Load and concatenate the mapping tables of the selected databases.

    ``argnorm`` is imported here rather than at module level so that the
    pure helpers in this file can be imported without it being installed.
    """
    from argnorm.lib import DATABASES
    from argnorm.lib import get_aro_mapping_table

    return pd.concat(
        [get_aro_mapping_table(db) for db in select_databases(DATABASES)]
    )


def format_cutoff_report(cut_offs):
    """Build the report lines for a series of ``Cut_Off`` values.

    Args:
        cut_offs: a pandas Series holding one cut-off label per row.

    Returns:
        A list of strings: the total line (which ends in a newline so that a
        blank line follows it when printed), then one line per category in
        ``CUT_OFF_LABELS`` order.
    """
    total = len(cut_offs)
    counts = cut_offs.value_counts().to_dict()

    lines = [f'Total number of genes: {total}\n']
    for key, label in CUT_OFF_LABELS:
        # A category may be absent from the data; report it as zero instead
        # of raising KeyError.
        count = counts.get(key, 0)
        # Guard against an empty table to avoid ZeroDivisionError.
        percentage = count / total * 100 if total else 0.0
        lines.append(
            f'Total # of {label}: {count}. '
            f'This is {percentage:.1f}% of the total hits.'
        )
    return lines


def main():
    """Print the cut-off breakdown for the combined mapping table."""
    combined_mapping_table = load_combined_mapping_table()
    for line in format_cutoff_report(combined_mapping_table['Cut_Off']):
        print(line)


if __name__ == '__main__':
    main()