22import string
33import rdflib
44from openpyxl .worksheet import cell_range , worksheet
5+ from openpyxl import load_workbook
56from pathlib import Path
67
78def check_name (nm_to_chck ):
@@ -22,6 +23,11 @@ def check_name(nm_to_chck):
2223 for ltr in nm_to_chck :
2324 if ord (ltr ) == 32 :
2425 nm_to_chck = nm_to_chck .replace (ltr , "_" )
26+ elif ord (ltr ) == 45 :
27+ # Allow hyphens to be reinterpreted as underscores
28+ nm_to_chck = nm_to_chck .replace (ltr , "_" )
29+ elif ord (ltr ) == 46 :
30+ nm_to_chck = nm_to_chck .replace (ltr , "_" )
2531 elif ord (ltr ) > 122 or ord (ltr ) < 48 :
2632 # 122 (ord("z")) is the highest decimal code point
2733 # for common Latin letters or Arabic numerals
@@ -124,7 +130,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
124130 LAST_VARIANT_ROW = 35
125131
126132 print (f'Loading workbook "{ excel_file } "' )
127- work_book = openpyxl . load_workbook (excel_file , data_only = True )
133+ work_book = load_workbook (excel_file , data_only = True )
128134 sheet = work_book [VARIANTS_SHEET ]
129135
130136 # First, get the library name
@@ -133,7 +139,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
133139
134140 # Then get the base sequence
135141 print ('Extracting base sequence' )
136- first_aa_column = get_column_number (FIRST_AMINO_ACID_COLUMN )
142+ first_aa_column = col_to_num (FIRST_AMINO_ACID_COLUMN )
137143 last_aa_column = row_ends (sheet , ORIGINAL_AMINO_ACID_ROW , first_aa_column )
138144 # Get row from sheet and concatenate it into a string
139145 row_iterator = sheet .iter_rows (min_row = ORIGINAL_AMINO_ACID_ROW , max_row = ORIGINAL_AMINO_ACID_ROW ,
0 commit comments