-
Notifications
You must be signed in to change notification settings - Fork 12
Open
Description
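Not sure if this is intended, but my workflow just broke because one SMILES string could not be converted to a fingerprint.
Judging from the traceback below, `Chem.MolFromSmiles` appears to have returned `None` for that SMILES string, and the `None` was then passed directly to `Chem.RDKFingerprint` in `get_fingerprint`, which raised the `ArgumentError`.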
ArgumentError Traceback (most recent call last)
Cell In[6], line 6
1 library_creator = LibraryFilesCreator(spectrums,
2 output_directory=r"D:\summer_school_copenhagen_2023\ms2query_library2023",
3 #ms2ds_model_file_name=ms2ds_model_file_name,
4 #s2v_model_file_name=s2v_model_file_name,
5 )
----> 6 library_creator.create_all_library_files()
File d:\ms2query\ms2query\create_new_library\library_files_creator.py:114, in LibraryFilesCreator.create_all_library_files(self)
111 def create_all_library_files(self):
112 """Creates files with embeddings and a sqlite file with spectra data
113 """
--> 114 self.create_sqlite_file()
115 self.store_s2v_embeddings()
116 self.store_ms2ds_embeddings()
File d:\ms2query\ms2query\create_new_library\library_files_creator.py:124, in LibraryFilesCreator.create_sqlite_file(self)
122 else:
123 compound_classes_df = None
--> 124 make_sqlfile_wrapper(
125 self.sqlite_file_name,
126 self.list_of_spectra,
127 columns_dict={"precursor_mz": "REAL"},
128 compound_classes=compound_classes_df,
129 progress_bars=self.progress_bars,
130 )
File d:\ms2query\ms2query\create_new_library\create_sqlite_database.py:53, in make_sqlfile_wrapper(sqlite_file_name, list_of_spectra, columns_dict, compound_classes, progress_bars)
49 initialize_tables(sqlite_file_name, additional_metadata_columns_dict=columns_dict,
50 additional_inchikey_columns=additional_inchikey_columns)
51 fill_spectrum_data_table(sqlite_file_name, list_of_spectra, progress_bar=progress_bars)
---> 53 fill_inchikeys_table(sqlite_file_name, list_of_spectra,
54 compound_classes=compound_classes,
55 progress_bars=progress_bars)
File d:\ms2query\ms2query\create_new_library\create_sqlite_database.py:204, in fill_inchikeys_table(sqlite_file_name, list_of_spectra, compound_classes, progress_bars)
201 conn = sqlite3.connect(sqlite_file_name)
202 cur = conn.cursor()
--> 204 closest_related_inchikey14s = calculate_highest_tanimoto_score(list_of_spectra, list_of_spectra, 10)
206 # Fill table
207 for inchikey14 in tqdm(spectra_belonging_to_inchikey14,
208 desc="Adding inchikey14s to sqlite table",
209 disable=not progress_bars):
File d:\ms2query\ms2query\create_new_library\calculate_tanimoto_scores.py:92, in calculate_highest_tanimoto_score(query_spectra, library_spectra, nr_of_top_inchikeys)
88 def calculate_highest_tanimoto_score(query_spectra,
89 library_spectra,
90 nr_of_top_inchikeys):
91 """Returns the highest scoring library spectra in """
---> 92 tanimoto_scores_df = calculate_tanimoto_scores_unique_inchikey(query_spectra, library_spectra)
93 unique_query_inchikeys = list(tanimoto_scores_df.index)
94 highest_score_dict = {}
File d:\ms2query\ms2query\create_new_library\calculate_tanimoto_scores.py:54, in calculate_tanimoto_scores_unique_inchikey(list_of_spectra_1, list_of_spectra_2)
51 list_of_smiles_1 = [spectrum.get("smiles") for spectrum in spectra_with_most_frequent_inchi_per_inchikey_1]
52 list_of_smiles_2 = [spectrum.get("smiles") for spectrum in spectra_with_most_frequent_inchi_per_inchikey_2]
---> 54 tanimoto_scores = calculate_tanimoto_scores_from_smiles(list_of_smiles_1, list_of_smiles_2)
55 tanimoto_df = pd.DataFrame(tanimoto_scores, index=unique_inchikeys_1, columns=unique_inchikeys_2)
56 return tanimoto_df
File d:\ms2query\ms2query\create_new_library\calculate_tanimoto_scores.py:27, in calculate_tanimoto_scores_from_smiles(list_of_smiles_1, list_of_smiles_2)
24 def calculate_tanimoto_scores_from_smiles(list_of_smiles_1: List[str],
25 list_of_smiles_2: List[str]) -> np.ndarray:
26 """Returns a 2d ndarray containing the tanimoto scores between the smiles"""
---> 27 fingerprints_1 = np.array([get_fingerprint(spectrum) for spectrum in tqdm(list_of_smiles_1,
28 desc="Calculating fingerprints")])
29 fingerprints_2 = np.array([get_fingerprint(spectrum) for spectrum in tqdm(list_of_smiles_2,
30 desc="Calculating fingerprints")])
31 print("Calculating tanimoto scores")
File d:\ms2query\ms2query\create_new_library\calculate_tanimoto_scores.py:27, in <listcomp>(.0)
24 def calculate_tanimoto_scores_from_smiles(list_of_smiles_1: List[str],
25 list_of_smiles_2: List[str]) -> np.ndarray:
26 """Returns a 2d ndarray containing the tanimoto scores between the smiles"""
---> 27 fingerprints_1 = np.array([get_fingerprint(spectrum) for spectrum in tqdm(list_of_smiles_1,
28 desc="Calculating fingerprints")])
29 fingerprints_2 = np.array([get_fingerprint(spectrum) for spectrum in tqdm(list_of_smiles_2,
30 desc="Calculating fingerprints")])
31 print("Calculating tanimoto scores")
File d:\ms2query\ms2query\create_new_library\calculate_tanimoto_scores.py:18, in get_fingerprint(smiles)
17 def get_fingerprint(smiles: str):
---> 18 fingerprint = np.array(Chem.RDKFingerprint(Chem.MolFromSmiles(smiles), fpSize=2048))
19 assert isinstance(fingerprint, np.ndarray), \
20 f"Fingerprint for 1 spectrum could not be set smiles is {smiles}"
21 return fingerprint
ArgumentError: Python argument types in
rdkit.Chem.rdmolops.RDKFingerprint(NoneType)
did not match C++ signature:
RDKFingerprint(class RDKit::ROMol mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=True, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=True, bool useBondOrder=True, class boost::python::api::object atomInvariants=0, class boost::python::api::object fromAtoms=0, class boost::python::api::object atomBits=None, class boost::python::api::object bitInfo=None)
Metadata
Metadata
Assignees
Labels
No labels