Skip to content

Commit 60c312f

Browse files
anupriyatripathi authored and ElDeveloper committed
Classyfire updates (#92)
* Delete old test data
* URL-encode SMILES strings, as they may contain reserved characters
* Add an example SMILES string with special characters
* Test that a SMILES string with special characters is classified
* Check taxonomy keys in the ClassyFire response (a KeyError is raised otherwise)
* Test that the molecule was assigned to the correct kingdom
1 parent d97df7d commit 60c312f

File tree

3 files changed

+18
-9
lines changed

3 files changed

+18
-9
lines changed

q2_qemistree/_classyfire.py

+11-5
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import pandas as pd
1111
import numpy as np
1212
import warnings
13+
import urllib
1314

1415

1516
def get_classyfire_taxonomy(feature_data: pd.DataFrame) -> pd.DataFrame:
@@ -65,21 +66,26 @@ def get_classyfire_taxonomy(feature_data: pd.DataFrame) -> pd.DataFrame:
6566
for idx in feature_data.index:
6667
smiles = feature_data.loc[idx, 'smiles']
6768
if pd.notna(smiles):
68-
url_smiles = 'https://gnps-structure.ucsd.edu/inchikey?smiles='
69-
response = requests.get(url_smiles+smiles)
69+
to_inchikey = 'https://gnps-structure.ucsd.edu/inchikey?smiles='
70+
urlencoded_smiles = urllib.parse.quote(smiles)
71+
response = requests.get(to_inchikey+urlencoded_smiles)
7072
if response.status_code != 200:
7173
classyfire[idx] = 'SMILE parse error'
7274
no_inchikey.append((idx, smiles))
7375
continue
7476
inchikey = response.text
75-
url_inchi = 'https://gnps-classyfire.ucsd.edu/entities/'
76-
response = requests.get(url_inchi+str(inchikey)+'.json')
77+
to_classyfire = 'https://gnps-classyfire.ucsd.edu/entities/'
78+
response = requests.get(to_classyfire+str(inchikey)+'.json')
7779
if response.status_code == 200:
7880
response = response.json()
81+
classyfire_levels_sub = [level for level in classyfire_levels
82+
if level in response.keys()]
83+
if len(classyfire_levels_sub) == 0:
84+
continue
7985
taxonomy = [response[level]['name']
8086
if bool(response) and response[level] is not None
8187
else 'unclassified'
82-
for level in classyfire_levels]
88+
for level in classyfire_levels_sub]
8389
classyfire[idx] = taxonomy
8490
elif response.status_code == 404:
8591
classyfire[idx] = 'unclassified'
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
label #featureID csi_smiles ms2_smiles table_number
22
3638d9b9658b202f9c773e5e43206d32 3 1
3-
c8f6e296c0bacf58cfec06b333fd2015 7 CC(=N)C1=CN(C)C=N1 1
3+
c8f6e296c0bacf58cfec06b333fd2015 7 CCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC[N+](C)(C)C)O 1
44
a500d26568f130313bdb70e3ae7d2ade 2 1

q2_qemistree/tests/test_classyfire.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,12 @@ def test_malformed_smiles(self):
5252
get_classyfire_taxonomy(self.mal_smiles)
5353

5454
def test_classyfire_output(self):
55-
classified_feature_data = get_classyfire_taxonomy(self.smiles)
56-
self.assertTrue((self.levels.issubset(set(
57-
classified_feature_data.columns))))
55+
classified = get_classyfire_taxonomy(self.smiles)
56+
classified_mols = classified[classified['kingdom'] != 'unclassified']
57+
self.assertTrue(len(classified_mols) == 1)
58+
self.assertTrue(classified_mols.loc[1,
59+
'kingdom'] == 'Organic compounds')
60+
self.assertTrue((self.levels.issubset(set(classified.columns))))
5861

5962

6063
if __name__ == '__main__':

0 commit comments

Comments
 (0)