|
10 | 10 | import pandas as pd
|
11 | 11 | import numpy as np
|
12 | 12 | import warnings
|
| 13 | +import urllib |
13 | 14 |
|
14 | 15 |
|
15 | 16 | def get_classyfire_taxonomy(feature_data: pd.DataFrame) -> pd.DataFrame:
|
@@ -65,21 +66,26 @@ def get_classyfire_taxonomy(feature_data: pd.DataFrame) -> pd.DataFrame:
|
65 | 66 | for idx in feature_data.index:
|
66 | 67 | smiles = feature_data.loc[idx, 'smiles']
|
67 | 68 | if pd.notna(smiles):
|
68 |
| - url_smiles = 'https://gnps-structure.ucsd.edu/inchikey?smiles=' |
69 |
| - response = requests.get(url_smiles+smiles) |
| 69 | + to_inchikey = 'https://gnps-structure.ucsd.edu/inchikey?smiles=' |
| 70 | + urlencoded_smiles = urllib.parse.quote(smiles) |
| 71 | + response = requests.get(to_inchikey+urlencoded_smiles) |
70 | 72 | if response.status_code != 200:
|
71 | 73 | classyfire[idx] = 'SMILE parse error'
|
72 | 74 | no_inchikey.append((idx, smiles))
|
73 | 75 | continue
|
74 | 76 | inchikey = response.text
|
75 |
| - url_inchi = 'https://gnps-classyfire.ucsd.edu/entities/' |
76 |
| - response = requests.get(url_inchi+str(inchikey)+'.json') |
| 77 | + to_classyfire = 'https://gnps-classyfire.ucsd.edu/entities/' |
| 78 | + response = requests.get(to_classyfire+str(inchikey)+'.json') |
77 | 79 | if response.status_code == 200:
|
78 | 80 | response = response.json()
|
| 81 | + classyfire_levels_sub = [level for level in classyfire_levels |
| 82 | + if level in response.keys()] |
| 83 | + if len(classyfire_levels_sub) == 0: |
| 84 | + continue |
79 | 85 | taxonomy = [response[level]['name']
|
80 | 86 | if bool(response) and response[level] is not None
|
81 | 87 | else 'unclassified'
|
82 |
| - for level in classyfire_levels] |
| 88 | + for level in classyfire_levels_sub] |
83 | 89 | classyfire[idx] = taxonomy
|
84 | 90 | elif response.status_code == 404:
|
85 | 91 | classyfire[idx] = 'unclassified'
|
|
0 commit comments