|
11 | 11 | from tqdm import tqdm |
12 | 12 | from time import sleep |
13 | 13 |
|
| 14 | +from download_msv import _resolve_msv_usi |
| 15 | +from download_workbench import _resolve_metabolomicsworkbench_usi |
| 16 | + |
14 | 17 | def _get_usi_display_filename(usi): |
15 | 18 | usi_splits = usi.split(":") |
16 | 19 |
|
@@ -65,54 +68,6 @@ def _usi_to_local_filename(usi): |
65 | 68 |
|
66 | 69 |
|
67 | 70 |
|
68 | | - |
def _resolve_msv_usi(usi, force_massive=False):
    """
    Resolve a MassIVE USI to an FTP link for the underlying data file.

    The USI is looked up through the ProteoSAFe QuerySpectrum endpoint. Among
    the returned file descriptors the first ``.mzML`` file is preferred, then
    the first ``.mzXML`` file, then the first ``.raw`` file (only the raw
    extension is matched case-insensitively).

    Args:
        usi (str): Colon-separated USI. A USI with exactly three fields gets
            ``:scan:1`` appended before it is sent to the lookup endpoint.
        force_massive (bool, optional): Defaults to False. When True and the
            lookup response cannot be resolved, we try to create the url
            directly from the second and third USI fields, usually for non
            mzML/RAW files, e.g. CDF files.

    Returns:
        str: A link of the form ``ftp://massive.ucsd.edu/...``.

    Raises:
        ValueError: The lookup returned no mzML, mzXML or raw file and
            ``force_massive`` is False.
        Exception: Any other error raised while decoding or reading the lookup
            response is re-raised unchanged when ``force_massive`` is False.
            Errors raised by the HTTP request itself are never intercepted,
            because the request is issued outside the guarded section.
    """

    usi_splits = usi.split(':')

    # The lookup is done with a scan-level USI, so pad a three-field USI
    msv_usi = usi
    if len(usi_splits) == 3:
        msv_usi = "{}:scan:1".format(usi)

    lookup_url = f'https://massive.ucsd.edu/ProteoSAFe/QuerySpectrum?id={msv_usi}'
    lookup_request = requests.get(lookup_url)

    try:
        descriptors = [row["file_descriptor"] for row in lookup_request.json()["row_data"]]

        # Extension tests in priority order: mzML, then mzXML, then raw
        extension_preferences = (
            lambda ext: ext == ".mzML",
            lambda ext: ext == ".mzXML",
            lambda ext: ext.lower() == ".raw",
        )

        remote_path = None
        for is_wanted in extension_preferences:
            matches = [d for d in descriptors if is_wanted(os.path.splitext(d)[1])]
            if matches:
                remote_path = matches[0]
                break

        if remote_path is None:
            # Fail with a readable message instead of a TypeError from slicing None
            raise ValueError(f"No mzML, mzXML or raw file found for USI {usi}")

        # Format into FTP link; the first two characters of the descriptor are
        # dropped (presumably a type prefix such as "f." - confirm against the API)
        remote_link = f"ftp://massive.ucsd.edu/{remote_path[2:]}"
    except Exception:
        # We did not successfully look it up, this is the fallback try.
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        if force_massive:
            return f"ftp://massive.ucsd.edu/{usi_splits[1]}/{usi_splits[2]}"
        raise

    return remote_link
115 | | - |
116 | 71 | def _resolve_gnps_usi(usi): |
117 | 72 | usi_splits = usi.split(':') |
118 | 73 |
|
@@ -148,23 +103,7 @@ def _resolve_mtbls_usi(usi): |
148 | 103 |
|
149 | 104 | return remote_link |
150 | 105 |
|
def _resolve_metabolomicsworkbench_usi(usi):
    """
    Resolve a Metabolomics Workbench USI by way of its MassIVE dataset.

    The second USI field is used as the title filter of a ProteoSAFe
    QueryDatasets request. The ``dataset`` value of the first returned row is
    then combined with the third USI field (the filename) into an ``mzspec``
    USI, which is handed to ``_resolve_msv_usi``.

    Args:
        usi (str): Colon-separated USI; fields 1 and 2 (zero-based) are read
            as dataset accession and filename.

    Returns:
        Whatever ``_resolve_msv_usi`` returns for the derived MassIVE USI.
    """
    fields = usi.split(':')
    dataset_accession, filename = fields[1], fields[2]

    # Dataset search on MassIVE, filtered by the accession as title input
    query_template = "https://massive.ucsd.edu/ProteoSAFe/QueryDatasets?task=N%2FA&file=&pageSize=30&offset=0&query=%257B%2522full_search_input%2522%253A%2522%2522%252C%2522table_sort_history%2522%253A%2522createdMillis_dsc%2522%252C%2522query%2522%253A%257B%257D%252C%2522title_input%2522%253A%2522{}%2522%257D&target=&_=1606254845533"
    response = requests.get(query_template.format(dataset_accession))

    # Only the first hit is considered
    first_hit = response.json()["row_data"][0]
    msv_accession = first_hit["dataset"]

    return _resolve_msv_usi("mzspec:{}:{}:scan:1".format(msv_accession, filename))
168 | 107 |
|
169 | 108 | def _resolve_pxd_usi(usi): |
170 | 109 | usi_splits = usi.split(':') |
|
0 commit comments