Skip to content

Commit a3ed83d

Browse files
authored
Merge pull request #233 from iomega/update_matchms_version
Update matchms version
2 parents 66c4af2 + 8febebe commit a3ed83d

30 files changed

+157
-396
lines changed

.github/workflows/CI_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
fail-fast: false
4747
matrix:
4848
os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
49-
python-version: ['3.7', '3.8']
49+
python-version: ['3.8', '3.9']
5050
exclude:
5151
# already tested in first_check job
5252
- python-version: 3.8

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## unpublished
8+
## 1.3.0
9+
### Changed
10+
- New models have to be downloaded, since this version is not compatible with the older models! Embeddings have to be stored as parquet.
11+
- Embeddings are now stored as parquet instead of pickle
12+
- Made MS2Query compatible with matchms 0.24.0
13+
## 1.2.4
914
### Added
1015
- environment.yml and CI_build test for building a conda env from this file
1116
### Fixed

environment.yml

Lines changed: 16 additions & 225 deletions
Original file line numberDiff line numberDiff line change
@@ -4,228 +4,19 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- _libgcc_mutex=0.1=conda_forge
8-
- _openmp_mutex=4.5=2_gnu
9-
- abseil-cpp=20210324.2=h9c3ff4c_0
10-
- absl-py=1.4.0=pyhd8ed1ab_0
11-
- aiohttp=3.8.5=py38h01eb140_0
12-
- aiosignal=1.3.1=pyhd8ed1ab_0
13-
- astunparse=1.6.3=pyhd8ed1ab_0
14-
- async-timeout=4.0.3=pyhd8ed1ab_0
15-
- attrs=23.1.0=pyh71513ae_1
16-
- blinker=1.6.2=pyhd8ed1ab_0
17-
- boost=1.78.0=py38h4e30db6_4
18-
- boost-cpp=1.78.0=h5adbc97_2
19-
- brotli=1.0.9=h166bdaf_9
20-
- brotli-bin=1.0.9=h166bdaf_9
21-
- brotli-python=1.0.9=py38hfa26641_9
22-
- bzip2=1.0.8=h7f98852_4
23-
- c-ares=1.19.1=hd590300_0
24-
- ca-certificates=2023.7.22=hbcca054_0
25-
- cached-property=1.5.2=hd8ed1ab_1
26-
- cached_property=1.5.2=pyha770c72_1
27-
- cachetools=5.3.1=pyhd8ed1ab_0
28-
- cairo=1.16.0=ha61ee94_1014
29-
- certifi=2023.7.22=pyhd8ed1ab_0
30-
- cffi=1.15.1=py38h4a40e3a_3
31-
- charset-normalizer=3.2.0=pyhd8ed1ab_0
32-
- click=8.1.7=unix_pyh707e725_0
33-
- colorama=0.4.6=pyhd8ed1ab_0
34-
- coloredlogs=15.0.1=pyhd8ed1ab_3
35-
- contourpy=1.1.0=py38h7f3f72f_0
36-
- cryptography=39.0.0=py38h1724139_0
37-
- cycler=0.11.0=pyhd8ed1ab_0
38-
- deprecated=1.2.14=pyh1a96a4e_0
39-
- exceptiongroup=1.1.3=pyhd8ed1ab_0
40-
- expat=2.5.0=hcb278e6_1
41-
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
42-
- font-ttf-inconsolata=3.000=h77eed37_0
43-
- font-ttf-source-code-pro=2.038=h77eed37_0
44-
- font-ttf-ubuntu=0.83=hab24e00_0
45-
- fontconfig=2.14.2=h14ed4e7_0
46-
- fonts-conda-ecosystem=1=0
47-
- fonts-conda-forge=1=0
48-
- fonttools=4.42.1=py38h01eb140_0
49-
- freetype=2.12.1=hca18f0e_1
50-
- freetype-py=2.3.0=pyhd8ed1ab_0
51-
- frozenlist=1.4.0=py38h01eb140_0
52-
- fst-pso=1.8.1=pyhd8ed1ab_0
53-
- fuzzytm=2.0.5=pyhd8ed1ab_0
54-
- gast=0.5.4=pyhd8ed1ab_0
55-
- gensim=4.3.2=py38h53bb729_0
56-
- gettext=0.21.1=h27087fc_0
57-
- giflib=5.2.1=h0b41bf4_3
58-
- gmp=6.2.1=h58526e2_0
59-
- gmpy2=2.1.2=py38h793c122_1
60-
- google-auth=2.17.3=pyh1a96a4e_0
61-
- google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
62-
- google-pasta=0.2.0=pyh8c360ce_0
63-
- greenlet=2.0.2=py38h17151c0_1
64-
- grpc-cpp=1.45.2=h9d3bbbb_5
65-
- grpcio=1.45.0=py38ha0cdfde_0
66-
- h5py=3.9.0=nompi_py38h89e2d6c_100
67-
- hdf5=1.14.0=nompi_h5231ba7_103
68-
- humanfriendly=10.0=py38h578d9bd_4
69-
- icu=70.1=h27087fc_0
70-
- idna=3.4=pyhd8ed1ab_0
71-
- importlib-metadata=6.8.0=pyha770c72_0
72-
- importlib-resources=6.0.1=pyhd8ed1ab_0
73-
- importlib_resources=6.0.1=pyhd8ed1ab_0
74-
- iniconfig=2.0.0=pyhd8ed1ab_0
75-
- joblib=1.3.2=pyhd8ed1ab_0
76-
- jpeg=9e=h0b41bf4_3
77-
- keras=2.8.0=pyhd8ed1ab_0
78-
- keras-preprocessing=1.1.2=pyhd8ed1ab_0
79-
- keyutils=1.6.1=h166bdaf_0
80-
- kiwisolver=1.4.5=py38h7f3f72f_0
81-
- krb5=1.20.1=hf9c8cef_0
82-
- lcms2=2.15=hfd0df8a_0
83-
- ld_impl_linux-64=2.40=h41732ed_0
84-
- lerc=4.0.0=h27087fc_0
85-
- libaec=1.0.6=hcb278e6_1
86-
- libblas=3.9.0=17_linux64_openblas
87-
- libbrotlicommon=1.0.9=h166bdaf_9
88-
- libbrotlidec=1.0.9=h166bdaf_9
89-
- libbrotlienc=1.0.9=h166bdaf_9
90-
- libcblas=3.9.0=17_linux64_openblas
91-
- libcurl=7.87.0=h6312ad2_0
92-
- libdeflate=1.17=h0b41bf4_0
93-
- libedit=3.1.20191231=he28a2e2_2
94-
- libev=4.33=h516909a_1
95-
- libexpat=2.5.0=hcb278e6_1
96-
- libffi=3.4.2=h7f98852_5
97-
- libgcc-ng=13.1.0=he5830b7_0
98-
- libgfortran-ng=13.1.0=h69a702a_0
99-
- libgfortran5=13.1.0=h15d22d2_0
100-
- libglib=2.76.4=hebfc3b9_0
101-
- libgomp=13.1.0=he5830b7_0
102-
- libiconv=1.17=h166bdaf_0
103-
- liblapack=3.9.0=17_linux64_openblas
104-
- libllvm14=14.0.6=hcd5def8_4
105-
- libnghttp2=1.51.0=hdcd2b5c_0
106-
- libnsl=2.0.0=h7f98852_0
107-
- libopenblas=0.3.23=pthreads_h80387f5_0
108-
- libpng=1.6.39=h753d276_0
109-
- libprotobuf=3.20.3=h3eb15da_0
110-
- libsqlite=3.43.0=h2797004_0
111-
- libssh2=1.10.0=haa6b8db_3
112-
- libstdcxx-ng=13.1.0=hfd8a6a1_0
113-
- libtiff=4.5.0=h6adf6a1_2
114-
- libuuid=2.38.1=h0b41bf4_0
115-
- libwebp-base=1.3.1=hd590300_0
116-
- libxcb=1.13=h7f98852_1004
117-
- libxml2=2.10.3=hca2bb57_4
118-
- libxslt=1.1.37=h873f0b0_0
119-
- libzlib=1.2.13=hd590300_5
120-
- llvmlite=0.40.1=py38h94a1851_0
121-
- lxml=4.9.2=py38h215a2d7_0
122-
- markdown=3.4.4=pyhd8ed1ab_0
123-
- markupsafe=2.1.3=py38h01eb140_0
124-
- matchms=0.17.0=pyh7cba7a3_0
125-
- matchmsextras=0.4.1=pyhdfd78af_0
126-
- matplotlib-base=3.7.2=py38hf5b0b65_0
127-
- miniful=0.0.6=pyhd8ed1ab_0
128-
- mpc=1.3.1=hfe3b2da_0
129-
- mpfr=4.2.0=hb012696_0
130-
- mpmath=1.3.0=pyhd8ed1ab_0
131-
- ms2deepscore=0.4.0=pyhdfd78af_0
132-
- ms2query=1.2.2=pyhdfd78af_0
133-
- multidict=6.0.4=py38h1de0b5d_0
134-
- munkres=1.1.4=pyh9f0ad1d_0
135-
- ncurses=6.4=hcb278e6_0
136-
- networkx=3.1=pyhd8ed1ab_0
137-
- numba=0.57.1=py38hd559b08_0
138-
- numpy=1.24.4=py38h59b608b_0
139-
- oauthlib=3.2.2=pyhd8ed1ab_0
140-
- onnx=1.12.0=py38h8d49f1e_0
141-
- onnxconverter-common=1.13.0=pyhd8ed1ab_0
142-
- onnxruntime=1.15.1=py38h896e202_2_cpu
143-
- openjpeg=2.5.0=hfec8fc6_2
144-
- openssl=1.1.1v=hd590300_0
145-
- opt_einsum=3.3.0=pyhd8ed1ab_1
146-
- packaging=23.1=pyhd8ed1ab_0
147-
- pandas=1.5.3=py38hdc8b05c_1
148-
- pcre2=10.40=hc3806b6_0
149-
- pickydict=0.4.0=pyhd8ed1ab_0
150-
- pillow=9.4.0=py38hde6dc18_1
151-
- pip=23.2.1=pyhd8ed1ab_0
152-
- pixman=0.40.0=h36c2ea0_0
153-
- platformdirs=3.10.0=pyhd8ed1ab_0
154-
- pluggy=1.3.0=pyhd8ed1ab_0
155-
- pooch=1.7.0=pyha770c72_3
156-
- protobuf=3.20.3=py38h8dc9893_1
157-
- pthread-stubs=0.4=h36c2ea0_1001
158-
- pubchempy=1.0.4=py_0
159-
- pyasn1=0.4.8=py_0
160-
- pyasn1-modules=0.2.7=py_0
161-
- pycairo=1.24.0=py38h1a1917b_0
162-
- pycparser=2.21=pyhd8ed1ab_0
163-
- pyfume=0.2.25=pyhd8ed1ab_0
164-
- pyjwt=2.8.0=pyhd8ed1ab_0
165-
- pyopenssl=23.2.0=pyhd8ed1ab_1
166-
- pyparsing=3.0.9=pyhd8ed1ab_0
167-
- pysocks=1.7.1=pyha2e5f31_6
168-
- pyteomics=4.6=pyh7cba7a3_0
169-
- pytest=7.4.0=pyhd8ed1ab_0
170-
- python=3.8.15=h257c98d_0_cpython
171-
- python-dateutil=2.8.2=pyhd8ed1ab_0
172-
- python-flatbuffers=23.5.26=pyhd8ed1ab_0
173-
- python-louvain=0.16=pyhd8ed1ab_0
174-
- python_abi=3.8=3_cp38
175-
- pytz=2023.3=pyhd8ed1ab_0
176-
- pyu2f=0.1.5=pyhd8ed1ab_0
177-
- rdkit=2023.03.3=py38h36d2b2f_0
178-
- re2=2022.06.01=h27087fc_1
179-
- readline=8.2=h8228510_1
180-
- reportlab=4.0.4=py38h01eb140_0
181-
- requests=2.31.0=pyhd8ed1ab_0
182-
- requests-oauthlib=1.3.1=pyhd8ed1ab_0
183-
- rlpycairo=0.2.0=pyhd8ed1ab_0
184-
- rsa=4.9=pyhd8ed1ab_0
185-
- scikit-learn=1.3.0=py38hc099248_0
186-
- scipy=1.10.1=py38h59b608b_3
187-
- setuptools=68.1.2=pyhd8ed1ab_0
188-
- simpful=2.11.0=pyhd8ed1ab_0
189-
- six=1.16.0=pyh6c4a22f_0
190-
- skl2onnx=1.15.0=pyhd8ed1ab_0
191-
- smart_open=6.3.0=pyhd8ed1ab_1
192-
- snappy=1.1.10=h9fff704_0
193-
- spec2vec=0.8.0=pyhdfd78af_0
194-
- sqlalchemy=2.0.20=py38h01eb140_0
195-
- sqlite=3.43.0=h2c6b66d_0
196-
- sympy=1.12=pypyh9d50eac_103
197-
- tensorboard=2.8.0=pyhd8ed1ab_1
198-
- tensorboard-data-server=0.6.1=py38h2b5fc30_4
199-
- tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
200-
- tensorflow=2.8.1=cpu_py38h66f0ec1_0
201-
- tensorflow-base=2.8.1=cpu_py38hc7a75a0_0
202-
- tensorflow-estimator=2.8.1=cpu_py38h4e23bc6_0
203-
- termcolor=2.3.0=pyhd8ed1ab_0
204-
- threadpoolctl=3.2.0=pyha21a80b_0
205-
- tk=8.6.12=h27826a3_0
206-
- tomli=2.0.1=pyhd8ed1ab_0
207-
- tqdm=4.66.1=pyhd8ed1ab_0
208-
- typing-extensions=4.7.1=hd8ed1ab_0
209-
- typing_extensions=4.7.1=pyha770c72_0
210-
- unicodedata2=15.0.0=py38h0a891b7_0
211-
- urllib3=2.0.4=pyhd8ed1ab_0
212-
- werkzeug=2.3.7=pyhd8ed1ab_0
213-
- wheel=0.41.2=pyhd8ed1ab_0
214-
- wrapt=1.15.0=py38h1de0b5d_0
215-
- xorg-kbproto=1.0.7=h7f98852_1002
216-
- xorg-libice=1.1.1=hd590300_0
217-
- xorg-libsm=1.2.4=h7391055_0
218-
- xorg-libx11=1.8.4=h0b41bf4_0
219-
- xorg-libxau=1.0.11=hd590300_0
220-
- xorg-libxdmcp=1.1.3=h7f98852_0
221-
- xorg-libxext=1.3.4=h0b41bf4_2
222-
- xorg-libxrender=0.9.10=h7f98852_1003
223-
- xorg-renderproto=0.11.1=h7f98852_1002
224-
- xorg-xextproto=7.3.0=h0b41bf4_1003
225-
- xorg-xproto=7.0.31=h7f98852_1007
226-
- xz=5.2.6=h166bdaf_0
227-
- yarl=1.9.2=py38h01eb140_0
228-
- zip
229-
- zipp=3.16.2=pyhd8ed1ab_0
230-
- zlib=1.2.13=hd590300_5
231-
- zstd=1.5.5=hfc55251_0
7+
- python=3.8.18
8+
- matchms=0.24.1
9+
- numpy=1.24.4
10+
- spec2vec=0.8.0
11+
- h5py=3.9.0
12+
- pyarrow=12.0.1
13+
- tensorflow=2.12.1
14+
- scikit-learn=1.3.2
15+
- ms2deepscore=0.4.0
16+
- pandas=2.0.3
17+
- matplotlib=3.7.3
18+
- skl2onnx=1.16.0
19+
- onnxruntime=1.16.3
20+
- pytest=7.4.0
21+
- pytest-cov=4.1.0
22+
- zip

ms2query/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.2.4'
1+
__version__ = '1.3.0'

ms2query/benchmarking/collect_test_data_results.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,22 @@ def get_modified_cosine_score_results(lib_spectra,
157157
else:
158158
selected_lib_spectra = lib_spectra
159159
if len(selected_lib_spectra) != 0:
160-
scores_list = calculate_scores(selected_lib_spectra,
161-
[test_spectrum], ModifiedCosine()).scores_by_query(test_spectrum)
162-
# Scores list is a List[spectrum, (mod_cos, matching_peaks)
163-
cosine_scores = [scores_tuple[1]["score"] for scores_tuple in scores_list]
164-
highest_cosine_score = float(max(cosine_scores))
165-
highest_scoring_spectrum = scores_list[cosine_scores.index(highest_cosine_score)][0]
160+
scores = calculate_scores(references=selected_lib_spectra,
161+
queries=[test_spectrum],
162+
similarity_function=ModifiedCosine())
163+
# matchms can return the best matches for any query via scores_by_query
164+
sorted_scores = scores.scores_by_query(test_spectrum, 'ModifiedCosine_score', sort=True)
165+
# Scores are not stored if the cosine score is 0 (no overlapping peaks).
166+
if len(sorted_scores) == 0:
167+
highest_scoring_spectrum = random.choice(selected_lib_spectra)
168+
highest_cosine_score = (0, 0)
169+
else:
170+
highest_scoring_spectrum, highest_cosine_score = sorted_scores[0]
166171

167172
tanimoto_score = calculate_single_tanimoto_score(test_spectrum.get("smiles"),
168173
highest_scoring_spectrum.get("smiles"))
169174
exact_match = highest_scoring_spectrum.get("inchikey")[:14] == test_spectrum.get("inchikey")[:14]
170-
best_matches_for_test_spectra.append((highest_cosine_score, tanimoto_score, exact_match))
175+
best_matches_for_test_spectra.append((highest_cosine_score[0], tanimoto_score, exact_match))
171176
else:
172177
best_matches_for_test_spectra.append(None)
173178
return best_matches_for_test_spectra
@@ -189,6 +194,7 @@ def get_cosines_score_results(lib_spectra,
189194
scores_list = calculate_scores(selected_lib_spectra,
190195
[test_spectrum],
191196
CosineGreedy(tolerance=fragment_mass_tolerance)).scores_by_query(test_spectrum)
197+
# TODO: This was built with old matchms in mind; this version will not find cosine scores equal to 0
192198
cosine_scores = [scores_tuple[1].item()[0] for scores_tuple in scores_list if scores_tuple[1].item()[1] >= minimum_matched_peaks]
193199
if len(cosine_scores) != 0:
194200
highest_cosine_score = max(cosine_scores)

ms2query/benchmarking/create_accuracy_vs_recall_plot.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
import numpy as np
99
from matplotlib import pyplot as plt
1010
from tqdm import tqdm
11-
from ms2query.utils import load_json_file, load_pickled_file, save_pickled_file
11+
from ms2query.utils import (load_df_from_parquet_file, load_json_file,
12+
save_df_as_parquet_file)
1213

1314

1415
def plot_all_with_standard_deviation(means_and_standars_deviation,
@@ -231,10 +232,11 @@ def create_plot(exact_matches,
231232
if recalculate_means:
232233
dict_with_results = load_all_test_results(20, test_results_folder, exact_match=exact_matches)
233234
means_and_standard_deviation = calculate_all_means_and_standard_deviation(dict_with_results, exact_matches=exact_matches)
234-
save_pickled_file(means_and_standard_deviation,
235-
os.path.join(test_results_folder, f"means_and_standard_deviations_20_fold{extra_file_name}.pickle"))
235+
save_df_as_parquet_file(means_and_standard_deviation, os.path.join(test_results_folder,
236+
f"means_and_standard_deviations_20_fold{extra_file_name}.pickle"))
236237
else:
237-
means_and_standard_deviation = load_pickled_file(os.path.join(test_results_folder, f"means_and_standard_deviations_20_fold{extra_file_name}.pickle"))
238+
means_and_standard_deviation = load_df_from_parquet_file(
239+
os.path.join(test_results_folder, f"means_and_standard_deviations_20_fold{extra_file_name}.pickle"))
238240

239241
if exact_matches:
240242
optimal_results = means_and_standard_deviation["Optimal"]

0 commit comments

Comments
 (0)