Skip to content

Commit 34ebede

Browse files
authored
Merge pull request #198 from iomega/fix_zenodo_version
Change the default additional metadata to retention_time
2 parents 2facf21 + bf24e62 commit 34ebede

File tree

11 files changed

+45
-50
lines changed

11 files changed

+45
-50
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## 1.2.2
9+
### Fixed
10+
- Set version of matchmsextras to 0.4.0, to fix dependency issue
11+
- Fix test with wrong sklearn version.
12+
13+
### Changed
14+
- Set default additional metadata from rtinseconds to retention_time
15+
816
## 1.2.1
917
### Fixed
1018
- Fix bug in downloading models from command line

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ optional arguments:
114114
--download This will download the most up to date model and library. The model will be stored in the folder given as the second argument. The model will be downloaded in the ionization mode specified under --mode
115115
--results RESULTS The folder in which the results should be stored. The default is a new results folder in the folder with the spectra
116116
--filter_ionmode Filter out all spectra that are not in the specified ion-mode. The ion mode can be specified by using --ionmode
117-
--additional_metadata Return additional metadata columns in the results, for example --additional_metadata rtinseconds feature_id
117+
--additional_metadata Return additional metadata columns in the results, for example --additional_metadata retention_time feature_id
118118
```
119119

120120
## Build MS2Query into other tools

ms2query/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ def command_line():
5151
help="Filter out all spectra that are not in the specified ion-mode. "
5252
"The ion mode can be specified by using --ionmode")
5353
parser.add_argument("--additional_metadata", action="store",
54-
default=("rtinseconds", "feature_id",),
54+
default=("retention_time", "feature_id",),
5555
nargs="+",
5656
type=str,
57-
help="Return additional metadata columns in the results, for example --additional_metadata rtinseconds feature_id")
57+
help="Return additional metadata columns in the results, for example --additional_metadata retention_time feature_id")
5858
args = parser.parse_args()
5959
ms2query_library_files_directory = args.library
6060
ms2_spectra_location = args.spectra

ms2query/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.2.1'
1+
__version__ = '1.2.2'

ms2query/create_new_library/train_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import os
77
from spec2vec.model_building import train_new_word2vec_model
88
from ms2query.create_new_library.train_ms2deepscore import train_ms2deepscore_wrapper
9-
from ms2query.create_new_library.train_ms2query_model import train_ms2query_model
9+
from ms2query.create_new_library.train_ms2query_model import train_ms2query_model, convert_to_onnx_model
1010
from ms2query.create_new_library.library_files_creator import LibraryFilesCreator
11-
from ms2query.utils import load_matchms_spectrum_objects_from_file, convert_to_onnx_model
11+
from ms2query.utils import load_matchms_spectrum_objects_from_file
1212
from ms2query.clean_and_filter_spectra import create_spectrum_documents, clean_normalize_and_split_annotated_spectra
1313

1414

ms2query/create_new_library/train_ms2query_model.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import os
77
from typing import List
88
import pandas as pd
9+
from onnxconverter_common import FloatTensorType
10+
from skl2onnx import convert_sklearn
911
from tqdm import tqdm
1012
from matchms import Spectrum
1113
from sklearn.ensemble import RandomForestRegressor
@@ -15,7 +17,7 @@
1517
from ms2query.create_new_library.library_files_creator import LibraryFilesCreator
1618
from ms2query.create_new_library.split_data_for_training import split_spectra_on_inchikeys, split_training_and_validation_spectra
1719
from ms2query.create_new_library.calculate_tanimoto_scores import calculate_tanimoto_scores_from_smiles
18-
from ms2query.utils import save_pickled_file
20+
from ms2query.utils import save_pickled_file, return_non_existing_file_name
1921

2022

2123
class DataCollectorForTraining():
@@ -142,3 +144,17 @@ def train_ms2query_model(training_spectra,
142144
# Train MS2Query model
143145
ms2query_model = train_random_forest(training_scores, training_labels)
144146
return ms2query_model
147+
148+
149+
def convert_to_onnx_model(random_forest_model, file_name = None):
150+
"""The randomforest model is stored as an onnx model for backwards compatability"""
151+
FloatTensorType([None, 5])
152+
onnx = convert_sklearn(random_forest_model, initial_types=[("input",
153+
FloatTensorType([None, random_forest_model.n_features_in_]))],
154+
target_opset=12)
155+
if file_name is not None:
156+
file_name = return_non_existing_file_name(file_name)
157+
158+
with open(file_name, "wb") as file:
159+
file.write(onnx.SerializeToString())
160+
return onnx

ms2query/utils.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
from matchms import importing
77
from spec2vec.Spec2Vec import Spectrum
8-
from skl2onnx import convert_sklearn
9-
from skl2onnx.common.data_types import FloatTensorType
108
from onnxruntime import InferenceSession
119

1210

@@ -216,20 +214,6 @@ def __init__(self,
216214
self.filter_on_ion_mode = filter_on_ion_mode
217215

218216

219-
def convert_to_onnx_model(random_forest_model, file_name = None):
220-
"""The randomforest model is stored as an onnx model for backwards compatability"""
221-
FloatTensorType([None, 5])
222-
onnx = convert_sklearn(random_forest_model, initial_types=[("input",
223-
FloatTensorType([None, random_forest_model.n_features_in_]))],
224-
target_opset=12)
225-
if file_name is not None:
226-
file_name = return_non_existing_file_name(file_name)
227-
228-
with open(file_name, "wb") as file:
229-
file.write(onnx.SerializeToString())
230-
return onnx
231-
232-
233217
def predict_onnx_model(random_forest_onnx_model: InferenceSession, input_values):
234218
"""Makes predictions for an onnx model"""
235219
# input_name = random_forest_onnx_model.get_inputs()[0].name

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
"ms2deepscore",
4141
"gensim>=4.0.0",
4242
"pandas>=1.2.5,<2.0.0",
43-
"matchmsextras>=0.4.0",
43+
"matchmsextras==0.4.0",
4444
"pubchempy", #This is a dependency for matchmsextras, which is missing in setup
4545
"tqdm",
4646
"matplotlib",
-1.02 MB
Binary file not shown.

tests/test_train_ms2query_model.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import os
2+
3+
import numpy as np
24
import pytest
35
import sys
46
import pandas as pd
57
from ms2query.create_new_library.train_ms2query_model import \
6-
DataCollectorForTraining, calculate_tanimoto_scores_with_library, train_random_forest, train_ms2query_model
7-
from ms2query.utils import load_pickled_file, load_matchms_spectrum_objects_from_file, convert_to_onnx_model
8+
DataCollectorForTraining, calculate_tanimoto_scores_with_library, train_random_forest, train_ms2query_model, \
9+
convert_to_onnx_model
10+
from ms2query.utils import load_pickled_file, load_matchms_spectrum_objects_from_file, load_ms2query_model, \
11+
predict_onnx_model
812
from onnxruntime import InferenceSession
913
from ms2query.utils import predict_onnx_model
1014
from ms2query.ms2library import MS2Library
@@ -76,15 +80,19 @@ def test_calculate_all_tanimoto_scores(tmp_path, ms2library, query_spectra):
7680
pd.testing.assert_frame_equal(result, expected_result, check_dtype=False)
7781

7882

79-
def test_train_random_forest():
83+
def test_train_and_save_random_forest():
8084
training_scores, training_labels = load_pickled_file(os.path.join(
8185
os.path.split(os.path.dirname(__file__))[0],
8286
"tests/test_files/test_files_train_ms2query_nn",
8387
"expected_train_and_val_data.pickle"))[:2]
8488
ms2query_model = train_random_forest(training_scores, training_labels)
8589
onnx_model = convert_to_onnx_model(ms2query_model)
8690
onnx_model_session = InferenceSession(onnx_model.SerializeToString())
87-
predictions = predict_onnx_model(onnx_model_session, training_scores.values)
91+
predictions_onnx_model = predict_onnx_model(onnx_model_session, training_scores.values)
92+
93+
# check if saving onnx model works
94+
predictions_sklearn_model = ms2query_model.predict(training_scores.values.astype(np.float32))
95+
assert np.allclose(predictions_onnx_model, predictions_sklearn_model)
8896

8997

9098
@pytest.mark.integration

0 commit comments

Comments
 (0)