Skip to content

Commit e27027f

Browse files
committed
skip matchms matrix method (for now)
1 parent de95f77 commit e27027f

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

ms2query/metrics.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,29 @@ def generalized_tanimoto_similarity_matrix_weighted(references: np.ndarray, quer
459459
for j in range(size2):
460460
scores[i, j] = generalized_tanimoto_similarity_weighted(references[i, :], queries[j, :], weights)
461461
return scores
462+
463+
464+
def compute_cosine_greedy(cosine_obj, spectra):
    """Compute a dense, symmetric all-vs-all score matrix for ``spectra``.

    Temporary replacement for the matchms ``matrix`` method, to be used until
    that method allows disabling its tqdm progress bar. No progress bar is
    shown here.

    Parameters
    ----------
    cosine_obj
        Similarity object providing ``pair(reference, query)``,
        ``keep_score(score)`` and a ``score_datatype`` attribute.
    spectra
        Sequence of spectra; every spectrum is compared against every other
        spectrum (and against itself).

    Returns
    -------
    np.ndarray
        Array of shape ``(len(spectra), len(spectra))`` with dtype
        ``cosine_obj.score_datatype``. Pairs whose score is rejected by
        ``cosine_obj.keep_score`` are left at zero.
    """
    n_rows = n_cols = len(spectra)

    idx_row = []
    idx_col = []
    scores = []
    # Only the upper triangle (including the diagonal) is computed; each kept
    # score is mirrored to the lower triangle because the matrix is symmetric.
    for i_ref, reference in enumerate(spectra):
        for i_query, query in enumerate(spectra[i_ref:n_cols], start=i_ref):
            score = cosine_obj.pair(reference, query)
            if cosine_obj.keep_score(score):
                idx_row.extend((i_ref, i_query))
                idx_col.extend((i_query, i_ref))
                scores.extend((score, score))

    idx_row = np.array(idx_row, dtype=np.int_)
    idx_col = np.array(idx_col, dtype=np.int_)
    scores_data = np.array(scores, dtype=cosine_obj.score_datatype)
    # TODO: make StackedSparseArray the default and add fixed function to
    # output different formats (with code below)

    # This is a plain function, so the dtype has to come from ``cosine_obj``;
    # there is no ``self`` in scope here.
    scores_array = np.zeros(shape=(n_rows, n_cols), dtype=cosine_obj.score_datatype)
    scores_array[idx_row, idx_col] = scores_data.reshape(-1)
    return scores_array

ms2query/spectral_processing/merging_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from scipy.sparse.csgraph import connected_components
44
from matchms import Spectrum
55

6+
from ms2query.metrics import compute_cosine_greedy
7+
68

79
METADATA_FIELDS_FROM_FIRST = [
810
"ionmode", "smiles", "inchikey", "inchi", "name", "precursor_mz",
@@ -211,7 +213,8 @@ def cluster_block(spectra, sim_score, threshold=0.95):
211213
Spectra with similarity >= threshold will be merged.
212214
"""
213215
# similarity
214-
sim = sim_score.matrix(spectra, spectra, is_symmetric=True)
216+
#sim = sim_score.matrix(spectra, spectra, is_symmetric=True)
217+
sim = compute_cosine_greedy(sim_score, spectra)
215218
S = sim["score"]
216219

217220
# Graph by threshold on upper triangle

0 commit comments

Comments
 (0)