Skip to content

change files of 1_import_file #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions streamlit_app/FAIR_MS_Library_Editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,21 @@
st.markdown("## Datasets")
if 'datasets' not in st.session_state or st.session_state['datasets'] == {}:
st.warning("Please upload a file to begin!")
if 'selected_sheets' not in st.session_state or st.session_state['selected_sheets'] == {}:
st.warning("Please select a dataset to begin!")
#if 'selected_sheets' not in st.session_state or st.session_state['selected_sheets'] == {}:
#st.warning("Please select a dataset to begin!")
# with st.spinner("Loading..."):
# time.sleep(5)
# st.success("Done!")
if 'datasets' in st.session_state and st.session_state['datasets'] != {}:
for key in st.session_state['selected_sheets']:
with st.expander(key):
datasets = st.session_state['datasets']
rowsMetricColumn, columnsMetricColumn = st.columns(2)
with rowsMetricColumn:
st.metric('Rows', datasets[key].shape[0])
with columnsMetricColumn:
st.metric('Columns', datasets[key].shape[1])
if 'df_spectra' in st.session_state:

df_spectra = st.session_state['df_spectra']
st.metric('Detected how many spectra', len(df_spectra))
# if st.button("Edit", key=key):
# selected_sheet = key
# if key in datasets_metadata:
# st.write(datasets_metadata[key].keys())
# st.write(datasets[dataset_key])
# st.json(datasets)

if 'spectra' in st.session_state:
spectra = st.session_state["spectra"]
9 changes: 9 additions & 0 deletions streamlit_app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ Use python venv to use defined dependencies.
python -m venv venv
source venv/bin/activate


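### Windows
1. cd C:/Users/<your-username>
2. python -m venv venv
3. .\venv\Scripts\activate.bat (when using the Command Prompt)
4. Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser (PowerShell only: permits running the activation script)
5. .\venv\Scripts\activate.ps1 (when using PowerShell)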


to activate the virtual environment.

You can then use the provided requirements.txt to populate the required dependencies in your virtual environment.
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,4 @@ def user_rerun_repair(self, spectrum_id, rerun: bool):
self.modifications[spectrum_id] = self.spectrum_repairer.process_spectrum_store_modifications(self.spectra[spectrum_id])
self.failed_requirements[spectrum_id] = self.spectrum_validator.process_spectrum_store_failed_filters(self.spectra[spectrum_id])



Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ def test_user_metadata_change():
library_handler.user_metadata_change(spectrum_id=spectrum_id, field_name="smiles", user_input="CCC")
assert library_handler.spectra[spectrum_id].get("smiles") == "CCC"


def test_return_():
    """Smoke-test retrieving the validation info of the first spectrum.

    Builds a handler from the example library and requests the
    modifications, failed requirements and metadata for spectrum 0.
    Nothing is asserted yet, so the test only fails if the call raises.
    """
    library_handler = LibraryHandler("./examples/test_case_correct.mgf")
    spectrum_id = 0
    modifications, failed_requirements, metadata = library_handler.return_user_validation_info(spectrum_id=spectrum_id)
    # todo add checks that tests that the expected output is given.
    # Printed once for manual inspection until real assertions exist.
    print(metadata)
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
METADATA_FIELDS_OF_INTEREST = ["parent_mass", "precursor_mz", "adduct", "smiles",
"compound_name", "inchi", "inchikey", "charge", "ionmode"]


class Modification:
def __init__(self, metadata_field, before, after, logging_message, validated_by_user):
self.metadata_field = metadata_field
Expand Down
86 changes: 51 additions & 35 deletions streamlit_app/pages/1_File_Import.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from library_spectra_validation.library_handler import LibraryHandler
import os

import streamlit as st
import pandas as pd
from matchms.importing import load_from_mgf
from tempfile import NamedTemporaryFile


st.set_page_config(
layout="wide",
page_title="File Import - FAIR MS Library Curation Editor",
page_title="File Import (.mgf) - FAIR MS Library Curation Editor",
#page_icon="assets/favicon.ico",
menu_items={
'Get Help': 'https://github.com/mzmine/biohack23_p15',
Expand All @@ -12,10 +18,14 @@
}
)

st.markdown("## File Import")
st.markdown("Please select an Excel file to upload. The file should contain one or more sheets. Each sheet should contain sample columns, detailing factors of each individual sample (rows). Lipid identities are the column headers of the non-sample columns, quantities should be reported in the cells.")
st.markdown("## File Import (.mgf)")
st.markdown("Please select an mgf to upload.")

uploaded_file = st.file_uploader("Choose a file", type = ".mgf")
st.set_option('deprecation.showfileUploaderEncoding', False)



uploaded_file = st.file_uploader("Choose a file", )
if uploaded_file is not None:
print(uploaded_file)
st.session_state['uploaded_file'] = uploaded_file
Expand All @@ -29,35 +39,41 @@
datasets = st.session_state['datasets']
else:
st.session_state['datasets'] = datasets

mgf_file = os.path.join(st.session_state['working_dir'], uploaded_file.name)

with open(file=mgf_file, mode="wb") as f:
f.write(uploaded_file.getbuffer())

# load spectra from mgf, TODO: replace with SpectralLibrary implementation and import
# we will receive a list of spectra metadata (each being a dataframe)
# TODO: we can retrieve each spectrum from the SpectralLibrary object
# TODO: display forward and backward buttons to page through spectra

lib_handler = LibraryHandler(f.name)

spectra_temp = load_from_mgf(f.name)
spectra = list(spectra_temp)
df_spectra = pd.DataFrame({"spectrum": spectra})

xl = pd.ExcelFile(uploaded_file)
sheets = xl.sheet_names
for sheet in sheets:
if sheet not in datasets:
df = pd.read_excel(uploaded_file, sheet_name=sheet)
datasets[sheet] = df

st.markdown("## Preview Sheets")
sheet_selector = st.selectbox(
"Select a sheet",
sheets
)
if sheet_selector is not None and sheet_selector in datasets:
rowsMetricColumn, columnsMetricColumn = st.columns(2)
with rowsMetricColumn:
st.metric('Rows', datasets[sheet_selector].shape[0])
with columnsMetricColumn:
st.metric('Columns', datasets[sheet_selector].shape[1])
st.write(datasets[sheet_selector])

st.markdown("## Select Sheets as Datasets")
selected_sheets = st.multiselect(
'Each selected sheet will be converted to a dataset',
sheets,
sheets
)
st.session_state['datasets'] = datasets
st.session_state['selected_sheets'] = selected_sheets

if 'datasets' not in st.session_state:
st.session_state['datasets'] = []
# make dataframe for metadata
def extract_metadata(df, keys):
    """Copy metadata values out of the spectra into columns of their own.

    For every name in ``keys`` a column of that name is written to ``df``;
    each cell holds ``spectrum.get(name)`` for the object stored in that
    row's "spectrum" column. ``df`` is modified in place and nothing is
    returned.
    """
    for field in keys:
        # Bind the current field as a default so the helper does not depend
        # on the loop variable after this iteration.
        def read_field(spectrum, name=field):
            return spectrum.get(name)

        df[field] = df["spectrum"].apply(read_field)


extract_metadata(df_spectra, df_spectra["spectrum"][0].metadata.keys())

st.markdown("## Preview Information")

st.metric('Detected how many spectra', len(df_spectra))

st.data_editor(df_spectra)# , on_change=user_metadata_change())


st.session_state['spectra'] = spectra
st.session_state['df_spectra'] = df_spectra
st.session_state['len_spectra'] = len(df_spectra)

if 'df_spectra' not in st.session_state:
st.session_state['df_spectra'] = []
58 changes: 58 additions & 0 deletions streamlit_app/pages/2_Plot_spectrum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from matchms.plotting.spectrum_plots import plot_spectra_mirror, plot_spectrum
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pubchempy
from rdkit import Chem
from rdkit.Chem import Draw




# Streamlit page: choose a spectrum by its compound name and show the
# molecule drawing (from its SMILES) next to the spectrum's peak plot.
st.set_page_config(
    layout="wide",
    # NOTE(review): the title says "Library Export" although this page plots
    # spectra; looks copied from another page, confirm the intended title.
    page_title="Library Export - FAIR MS Library Curation Editor",
    #page_icon="assets/favicon.ico",
    menu_items={
        'Get Help': 'https://github.com/mzmine/biohack23_p15',
        'Report a bug': "https://github.com/mzmine/biohack23_p15/issues/new/choose",
        'About': "# This is the creation and curation wizard for FAIR MS Libraries."
    }
)


st.markdown("## Plot selected MS2 spectrum")
st.markdown("Please select a spectrum based on compound_name")


# Both entries are written by the file import page. They are missing when
# this page is opened first, and 'df_spectra' may be an empty list
# placeholder instead of a DataFrame, so read them defensively.
spectra = st.session_state.get('spectra')
df_spectra = st.session_state.get('df_spectra')

if not spectra or not isinstance(df_spectra, pd.DataFrame):
    st.warning("Please upload a file to begin!")
    st.stop()

if "compound_name" not in df_spectra.columns:
    st.warning("The imported spectra have no compound_name metadata to select from.")
    st.stop()

cmp_list = df_spectra["compound_name"].tolist()

st.markdown("## Select a compound name")
cmp_selector = st.selectbox(
    "select a compound name",
    cmp_list
)

if cmp_selector in cmp_list:
    # list.index returns the position of the first spectrum with this name,
    # so the row is looked up by position (iloc), not by index label.
    cmp_id = cmp_list.index(cmp_selector)
    cmp_smile = df_spectra.iloc[cmp_id].get("smiles")

    plt_spectrum = spectra[cmp_id]

    fig, axs = plt.subplots(1, 2, figsize=(12.8, 4.2), gridspec_kw={'width_ratios': [2, 5]}, sharey=False)
    try:
        # Only attempt to parse real, non-empty strings; MolFromSmiles
        # returns None when the SMILES cannot be parsed.
        has_smiles = isinstance(cmp_smile, str) and cmp_smile != ""
        mol = Chem.MolFromSmiles(cmp_smile) if has_smiles else None

        axs[0].grid(False)
        axs[0].tick_params(axis='both', bottom=False, labelbottom=False, left=False, labelleft=False)
        if mol is None:
            axs[0].set_title("No valid SMILES available")
        else:
            # NOTE(review): `ax` does not appear to be a documented
            # MolToImage parameter (the image is placed via imshow below);
            # kept as in the original, confirm whether it can be dropped.
            cmp_img = Chem.Draw.MolToImage(mol, ax=axs[0])
            axs[0].set_title(cmp_smile)
            axs[0].imshow(cmp_img)
        axs[0].axis("off")

        # Pass the axes by keyword. NOTE(review): the original passed it
        # positionally; in matchms the second positional parameter is
        # believed to be `annotate_ions`, not `ax` - confirm against the
        # installed version.
        plot_spectrum(plt_spectrum, ax=axs[1])

        st.pyplot(fig)
    finally:
        # Streamlit reruns this script on every interaction; close the
        # figure so pyplot does not keep accumulating open figures.
        plt.close(fig)
26 changes: 24 additions & 2 deletions streamlit_app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,63 @@ cachetools==5.3.2
certifi==2023.7.22
charset-normalizer==3.3.1
click==8.1.7
contourpy==1.1.1
cycler==0.12.1
Deprecated==1.2.14
fonttools==4.43.1
gitdb==4.0.11
GitPython==3.1.40
idna==3.4
importlib-metadata==6.8.0
importlib-resources==6.1.0
Jinja2==3.1.2
jsonschema==4.19.2
jsonschema-specifications==2023.7.1
kiwisolver==1.4.5
llvmlite==0.40.1
lxml==4.9.3
markdown-it-py==3.0.0
MarkupSafe==2.1.3
matchms==0.23.1
matplotlib==3.8.1
mdurl==0.1.2
numpy==1.26.1
networkx==3.2.1
numba==0.57.1
numpy==1.24.4
packaging==23.2
pandas==2.1.2
pickydict==0.4.0
Pillow==10.1.0
protobuf==4.24.4
PubChemPy==1.0.4
pyarrow==13.0.0
pydeck==0.8.1b0
Pygments==2.16.1
pyparsing==3.1.1
pyteomics==4.6.2
python-dateutil==2.8.2
pytz==2023.3.post1
PyYAML==6.0.1
rdkit==2023.9.1
referencing==0.30.2
requests==2.31.0
rich==13.6.0
rpds-py==0.10.6
scipy==1.10.1
six==1.16.0
smmap==5.0.1
sparsestack==0.4.1
streamlit==1.28.0
tenacity==8.2.3
toml==0.10.2
toolz==0.12.0
tornado==6.3.3
tqdm==4.66.1
typing_extensions==4.8.0
tzdata==2023.3
tzlocal==5.2
urllib3==2.0.7
validators==0.22.0
watchdog==3.0.0
zipp==3.17.0
wrapt==1.15.0
zipp==3.17.0