Skip to content

Refactor Pathway Management to Use pathlib #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/config_files/ES.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
exp_name: "ES_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 100
standardize: True

Expand All @@ -11,4 +11,4 @@ generations: 100
algorithm: "ES"
elitist: True

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
6 changes: 3 additions & 3 deletions examples/config_files/GA.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
exp_name: "GA_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_size: 100
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 11
standardize: True

max_rules_by_iter: 22949
Expand All @@ -10,4 +10,4 @@ tolerance: 0.1
generations: 100
algorithm: "GA"

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
4 changes: 2 additions & 2 deletions examples/config_files/IBEA.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
exp_name: "IBEA_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 100
standardize: True

Expand All @@ -11,4 +11,4 @@ generations: 100
algorithm: "IBEA"
kappa: 1.0

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
22 changes: 0 additions & 22 deletions examples/config_files/LS.yaml

This file was deleted.

8 changes: 4 additions & 4 deletions examples/config_files/NSGAII.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
exp_name: "NSGAII_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_size: 100
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 11
standardize: True

max_rules_by_iter: 22949
tolerance: 0.1

generations: 100
generations: 10
algorithm: "NSGAII"

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
4 changes: 2 additions & 2 deletions examples/config_files/NSGAIII.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
exp_name: "NSGAIII_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 100
standardize: True

Expand All @@ -10,4 +10,4 @@ tolerance: 0.1
generations: 100
algorithm: "NSGAIII"

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
4 changes: 2 additions & 2 deletions examples/config_files/SA.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
exp_name: "SA_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 100
standardize: True

Expand All @@ -13,4 +13,4 @@ temperature: 1.0
minimum_temperature: 0.000001
alpha: 0.95

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
4 changes: 2 additions & 2 deletions examples/config_files/SPEA2.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
exp_name: "SPEA2_EXAMPLE_CONFIG"

init_pop_path: ".../.../path_to_seed_compounds.tsv"
init_pop_path: "data/seed_compounds.csv"
init_pop_size: 100
standardize: True

Expand All @@ -10,4 +10,4 @@ tolerance: 0.1
generations: 100
algorithm: "SPEA2"

output_path: ".../output_dir_path/"
output_path: "output_dir_path/"
12 changes: 12 additions & 0 deletions examples/data/seed_compounds.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
compound_id smiles
id_0 N=C(O)NC(O)C(=O)O
id_1 Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OC(=O)c2cccc(O)c2O)C(O)C1O
id_2 CC1(O)OCC(O)C1(O)O
id_3 CC1(O)OCC(O)C1=O
id_4 CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(O)=NCCC(O)=NCCSC(=O)CC(O)CCC(=O)O
id_5 CC(C)(O)C(O)C(=O)O
id_6 CCC(C)(O)C(O)C(=O)O
id_7 N=C(N)N=C(O)c1nc(Cl)c(N2CCCCCC2)nc1N
id_8 O=C(O)C(O)CO
id_9 CC(C)(CO)C(O)C(=O)O
id_10 CC(C)(CO)C(O)C(O)=NCCC(=O)O
143 changes: 143 additions & 0 deletions examples/example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-12T12:51:01.579361Z",
"start_time": "2025-02-12T12:51:00.188Z"
}
},
"source": [
"from rdkit.Chem.QED import qed\n",
"from reactea import evaluation_functions_wrapper\n",
"\n",
"# EVALUATION FUNCTIONS\n",
"\n",
"# evaluation function returning the number of rings in a molecule\n",
"def number_of_rings(mol):\n",
" ri = mol.GetRingInfo()\n",
" n_rings = len(ri.AtomRings())\n",
" return n_rings\n",
"\n",
"n_rigs_feval = evaluation_functions_wrapper(number_of_rings,\n",
" maximize=False,\n",
" worst_fitness=100,\n",
" name='n_rings')\n",
"\n",
"# evaluation function returning the drug-likeness score (QED) of a molecule\n",
"def qed_score(mol):\n",
" return qed(mol)\n",
"\n",
"qed_feval = evaluation_functions_wrapper(qed_score,\n",
" maximize=True,\n",
" worst_fitness=0.0,\n",
" name='qed')\n",
"\n",
"# CASE STUDY\n",
"\n",
"from reactea import case_study_wrapper\n",
"\n",
"# SINGLE OBJECTIVE CASE STUDY\n",
"# case study to optimize a single objective `f1` (minimize number of rings in a molecule)\n",
"minimize_rings = case_study_wrapper(n_rigs_feval,\n",
" multi_objective=False,\n",
" name='minimize_rings')\n",
"\n",
"# SINGLE-OBJECTIVE CASE STUDY WITH MULTIPLE EVALUATION FUNCTIONS\n",
"# case study to optimize a single objective but with multiple evaluation functions `f1` and `f2` (minimize number of rings in a molecule and maximize qed)\n",
"# the number of evaluation functions must be the same as the number of values in weights and the sum of the weights must be 1\n",
"minimize_rings_maximize_qed = case_study_wrapper([n_rigs_feval, qed_feval],\n",
" multi_objective=False,\n",
" name='minimize_rings_maximize_qed',\n",
" weights=[0.3, 0.7])\n",
"\n",
"# MULTI-OBJECTIVE CASE STUDY\n",
"# case study to optimize multiple objectives simultaneously\n",
"minimize_rings_maximize_qed_mo = case_study_wrapper([n_rigs_feval, qed_feval],\n",
" multi_objective=True,\n",
" name='minimize_rings_maximize_qed_mo')"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[12:51:00] Initializing Normalizer\n"
]
}
],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-12T12:51:33.381067Z",
"start_time": "2025-02-12T12:51:01.642195Z"
}
},
"cell_type": "code",
"source": [
"from reactea import run_reactea\n",
"\n",
"case_study_rings = minimize_rings_maximize_qed_mo\n",
"# provide path to configuration file and case study\n",
"run_reactea(configs_path = 'config_files/NSGAII.yaml',\n",
" case_study = case_study_rings)"
],
"id": "e217633e149b67e5",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running NSGAII\n",
"Eval(s)| Worst Best Median Average Std Dev| Worst Best Median Average Std Dev|\n",
" 11| 4.000000 0.000000 0.000000 1.000000 1.348400| 0.041206 0.508241 0.427593 0.370435 0.139558|\n",
" 22| 2.000000 0.000000 0.000000 0.545455 0.782030| 0.301591 0.662767 0.456096 0.440253 0.098033|\n",
" 33| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.396890 0.788971 0.501522 0.530570 0.110155|\n",
" 44| 3.000000 0.000000 0.000000 0.818182 1.113404| 0.456096 0.788971 0.571526 0.564990 0.096966|\n",
" 55| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.461845 0.788971 0.571526 0.580778 0.085874|\n",
" 66| 3.000000 0.000000 2.000000 1.454545 1.372697| 0.571526 0.788971 0.591504 0.628655 0.068347|\n",
" 77| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n",
" 88| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n",
" 99| 3.000000 0.000000 1.000000 1.454545 1.157084| 0.579890 0.788971 0.671357 0.664836 0.062069|\n",
" 110| 3.000000 0.000000 2.000000 1.636364 1.226431| 0.579890 0.788971 0.679245 0.677558 0.058601|\n"
]
}
],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "fdcbdfb72eec84c0"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ rdkit-pypi==2022.03.1
numpy==1.21.5
pandas==1.3.5
cytoolz==0.11.2
jmetalpy
jmetalpy==1.5.5
PyYAML==6.0
matplotlib==3.5.1
chembl_structure_pipeline
Expand Down
13 changes: 7 additions & 6 deletions src/reactea/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import warnings
from pathlib import Path
from typing import Union

from rdkit import RDLogger
Expand All @@ -9,8 +9,6 @@
from reactea.optimization.jmetal.ea import ChemicalEA
from reactea.wrappers import case_study_wrapper, evaluation_functions_wrapper

ROOT_DIR = os.path.dirname(__file__)


def run_reactea(configs_path: Union[str, dict],
case_study: CaseStudy,
Expand All @@ -22,13 +20,16 @@ def run_reactea(configs_path: Union[str, dict],
if ignore_warnings:
warnings.filterwarnings("ignore")

if isinstance(configs_path, str) and os.path.exists(configs_path):
configs_path = Path(configs_path)
if configs_path.exists():
configs = Loaders.get_config_from_yaml(configs_path)
else:
elif isinstance(configs_path, dict):
configs = configs_path
else:
raise FileNotFoundError(f"Config file {configs_path} not found.")

# set up output folder
output_folder = os.path.join(configs['output_dir'], configs['algorithm'])
output_folder = Path(configs['output_dir']) / configs['algorithm']
configs['output_dir'] = output_folder

# initialize population and initialize population smiles
Expand Down
3 changes: 0 additions & 3 deletions src/reactea/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import time
from datetime import datetime

Expand All @@ -10,8 +9,6 @@
from reactea.io_streams import Loaders, Writers
from reactea.optimization.jmetal.ea import ChemicalEA

DATA_FILES = os.path.dirname(__file__)


def setup_configuration_file(args):
# create dictionary from parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion src/reactea/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from reactea.chem import ChEMBLStandardizer
from reactea.optimization.comparators import ParetoDominanceComparator
from reactea.optimization.jmetal.operators import ReactorPseudoCrossover, ReactorMutation
from reactea.optimization.jmetal.terminators import StoppingByEvaluationsOrImprovement, StoppingByEvaluations
from reactea.optimization.jmetal.terminators import StoppingByEvaluations


class ExperimentConstants:
Expand Down
Loading