Skip to content

Commit 6457b4c

Browse files
authored
Merge pull request #57 from BioSystemsUM/paths
Refactor Pathway Management to Use `pathlib`
2 parents 561f5b4 + aa55850 commit 6457b4c

24 files changed

+251
-121
lines changed

examples/config_files/ES.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
exp_name: "ES_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
3+
init_pop_path: "data/seed_compounds.csv"
44
init_pop_size: 100
55
standardize: True
66

@@ -11,4 +11,4 @@ generations: 100
1111
algorithm: "ES"
1212
elitist: True
1313

14-
output_path: ".../output_dir_path/"
14+
output_path: "output_dir_path/"

examples/config_files/GA.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
exp_name: "GA_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
4-
init_pop_size: 100
3+
init_pop_path: "data/seed_compounds.csv"
4+
init_pop_size: 11
55
standardize: True
66

77
max_rules_by_iter: 22949
@@ -10,4 +10,4 @@ tolerance: 0.1
1010
generations: 100
1111
algorithm: "GA"
1212

13-
output_path: ".../output_dir_path/"
13+
output_path: "output_dir_path/"

examples/config_files/IBEA.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
exp_name: "IBEA_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
3+
init_pop_path: "data/seed_compounds.csv"
44
init_pop_size: 100
55
standardize: True
66

@@ -11,4 +11,4 @@ generations: 100
1111
algorithm: "IBEA"
1212
kappa: 1.0
1313

14-
output_path: ".../output_dir_path/"
14+
output_path: "output_dir_path/"

examples/config_files/LS.yaml

-22
This file was deleted.

examples/config_files/NSGAII.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
exp_name: "NSGAII_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
4-
init_pop_size: 100
3+
init_pop_path: "data/seed_compounds.csv"
4+
init_pop_size: 11
55
standardize: True
66

77
max_rules_by_iter: 22949
88
tolerance: 0.1
99

10-
generations: 100
10+
generations: 10
1111
algorithm: "NSGAII"
1212

13-
output_path: ".../output_dir_path/"
13+
output_path: "output_dir_path/"

examples/config_files/NSGAIII.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
exp_name: "NSGAIII_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
3+
init_pop_path: "data/seed_compounds.csv"
44
init_pop_size: 100
55
standardize: True
66

@@ -10,4 +10,4 @@ tolerance: 0.1
1010
generations: 100
1111
algorithm: "NSGAIII"
1212

13-
output_path: ".../output_dir_path/"
13+
output_path: "output_dir_path/"

examples/config_files/SA.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
exp_name: "SA_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
3+
init_pop_path: "data/seed_compounds.csv"
44
init_pop_size: 100
55
standardize: True
66

@@ -13,4 +13,4 @@ temperature: 1.0
1313
minimum_temperature: 0.000001
1414
alpha: 0.95
1515

16-
output_path: ".../output_dir_path/"
16+
output_path: "output_dir_path/"

examples/config_files/SPEA2.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
exp_name: "SPEA2_EXAMPLE_CONFIG"
22

3-
init_pop_path: ".../.../path_to_seed_compounds.tsv"
3+
init_pop_path: "data/seed_compounds.csv"
44
init_pop_size: 100
55
standardize: True
66

@@ -10,4 +10,4 @@ tolerance: 0.1
1010
generations: 100
1111
algorithm: "SPEA2"
1212

13-
output_path: ".../output_dir_path/"
13+
output_path: "output_dir_path/"

examples/data/seed_compounds.csv

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
compound_id smiles
2+
id_0 N=C(O)NC(O)C(=O)O
3+
id_1 Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OC(=O)c2cccc(O)c2O)C(O)C1O
4+
id_2 CC1(O)OCC(O)C1(O)O
5+
id_3 CC1(O)OCC(O)C1=O
6+
id_4 CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(O)=NCCC(O)=NCCSC(=O)CC(O)CCC(=O)O
7+
id_5 CC(C)(O)C(O)C(=O)O
8+
id_6 CCC(C)(O)C(O)C(=O)O
9+
id_7 N=C(N)N=C(O)c1nc(Cl)c(N2CCCCCC2)nc1N
10+
id_8 O=C(O)C(O)CO
11+
id_9 CC(C)(CO)C(O)C(=O)O
12+
id_10 CC(C)(CO)C(O)C(O)=NCCC(=O)O

examples/example.ipynb

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"id": "initial_id",
6+
"metadata": {
7+
"collapsed": true,
8+
"ExecuteTime": {
9+
"end_time": "2025-02-12T12:51:01.579361Z",
10+
"start_time": "2025-02-12T12:51:00.188Z"
11+
}
12+
},
13+
"source": [
14+
"from rdkit.Chem.QED import qed\n",
15+
"from reactea import evaluation_functions_wrapper\n",
16+
"\n",
17+
"# EVALUATION FUNCTIONS\n",
18+
"\n",
19+
"# evaluation function returning the number of rings in a molecule\n",
20+
"def number_of_rings(mol):\n",
21+
" ri = mol.GetRingInfo()\n",
22+
" n_rings = len(ri.AtomRings())\n",
23+
" return n_rings\n",
24+
"\n",
25+
"n_rigs_feval = evaluation_functions_wrapper(number_of_rings,\n",
26+
" maximize=False,\n",
27+
" worst_fitness=100,\n",
28+
" name='n_rings')\n",
29+
"\n",
30+
"# evaluation function returning the drug-likeness score (QED) of a molecule\n",
31+
"def qed_score(mol):\n",
32+
" return qed(mol)\n",
33+
"\n",
34+
"qed_feval = evaluation_functions_wrapper(qed_score,\n",
35+
" maximize=True,\n",
36+
" worst_fitness=0.0,\n",
37+
" name='qed')\n",
38+
"\n",
39+
"# CASE STUDY\n",
40+
"\n",
41+
"from reactea import case_study_wrapper\n",
42+
"\n",
43+
"# SINGLE OBJECTIVE CASE STUDY\n",
44+
"# case study to optimize a single objective `f1` (minimize number of rings in a molecule)\n",
45+
"minimize_rings = case_study_wrapper(n_rigs_feval,\n",
46+
" multi_objective=False,\n",
47+
" name='minimize_rings')\n",
48+
"\n",
49+
"# SINGLE-OBJECTIVE CASE STUDY WITH MULTIPLE EVALUATION FUNCTIONS\n",
50+
"# case study to optimize a single objective but with multiple evaluation functions `f1` and `f2` (minimize number of rings in a molecule and maximize qed)\n",
51+
"# the number of evaluation functions must be the same as the number of values in weights and the sum of the weights must be 1\n",
52+
"minimize_rings_maximize_qed = case_study_wrapper([n_rigs_feval, qed_feval],\n",
53+
" multi_objective=False,\n",
54+
" name='minimize_rings_maximize_qed',\n",
55+
" weights=[0.3, 0.7])\n",
56+
"\n",
57+
"# MULTI-OBJECTIVE CASE STUDY\n",
58+
"# case study to optimize multiple objectives simultaneously\n",
59+
"minimize_rings_maximize_qed_mo = case_study_wrapper([n_rigs_feval, qed_feval],\n",
60+
" multi_objective=True,\n",
61+
" name='minimize_rings_maximize_qed_mo')"
62+
],
63+
"outputs": [
64+
{
65+
"name": "stderr",
66+
"output_type": "stream",
67+
"text": [
68+
"[12:51:00] Initializing Normalizer\n"
69+
]
70+
}
71+
],
72+
"execution_count": 1
73+
},
74+
{
75+
"metadata": {
76+
"ExecuteTime": {
77+
"end_time": "2025-02-12T12:51:33.381067Z",
78+
"start_time": "2025-02-12T12:51:01.642195Z"
79+
}
80+
},
81+
"cell_type": "code",
82+
"source": [
83+
"from reactea import run_reactea\n",
84+
"\n",
85+
"case_study_rings = minimize_rings_maximize_qed_mo\n",
86+
"# provide path to configuration file and case study\n",
87+
"run_reactea(configs_path = 'config_files/NSGAII.yaml',\n",
88+
" case_study = case_study_rings)"
89+
],
90+
"id": "e217633e149b67e5",
91+
"outputs": [
92+
{
93+
"name": "stdout",
94+
"output_type": "stream",
95+
"text": [
96+
"Running NSGAII\n",
97+
"Eval(s)| Worst Best Median Average Std Dev| Worst Best Median Average Std Dev|\n",
98+
" 11| 4.000000 0.000000 0.000000 1.000000 1.348400| 0.041206 0.508241 0.427593 0.370435 0.139558|\n",
99+
" 22| 2.000000 0.000000 0.000000 0.545455 0.782030| 0.301591 0.662767 0.456096 0.440253 0.098033|\n",
100+
" 33| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.396890 0.788971 0.501522 0.530570 0.110155|\n",
101+
" 44| 3.000000 0.000000 0.000000 0.818182 1.113404| 0.456096 0.788971 0.571526 0.564990 0.096966|\n",
102+
" 55| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.461845 0.788971 0.571526 0.580778 0.085874|\n",
103+
" 66| 3.000000 0.000000 2.000000 1.454545 1.372697| 0.571526 0.788971 0.591504 0.628655 0.068347|\n",
104+
" 77| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n",
105+
" 88| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n",
106+
" 99| 3.000000 0.000000 1.000000 1.454545 1.157084| 0.579890 0.788971 0.671357 0.664836 0.062069|\n",
107+
" 110| 3.000000 0.000000 2.000000 1.636364 1.226431| 0.579890 0.788971 0.679245 0.677558 0.058601|\n"
108+
]
109+
}
110+
],
111+
"execution_count": 2
112+
},
113+
{
114+
"metadata": {},
115+
"cell_type": "code",
116+
"outputs": [],
117+
"execution_count": null,
118+
"source": "",
119+
"id": "fdcbdfb72eec84c0"
120+
}
121+
],
122+
"metadata": {
123+
"kernelspec": {
124+
"display_name": "Python 3",
125+
"language": "python",
126+
"name": "python3"
127+
},
128+
"language_info": {
129+
"codemirror_mode": {
130+
"name": "ipython",
131+
"version": 2
132+
},
133+
"file_extension": ".py",
134+
"mimetype": "text/x-python",
135+
"name": "python",
136+
"nbconvert_exporter": "python",
137+
"pygments_lexer": "ipython2",
138+
"version": "2.7.6"
139+
}
140+
},
141+
"nbformat": 4,
142+
"nbformat_minor": 5
143+
}

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ rdkit-pypi==2022.03.1
22
numpy==1.21.5
33
pandas==1.3.5
44
cytoolz==0.11.2
5-
jmetalpy
5+
jmetalpy==1.5.5
66
PyYAML==6.0
77
matplotlib==3.5.1
88
chembl_structure_pipeline

src/reactea/__init__.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import os
21
import warnings
2+
from pathlib import Path
33
from typing import Union
44

55
from rdkit import RDLogger
@@ -9,8 +9,6 @@
99
from reactea.optimization.jmetal.ea import ChemicalEA
1010
from reactea.wrappers import case_study_wrapper, evaluation_functions_wrapper
1111

12-
ROOT_DIR = os.path.dirname(__file__)
13-
1412

1513
def run_reactea(configs_path: Union[str, dict],
1614
case_study: CaseStudy,
@@ -22,13 +20,16 @@ def run_reactea(configs_path: Union[str, dict],
2220
if ignore_warnings:
2321
warnings.filterwarnings("ignore")
2422

25-
if isinstance(configs_path, str) and os.path.exists(configs_path):
23+
configs_path = Path(configs_path)
24+
if configs_path.exists():
2625
configs = Loaders.get_config_from_yaml(configs_path)
27-
else:
26+
elif isinstance(configs_path, dict):
2827
configs = configs_path
28+
else:
29+
raise FileNotFoundError(f"Config file {configs_path} not found.")
2930

3031
# set up output folder
31-
output_folder = os.path.join(configs['output_dir'], configs['algorithm'])
32+
output_folder = Path(configs['output_dir']) / configs['algorithm']
3233
configs['output_dir'] = output_folder
3334

3435
# initialize population and initialize population smiles

src/reactea/cli.py

-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import os
21
import time
32
from datetime import datetime
43

@@ -10,8 +9,6 @@
109
from reactea.io_streams import Loaders, Writers
1110
from reactea.optimization.jmetal.ea import ChemicalEA
1211

13-
DATA_FILES = os.path.dirname(__file__)
14-
1512

1613
def setup_configuration_file(args):
1714
# create dictionary from parser.parse_args()

src/reactea/constants.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from reactea.chem import ChEMBLStandardizer
88
from reactea.optimization.comparators import ParetoDominanceComparator
99
from reactea.optimization.jmetal.operators import ReactorPseudoCrossover, ReactorMutation
10-
from reactea.optimization.jmetal.terminators import StoppingByEvaluationsOrImprovement, StoppingByEvaluations
10+
from reactea.optimization.jmetal.terminators import StoppingByEvaluations
1111

1212

1313
class ExperimentConstants:

0 commit comments

Comments
 (0)