From 94a7a52503ef7bd7d5720e29a7f2c66fe5b737a1 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:53:35 +0000 Subject: [PATCH 01/10] [ADD] use pathlib instead of os --- src/reactea/__init__.py | 13 +++--- src/reactea/cli.py | 3 -- src/reactea/io_streams/readers.py | 45 +++++++------------ src/reactea/io_streams/writers.py | 34 +++++++------- tests/__init__.py | 4 +- tests/base_test_cases.py | 26 +++++++---- .../algorithms/test_mo_algorithms.py | 7 +-- 7 files changed, 61 insertions(+), 71 deletions(-) diff --git a/src/reactea/__init__.py b/src/reactea/__init__.py index 953a2b7..2f951e2 100644 --- a/src/reactea/__init__.py +++ b/src/reactea/__init__.py @@ -1,5 +1,5 @@ -import os import warnings +from pathlib import Path from typing import Union from rdkit import RDLogger @@ -9,8 +9,6 @@ from reactea.optimization.jmetal.ea import ChemicalEA from reactea.wrappers import case_study_wrapper, evaluation_functions_wrapper -ROOT_DIR = os.path.dirname(__file__) - def run_reactea(configs_path: Union[str, dict], case_study: CaseStudy, @@ -22,13 +20,16 @@ def run_reactea(configs_path: Union[str, dict], if ignore_warnings: warnings.filterwarnings("ignore") - if isinstance(configs_path, str) and os.path.exists(configs_path): + configs_path = Path(configs_path) + if configs_path.exists(): configs = Loaders.get_config_from_yaml(configs_path) - else: + elif isinstance(configs_path, dict): configs = configs_path + else: + raise FileNotFoundError(f"Config file {configs_path} not found.") # set up output folder - output_folder = os.path.join(configs['output_dir'], configs['algorithm']) + output_folder = Path(configs['output_dir']) / configs['algorithm'] configs['output_dir'] = output_folder # initialize population and initialize population smiles diff --git a/src/reactea/cli.py b/src/reactea/cli.py index 0efd080..adf142d 100644 --- a/src/reactea/cli.py +++ b/src/reactea/cli.py @@ -1,4 +1,3 @@ -import os import time from datetime import datetime @@ -10,8 +9,6 @@ from reactea.io_streams import Loaders, Writers from reactea.optimization.jmetal.ea import ChemicalEA -DATA_FILES = os.path.dirname(__file__) - def setup_configuration_file(args): # create dictionary from parser.parse_args() diff --git a/src/reactea/io_streams/readers.py b/src/reactea/io_streams/readers.py index a0df2ad..de9e645 100644 --- a/src/reactea/io_streams/readers.py +++ b/src/reactea/io_streams/readers.py @@ -1,6 +1,6 @@ -import os import time from datetime import datetime +from pathlib import Path import yaml @@ -10,6 +10,9 @@ from reactea.constants import ChemConstants +DATA_FILES = Path(__file__).resolve().parent.parent / 'data' +DEEPSWEET_MOLDES = Path(__file__).parent.parent + class Loaders: """ @@ -17,34 +20,13 @@ class Loaders: """ @staticmethod - def from_root(file_path: str): - """ - Gets path of file from root. - - Parameters - ---------- - file_path: str - file path - - Returns - ------- - str: - file path from root - """ - from reactea import ROOT_DIR - - if file_path[0] == '/': - file_path = file_path[1:] - return f"{ROOT_DIR}/{file_path}" - - @staticmethod - def get_config_from_yaml(yaml_file: str): + def get_config_from_yaml(yaml_file: Path): """ Reads the configuration file. Parameters ---------- - yaml_file: str + yaml_file: Path path to yaml file Returns @@ -56,9 +38,12 @@ def get_config_from_yaml(yaml_file: str): config_dict = yaml.safe_load(config_file) config_dict['time'] = datetime.now().strftime('%m-%d_%H-%M-%S') config_dict['start_time'] = time.time() - config_dict['output_dir'] = f"{os.path.join(os.getcwd(), config_dict['output_path'])}" - config_dict['output_dir'] = f"{os.path.join(config_dict['output_dir'], config_dict['exp_name'])}" - config_dict['init_pop_path'] = f"{os.path.join(os.getcwd(), config_dict['init_pop_path'])}" + + output_path = Path(config_dict['output_path']).resolve() + config_dict['output_dir'] = output_path / config_dict['exp_name'] + + init_pop_path = Path(config_dict['init_pop_path']).resolve() + config_dict['init_pop_path'] = init_pop_path return config_dict @staticmethod @@ -97,7 +82,9 @@ def initialize_rules(): List[ReactionRule]: list of reaction rules to use """ - rules_df = pd.read_csv(Loaders.from_root('/data/reactionrules/reaction_rules_reactea.tsv.bz2'), + + path = DATA_FILES / 'reactionrules' / 'reaction_rules_reactea.tsv.bz2' + rules_df = pd.read_csv(path, header=0, sep='\t', compression='bz2') @@ -119,7 +106,7 @@ def load_deepsweet_ensemble(): except ImportError: raise ImportError("DeepSweet is not installed. Please install it to use this feature " "(https://github.com/BioSystemsUM/DeepSweet).") - models_folder_path = Loaders.from_root('/evaluation_models/deepsweet_models/') + models_folder_path = DEEPSWEET_MOLDES / 'evaluation_models' / 'deepsweet_models' list_of_models = [DeepSweetRF(models_folder_path, "2d", "SelectFromModelFS"), DeepSweetDNN(models_folder_path, "rdk", "all"), # it is necessary to insert the gpu number because it is a torch model and the device needs diff --git a/src/reactea/io_streams/writers.py b/src/reactea/io_streams/writers.py index 0ee1733..e3f64c0 100644 --- a/src/reactea/io_streams/writers.py +++ b/src/reactea/io_streams/writers.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path from typing import List import yaml @@ -7,8 +7,6 @@ from reactea.optimization.solution import ChemicalSolution -ROOT_DIR = os.path.dirname(__file__)[:-10] - class Writers: """ @@ -16,7 +14,7 @@ class Writers: """ @staticmethod - def set_up_folders(path: str): + def set_up_folders(path: Path): """ Creates folder to output results. @@ -25,8 +23,7 @@ def set_up_folders(path: str): path: str path to folder to create """ - if not os.path.exists(path): - os.makedirs(path) + path.mkdir(parents=True, exist_ok=True) @staticmethod def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names: str): @@ -43,9 +40,9 @@ def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names names of the evaluation functions """ # save all solutions - destFile = os.path.join(configs['output_dir'], f"FINAL_{configs['time']}.csv") + destFile = configs['output_dir'] / f"FINAL_{configs['time']}.csv" configs["final_population_path"] = destFile - with open(destFile, 'w') as f: + with destFile.open('w') as f: f.write("SMILES;" + feval_names + "\n") for i, solution in enumerate(final_pop): f.write(str(solution.variables.smiles) + ";" + @@ -54,8 +51,9 @@ def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names # save unique solutions df = pd.read_csv(destFile, sep=';', header=0) df = df.drop_duplicates() - df.to_csv(destFile[:-4] + '_UNIQUE_SOLUTIONS.csv', index=False) - configs["final_population_unique_solutions_path"] = destFile[:-4] + '_UNIQUE_SOLUTIONS.csv' + unique_solutions_path = destFile.stem + '_UNIQUE_SOLUTIONS.csv' + df.to_csv(destFile.parent / unique_solutions_path, index=False) + configs["final_population_unique_solutions_path"] = destFile.parent / unique_solutions_path @staticmethod def save_intermediate_transformations(pop: List[ChemicalSolution], configs: dict): @@ -70,9 +68,9 @@ def save_intermediate_transformations(pop: List[ChemicalSolution], configs: dict configs: dict configurations of the experiment """ - destFile = os.path.join(f"{configs['output_dir']}", f"FINAL_TRANSFORMATIONS_{configs['time']}.csv") + destFile = Path(configs['output_dir']) / f"FINAL_TRANSFORMATIONS_{configs['time']}.csv" configs["transformations_path"] = destFile - with open(destFile, 'w') as f: + with destFile.open('w') as f: f.write(f"FINAL_SMILES;INTERMEDIATE_SMILES;RULE_IDS\n") for sol in pop: @@ -93,8 +91,8 @@ def save_configs(configs: dict): configs: dict configurations of the experiment """ - destFile = os.path.join(f"{configs['output_dir']}", f"configs.yaml") - with open(destFile, 'w') as outfile: + destFile = configs['output_dir'] / 'configs.yaml' + with destFile.open('w') as outfile: yaml.dump(configs, outfile) @staticmethod @@ -114,9 +112,7 @@ def update_operators_logs(configs: dict, solution: ChemicalSolution, mutant: str rule_id: str reaction rule id """ - destFile = os.path.join(f"{configs['output_dir']}", f"ReactionMutationLogs.txt") - objectives = [] - for obj in solution.objectives: - objectives.append(str(round(obj, 3)*-1)) - with open(destFile, 'a+') as log: + destFile = configs['output_dir'] / 'ReactionMutationLogs.txt' + objectives = [str(round(obj, 3) * -1) for obj in solution.objectives] + with destFile.open('a+') as log: log.write(f"{solution.variables.smiles},{mutant},{rule_id},{','.join(objectives)}\n") diff --git a/tests/__init__.py b/tests/__init__.py index 2923b7b..cd31d23 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,3 @@ -import os +from pathlib import Path -TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DIR = Path(__file__).resolve().parent diff --git a/tests/base_test_cases.py b/tests/base_test_cases.py index 53223d7..540b3f0 100644 --- a/tests/base_test_cases.py +++ b/tests/base_test_cases.py @@ -1,6 +1,6 @@ -import os import shutil from abc import ABC, abstractmethod +from pathlib import Path from rdkit import RDLogger @@ -15,13 +15,17 @@ def setUp(self): # Mute RDKit logs RDLogger.DisableLog("rdApp.*") - config_path = os.path.join(TEST_DIR, 'configs/base_config.yaml') + config_path = TEST_DIR / 'configs' / 'base_config.yaml' self.configs = Loaders.get_config_from_yaml(config_path) - self.output_folder = f"{TEST_DIR}/outputs/{self.configs['exp_name']}/" + self.output_folder = TEST_DIR / 'outputs' / self.configs['exp_name'] + self.configs['output_dir'] = self.output_folder + init_pop_path = TEST_DIR / 'data' / 'compounds' / 'compounds_sample.tsv' + self.configs['init_pop_path'] = init_pop_path.as_posix() def tearDown(self): - if os.path.exists(self.output_folder): - shutil.rmtree(self.output_folder) + output_folder_path = Path(self.output_folder) + if output_folder_path.exists(): + shutil.rmtree(output_folder_path) @abstractmethod def test_case_study(self): @@ -34,13 +38,17 @@ def setUp(self): # Mute RDKit logs RDLogger.DisableLog("rdApp.*") - config_path = os.path.join(TEST_DIR, 'configs/base_config.yaml') + config_path = TEST_DIR / 'configs' / 'base_config.yaml' self.configs = Loaders.get_config_from_yaml(config_path) - self.output_folder = f"{TEST_DIR}/outputs/{self.configs['exp_name']}/" + self.output_folder = TEST_DIR / 'outputs' / self.configs['exp_name'] + self.configs['output_dir'] = self.output_folder + init_pop_path = TEST_DIR / 'data' / 'compounds' / 'compounds_sample.tsv' + self.configs['init_pop_path'] = init_pop_path def tearDown(self): - if os.path.exists(self.output_folder): - shutil.rmtree(self.output_folder) + output_folder_path = Path(self.output_folder) + if output_folder_path.exists(): + shutil.rmtree(output_folder_path) @abstractmethod def test_algorithms(self): diff --git a/tests/integration_tests/algorithms/test_mo_algorithms.py b/tests/integration_tests/algorithms/test_mo_algorithms.py index 846125a..dbfe8a7 100644 --- a/tests/integration_tests/algorithms/test_mo_algorithms.py +++ b/tests/integration_tests/algorithms/test_mo_algorithms.py @@ -1,11 +1,12 @@ -import os from unittest import TestCase -from base_test_cases import AlgorithmsBaseTestCase + from reactea.case_studies.compound_quality import CompoundQuality from reactea.io_streams import Loaders, Writers from reactea.optimization.jmetal.ea import ChemicalEA +from tests.base_test_cases import AlgorithmsBaseTestCase + class TestMOAlgorithms(AlgorithmsBaseTestCase, TestCase): @@ -14,7 +15,7 @@ def run_algorithm(self, algorithm): self.configs['algorithm'] = algorithm # set up output folder - self.output_folder = os.path.join(self.output_folder, algorithm) + self.output_folder = self.output_folder / algorithm self.configs['output_dir'] = self.output_folder # define number of molecules to use to only 1 in the case of RandomSearch From da9055e3ae658788e2242a1e30ac95d16005a585 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:53:48 +0000 Subject: [PATCH 02/10] [REMOVE] unused imports --- src/reactea/constants.py | 2 +- src/reactea/optimization/ea.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/reactea/constants.py b/src/reactea/constants.py index 265d761..f4c6c09 100644 --- a/src/reactea/constants.py +++ b/src/reactea/constants.py @@ -7,7 +7,7 @@ from reactea.chem import ChEMBLStandardizer from reactea.optimization.comparators import ParetoDominanceComparator from reactea.optimization.jmetal.operators import ReactorPseudoCrossover, ReactorMutation -from reactea.optimization.jmetal.terminators import StoppingByEvaluationsOrImprovement, StoppingByEvaluations +from reactea.optimization.jmetal.terminators import StoppingByEvaluations class ExperimentConstants: diff --git a/src/reactea/optimization/ea.py b/src/reactea/optimization/ea.py index 55ef993..401c1ee 100644 --- a/src/reactea/optimization/ea.py +++ b/src/reactea/optimization/ea.py @@ -3,7 +3,6 @@ from reactea.chem.compounds import Compound from reactea.optimization.problem import Problem -from reactea.constants import EAConstants class AbstractEA(ABC): From 74cde657bec9a0c6159be1376afd5d73cd382126 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:53:59 +0000 Subject: [PATCH 03/10] [ADD] example notebook --- examples/example.ipynb | 143 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 examples/example.ipynb diff --git a/examples/example.ipynb b/examples/example.ipynb new file mode 100644 index 0000000..2fd19c4 --- /dev/null +++ b/examples/example.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-02-12T12:51:01.579361Z", + "start_time": "2025-02-12T12:51:00.188Z" + } + }, + "source": [ + "from rdkit.Chem.QED import qed\n", + "from reactea import evaluation_functions_wrapper\n", + "\n", + "# EVALUATION FUNCTIONS\n", + "\n", + "# evaluation function returning the number of rings a molecule\n", + "def number_of_rings(mol):\n", + " ri = mol.GetRingInfo()\n", + " n_rings = len(ri.AtomRings())\n", + " return n_rings\n", + "\n", + "n_rigs_feval = evaluation_functions_wrapper(number_of_rings,\n", + " maximize=False,\n", + " worst_fitness=100,\n", + " name='n_rings')\n", + "\n", + "# evaluation function returning the drug-likeliness score (QED) of a molecule\n", + "def qed_score(mol):\n", + " return qed(mol)\n", + "\n", + "qed_feval = evaluation_functions_wrapper(qed_score,\n", + " maximize=True,\n", + " worst_fitness=0.0,\n", + " name='qed')\n", + "\n", + "# CASE STUDY\n", + "\n", + "from reactea import case_study_wrapper\n", + "\n", + "# SINGLE OBJECTIVE CASE STUDY\n", + "# case study to optimize a single objective `f1` (minimize number of rings in a molecule)\n", + "minimize_rings = case_study_wrapper(n_rigs_feval,\n", + " multi_objective=False,\n", + " name='minimize_rings')\n", + "\n", + "# SINGLE-OBJECTIVE CASE STUDY WITH MULTIPLE EVALUATION FUNCTIONS\n", + "# case study to optimize a single objective but with multiple evaluation functions `f1` and `f2` (minimize number of rings in a molecule and maximize qed)\n", + "# the number of evaluation functions must be the same as the number of values in weights and the sum of the weights must be 1\n", + "minimize_rings_maximize_qed = case_study_wrapper([n_rigs_feval, qed_feval],\n", + " multi_objective=False,\n", + " name='minimize_rings_maximize_qed',\n", + " weights=[0.3, 0.7])\n", + "\n", + "# MULTI-OBJECTIVE CASE STUDY\n", + "# case study to optimize multiple objectives simultaneous\n", + "minimize_rings_maximize_qed_mo = case_study_wrapper([n_rigs_feval, qed_feval],\n", + " multi_objective=True,\n", + " name='minimize_rings_maximize_qed_mo')" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[12:51:00] Initializing Normalizer\n" + ] + } + ], + "execution_count": 1 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-02-12T12:51:33.381067Z", + "start_time": "2025-02-12T12:51:01.642195Z" + } + }, + "cell_type": "code", + "source": [ + "from reactea import run_reactea\n", + "\n", + "case_study_rings = minimize_rings_maximize_qed_mo\n", + "# provide path to configuration file and case study\n", + "run_reactea(configs_path = 'config_files/NSGAII.yaml',\n", + " case_study = case_study_rings)" + ], + "id": "e217633e149b67e5", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running NSGAII\n", + "Eval(s)| Worst Best Median Average Std Dev| Worst Best Median Average Std Dev|\n", + " 11| 4.000000 0.000000 0.000000 1.000000 1.348400| 0.041206 0.508241 0.427593 0.370435 0.139558|\n", + " 22| 2.000000 0.000000 0.000000 0.545455 0.782030| 0.301591 0.662767 0.456096 0.440253 0.098033|\n", + " 33| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.396890 0.788971 0.501522 0.530570 0.110155|\n", + " 44| 3.000000 0.000000 0.000000 0.818182 1.113404| 0.456096 0.788971 0.571526 0.564990 0.096966|\n", + " 55| 3.000000 0.000000 0.000000 0.727273 1.052349| 0.461845 0.788971 0.571526 0.580778 0.085874|\n", + " 66| 3.000000 0.000000 2.000000 1.454545 1.372697| 0.571526 0.788971 0.591504 0.628655 0.068347|\n", + " 77| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n", + " 88| 3.000000 0.000000 1.000000 1.363636 1.226431| 0.571526 0.788971 0.638008 0.649290 0.065461|\n", + " 99| 3.000000 0.000000 1.000000 1.454545 1.157084| 0.579890 0.788971 0.671357 0.664836 0.062069|\n", + " 110| 3.000000 0.000000 2.000000 1.636364 1.226431| 0.579890 0.788971 0.679245 0.677558 0.058601|\n" + ] + } + ], + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "fdcbdfb72eec84c0" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 67aabcc286f16f1f7cd25e416e0360754169cdbc Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:54:14 +0000 Subject: [PATCH 04/10] [ADD] fix jmetalpy version to 1.5.5 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b873039..94216a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ rdkit-pypi==2022.03.1 numpy==1.21.5 pandas==1.3.5 cytoolz==0.11.2 -jmetalpy +jmetalpy==1.5.5 PyYAML==6.0 matplotlib==3.5.1 chembl_structure_pipeline From 20ed20b09d77c7346239e80475523598aa5c7456 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:54:21 +0000 Subject: [PATCH 05/10] [ADD] example data --- examples/data/seed_compounds.csv | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 examples/data/seed_compounds.csv diff --git a/examples/data/seed_compounds.csv b/examples/data/seed_compounds.csv new file mode 100644 index 0000000..973fb3a --- /dev/null +++ b/examples/data/seed_compounds.csv @@ -0,0 +1,12 @@ +compound_id smiles +id_0 N=C(O)NC(O)C(=O)O +id_1 Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OC(=O)c2cccc(O)c2O)C(O)C1O +id_2 CC1(O)OCC(O)C1(O)O +id_3 CC1(O)OCC(O)C1=O +id_4 CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(O)=NCCC(O)=NCCSC(=O)CC(O)CCC(=O)O +id_5 CC(C)(O)C(O)C(=O)O +id_6 CCC(C)(O)C(O)C(=O)O +id_7 N=C(N)N=C(O)c1nc(Cl)c(N2CCCCCC2)nc1N +id_8 O=C(O)C(O)CO +id_9 CC(C)(CO)C(O)C(=O)O +id_10 CC(C)(CO)C(O)C(O)=NCCC(=O)O \ No newline at end of file From 2da3c431103fc26efafd1e0936eddf2dbe6a2ecd Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 12:54:33 +0000 Subject: [PATCH 06/10] [UPDATE] configs info --- examples/config_files/ES.yaml | 4 ++-- examples/config_files/GA.yaml | 6 +++--- examples/config_files/IBEA.yaml | 4 ++-- examples/config_files/LS.yaml | 22 ---------------------- examples/config_files/NSGAII.yaml | 8 ++++---- examples/config_files/NSGAIII.yaml | 4 ++-- examples/config_files/SA.yaml | 4 ++-- examples/config_files/SPEA2.yaml | 4 ++-- 8 files changed, 17 insertions(+), 39 deletions(-) delete mode 100644 examples/config_files/LS.yaml diff --git a/examples/config_files/ES.yaml b/examples/config_files/ES.yaml index 9e4335d..0f6f283 100644 --- a/examples/config_files/ES.yaml +++ b/examples/config_files/ES.yaml @@ -1,6 +1,6 @@ exp_name: "ES_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" +init_pop_path: "data/seed_compounds.csv" init_pop_size: 100 standardize: True @@ -11,4 +11,4 @@ generations: 100 algorithm: "ES" elitist: True -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/GA.yaml b/examples/config_files/GA.yaml index 8797dfa..8fd93be 100644 --- a/examples/config_files/GA.yaml +++ b/examples/config_files/GA.yaml @@ -1,7 +1,7 @@ exp_name: "GA_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" -init_pop_size: 100 +init_pop_path: "data/seed_compounds.csv" +init_pop_size: 11 standardize: True max_rules_by_iter: 22949 @@ -10,4 +10,4 @@ tolerance: 0.1 generations: 100 algorithm: "GA" -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/IBEA.yaml b/examples/config_files/IBEA.yaml index fb8489d..167de89 100644 --- a/examples/config_files/IBEA.yaml +++ b/examples/config_files/IBEA.yaml @@ -1,6 +1,6 @@ exp_name: "IBEA_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" +init_pop_path: "data/seed_compounds.csv" init_pop_size: 100 standardize: True @@ -11,4 +11,4 @@ generations: 100 algorithm: "IBEA" kappa: 1.0 -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/LS.yaml b/examples/config_files/LS.yaml deleted file mode 100644 index 0fcc6dd..0000000 --- a/examples/config_files/LS.yaml +++ /dev/null @@ -1,22 +0,0 @@ -exp_name: "INTEGRATION_TESTS" - -case_study: "CompoundQuality" - -init_pop_path: "data/compounds/compounds_sample.tsv" -init_pop_size: 10 -standardize: True - -max_rules_by_iter: 1000 -tolerance: 0.15 - -mutation_probability: 1.0 -crossover_probability: 1.0 - -multi_objective: True -batched: True -generations: 10 -algorithm: "NSGAIII" -verbose: False -visualize: False - -output_path: "data/outputs/" \ No newline at end of file diff --git a/examples/config_files/NSGAII.yaml b/examples/config_files/NSGAII.yaml index 9c62849..fb0a410 100644 --- a/examples/config_files/NSGAII.yaml +++ b/examples/config_files/NSGAII.yaml @@ -1,13 +1,13 @@ exp_name: "NSGAII_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" -init_pop_size: 100 +init_pop_path: "data/seed_compounds.csv" +init_pop_size: 11 standardize: True max_rules_by_iter: 22949 tolerance: 0.1 -generations: 100 +generations: 10 algorithm: "NSGAII" -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/NSGAIII.yaml b/examples/config_files/NSGAIII.yaml index 5ae4b55..9b7dd9b 100644 --- a/examples/config_files/NSGAIII.yaml +++ b/examples/config_files/NSGAIII.yaml @@ -1,6 +1,6 @@ exp_name: "NSGAIII_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" +init_pop_path: "data/seed_compounds.csv" init_pop_size: 100 standardize: True @@ -10,4 +10,4 @@ tolerance: 0.1 generations: 100 algorithm: "NSGAIII" -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/SA.yaml b/examples/config_files/SA.yaml index 4646513..878f74d 100644 --- a/examples/config_files/SA.yaml +++ b/examples/config_files/SA.yaml @@ -1,6 +1,6 @@ exp_name: "SA_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" +init_pop_path: "data/seed_compounds.csv" init_pop_size: 100 standardize: True @@ -13,4 +13,4 @@ temperature: 1.0 minimum_temperature: 0.000001 alpha: 0.95 -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file diff --git a/examples/config_files/SPEA2.yaml b/examples/config_files/SPEA2.yaml index 09d6504..aa6d062 100644 --- a/examples/config_files/SPEA2.yaml +++ b/examples/config_files/SPEA2.yaml @@ -1,6 +1,6 @@ exp_name: "SPEA2_EXAMPLE_CONFIG" -init_pop_path: ".../.../path_to_seed_compounds.tsv" +init_pop_path: "data/seed_compounds.csv" init_pop_size: 100 standardize: True @@ -10,4 +10,4 @@ tolerance: 0.1 generations: 100 algorithm: "SPEA2" -output_path: ".../output_dir_path/" \ No newline at end of file +output_path: "output_dir_path/" \ No newline at end of file From 24298902f1c4d45133d03afbe37ad5b47732b629 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 13:03:24 +0000 Subject: [PATCH 07/10] [ADD] path validation --- src/reactea/io_streams/writers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/reactea/io_streams/writers.py b/src/reactea/io_streams/writers.py index e3f64c0..70cd0c7 100644 --- a/src/reactea/io_streams/writers.py +++ b/src/reactea/io_streams/writers.py @@ -23,6 +23,8 @@ def set_up_folders(path: Path): path: str path to folder to create """ + if isinstance(path, str): + path = Path(path) path.mkdir(parents=True, exist_ok=True) @staticmethod From 411073d431f4558dccfa1a3f3c7bacf9805e6383 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 13:12:17 +0000 Subject: [PATCH 08/10] [FIX] path --- src/reactea/io_streams/readers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reactea/io_streams/readers.py b/src/reactea/io_streams/readers.py index de9e645..8e6c903 100644 --- a/src/reactea/io_streams/readers.py +++ b/src/reactea/io_streams/readers.py @@ -41,6 +41,7 @@ def get_config_from_yaml(yaml_file: Path): output_path = Path(config_dict['output_path']).resolve() config_dict['output_dir'] = output_path / config_dict['exp_name'] + config_dict['output_path'] = output_path init_pop_path = Path(config_dict['init_pop_path']).resolve() config_dict['init_pop_path'] = init_pop_path From cec216cf993d99ef69ec9e2015459ce0e3ecbf21 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 14:33:02 +0000 Subject: [PATCH 09/10] [FIX] tests --- tests/integration_tests/algorithms/test_so_algorithms.py | 4 ++-- tests/integration_tests/case_studies/test_compound_quality.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/algorithms/test_so_algorithms.py b/tests/integration_tests/algorithms/test_so_algorithms.py index 44fa5cc..2447c6a 100644 --- a/tests/integration_tests/algorithms/test_so_algorithms.py +++ b/tests/integration_tests/algorithms/test_so_algorithms.py @@ -1,7 +1,7 @@ import os from unittest import TestCase -from base_test_cases import AlgorithmsBaseTestCase +from tests.base_test_cases import AlgorithmsBaseTestCase from reactea.case_studies.compound_quality import CompoundQuality from reactea.io_streams import Loaders, Writers from reactea.optimization.jmetal.ea import ChemicalEA @@ -15,7 +15,7 @@ def run_algorithm(self, algorithm): self.configs['multi_objective'] = False # set up output folder - self.output_folder = os.path.join(self.output_folder, algorithm) + self.output_folder = self.output_folder / algorithm self.configs['output_dir'] = self.output_folder # define number of molecules to use to only 1 in the case of RandomSearch diff --git a/tests/integration_tests/case_studies/test_compound_quality.py b/tests/integration_tests/case_studies/test_compound_quality.py index db41b5d..014f659 100644 --- a/tests/integration_tests/case_studies/test_compound_quality.py +++ b/tests/integration_tests/case_studies/test_compound_quality.py @@ -1,7 +1,7 @@ import os from unittest import TestCase -from base_test_cases import CaseStudiesBaseTestCase +from tests.base_test_cases import CaseStudiesBaseTestCase from reactea.case_studies.compound_quality import CompoundQuality from reactea.io_streams import Loaders, Writers @@ -20,7 +20,7 @@ def run_case_study(self, mo=True): self.configs['algorithm'] = 'GA' # set up output folder - self.output_folder = os.path.join(self.output_folder, self.configs['algorithm']) + self.output_folder = self.output_folder / self.configs['algorithm'] self.configs['output_dir'] = self.output_folder # initialize population and initialize population smiles From aa5585024e5aa71b29a68c82618bc0eff8b4df59 Mon Sep 17 00:00:00 2001 From: jcorreia11 Date: Wed, 12 Feb 2025 14:47:43 +0000 Subject: [PATCH 10/10] [FIX] tests --- src/reactea/vizualization/plot_results.py | 9 +++++++-- tests/unit_tests/visualization/test_plot_results.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/reactea/vizualization/plot_results.py b/src/reactea/vizualization/plot_results.py index 33b4ecd..0f2ae7c 100644 --- a/src/reactea/vizualization/plot_results.py +++ b/src/reactea/vizualization/plot_results.py @@ -190,9 +190,14 @@ def crop_image_with_transparency(img): # Applying transparency # (https://stackoverflow.com/questions/4379978/python-pil-how-to-make-area-transparent-in-png) - for transparent_zone in [(0, 0, l - epsilon, h), (0, 0, w, u - epsilon), (r + epsilon, 0, w, h), - (0, b + epsilon, w, h)]: + for transparent_zone in [ + (0, 0, max(0, l - epsilon), h), + (0, 0, w, max(0, u - epsilon)), + (min(w, r + epsilon), 0, w, h), + (0, min(h, b + epsilon), w, h) + ]: draw = ImageDraw.Draw(mask) + print(transparent_zone) draw.rectangle(transparent_zone, fill=0) img.putalpha(mask) diff --git a/tests/unit_tests/visualization/test_plot_results.py b/tests/unit_tests/visualization/test_plot_results.py index 3920a47..bf1f50e 100644 --- a/tests/unit_tests/visualization/test_plot_results.py +++ b/tests/unit_tests/visualization/test_plot_results.py @@ -1,4 +1,3 @@ -import os.path from unittest import TestCase from reactea.vizualization.plot_results import PlotResults @@ -9,7 +8,8 @@ class TestPlotResults(TestCase): def test_plot_results(self): - mock_output_configs = {'transformations_path': os.path.join(TEST_DIR, 'data/output_example/GA_rr_600esc_400gen_0.6sweet_0.4_caloric/FINAL_TRANSFORMATIONS_04-29_16-51-54.csv')} + path = TEST_DIR / 'data' / 'output_example' / 'GA_rr_600esc_400gen_0.6sweet_0.4_caloric' / 'FINAL_TRANSFORMATIONS_04-29_16-51-54.csv' + mock_output_configs = {'transformations_path': path} PlotResults(mock_output_configs, solution_index=0).plot_results(save_fig=False) PlotResults(mock_output_configs, solution_index=4).plot_results(save_fig=False) PlotResults(mock_output_configs, solution_index=6).plot_results(save_fig=False)