From 94a7a52503ef7bd7d5720e29a7f2c66fe5b737a1 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:53:35 +0000
Subject: [PATCH 01/10] [ADD] use pathlib instead of os

---
 src/reactea/__init__.py                       | 13 +++---
 src/reactea/cli.py                            |  3 --
 src/reactea/io_streams/readers.py             | 45 +++++++------------
 src/reactea/io_streams/writers.py             | 34 +++++++-------
 tests/__init__.py                             |  4 +-
 tests/base_test_cases.py                      | 26 +++++++----
 .../algorithms/test_mo_algorithms.py          |  7 +--
 7 files changed, 61 insertions(+), 71 deletions(-)

diff --git a/src/reactea/__init__.py b/src/reactea/__init__.py
index 953a2b7..2f951e2 100644
--- a/src/reactea/__init__.py
+++ b/src/reactea/__init__.py
@@ -1,5 +1,5 @@
-import os
 import warnings
+from pathlib import Path
 from typing import Union
 
 from rdkit import RDLogger
@@ -9,8 +9,6 @@
 from reactea.optimization.jmetal.ea import ChemicalEA
 from reactea.wrappers import case_study_wrapper, evaluation_functions_wrapper
 
-ROOT_DIR = os.path.dirname(__file__)
-
 
 def run_reactea(configs_path: Union[str, dict],
                 case_study: CaseStudy,
@@ -22,13 +20,16 @@ def run_reactea(configs_path: Union[str, dict],
     if ignore_warnings:
         warnings.filterwarnings("ignore")
 
-    if isinstance(configs_path, str) and os.path.exists(configs_path):
+    configs_path = Path(configs_path)
+    if configs_path.exists():
         configs = Loaders.get_config_from_yaml(configs_path)
-    else:
+    elif isinstance(configs_path, dict):
         configs = configs_path
+    else:
+        raise FileNotFoundError(f"Config file {configs_path} not found.")
 
     # set up output folder
-    output_folder = os.path.join(configs['output_dir'], configs['algorithm'])
+    output_folder = Path(configs['output_dir']) / configs['algorithm']
     configs['output_dir'] = output_folder
 
     # initialize population and initialize population smiles
diff --git a/src/reactea/cli.py b/src/reactea/cli.py
index 0efd080..adf142d 100644
--- a/src/reactea/cli.py
+++ b/src/reactea/cli.py
@@ -1,4 +1,3 @@
-import os
 import time
 from datetime import datetime
 
@@ -10,8 +9,6 @@
 from reactea.io_streams import Loaders, Writers
 from reactea.optimization.jmetal.ea import ChemicalEA
 
-DATA_FILES = os.path.dirname(__file__)
-
 
 def setup_configuration_file(args):
     # create dictionary from parser.parse_args()
diff --git a/src/reactea/io_streams/readers.py b/src/reactea/io_streams/readers.py
index a0df2ad..de9e645 100644
--- a/src/reactea/io_streams/readers.py
+++ b/src/reactea/io_streams/readers.py
@@ -1,6 +1,6 @@
-import os
 import time
 from datetime import datetime
+from pathlib import Path
 
 import yaml
 
@@ -10,6 +10,9 @@
 
 from reactea.constants import ChemConstants
 
+DATA_FILES = Path(__file__).resolve().parent.parent / 'data'
+DEEPSWEET_MOLDES = Path(__file__).parent.parent
+
 
 class Loaders:
     """
@@ -17,34 +20,13 @@ class Loaders:
     """
 
     @staticmethod
-    def from_root(file_path: str):
-        """
-        Gets path of file from root.
-
-        Parameters
-        ----------
-        file_path: str
-            file path
-
-        Returns
-        -------
-        str:
-            file path from root
-        """
-        from reactea import ROOT_DIR
-
-        if file_path[0] == '/':
-            file_path = file_path[1:]
-        return f"{ROOT_DIR}/{file_path}"
-
-    @staticmethod
-    def get_config_from_yaml(yaml_file: str):
+    def get_config_from_yaml(yaml_file: Path):
         """
         Reads the configuration file.
 
         Parameters
         ----------
-        yaml_file: str
+        yaml_file: Path
             path to yaml file
 
         Returns
@@ -56,9 +38,12 @@ def get_config_from_yaml(yaml_file: str):
             config_dict = yaml.safe_load(config_file)
         config_dict['time'] = datetime.now().strftime('%m-%d_%H-%M-%S')
         config_dict['start_time'] = time.time()
-        config_dict['output_dir'] = f"{os.path.join(os.getcwd(), config_dict['output_path'])}"
-        config_dict['output_dir'] = f"{os.path.join(config_dict['output_dir'], config_dict['exp_name'])}"
-        config_dict['init_pop_path'] = f"{os.path.join(os.getcwd(), config_dict['init_pop_path'])}"
+
+        output_path = Path(config_dict['output_path']).resolve()
+        config_dict['output_dir'] = output_path / config_dict['exp_name']
+
+        init_pop_path = Path(config_dict['init_pop_path']).resolve()
+        config_dict['init_pop_path'] = init_pop_path
         return config_dict
 
     @staticmethod
@@ -97,7 +82,9 @@ def initialize_rules():
         List[ReactionRule]:
             list of reaction rules to use
         """
-        rules_df = pd.read_csv(Loaders.from_root('/data/reactionrules/reaction_rules_reactea.tsv.bz2'),
+
+        path = DATA_FILES / 'reactionrules' / 'reaction_rules_reactea.tsv.bz2'
+        rules_df = pd.read_csv(path,
                                header=0,
                                sep='\t',
                                compression='bz2')
@@ -119,7 +106,7 @@ def load_deepsweet_ensemble():
         except ImportError:
             raise ImportError("DeepSweet is not installed. Please install it to use this feature "
                               "(https://github.com/BioSystemsUM/DeepSweet).")
-        models_folder_path = Loaders.from_root('/evaluation_models/deepsweet_models/')
+        models_folder_path = DEEPSWEET_MOLDES / 'evaluation_models' / 'deepsweet_models'
         list_of_models = [DeepSweetRF(models_folder_path, "2d", "SelectFromModelFS"),
                           DeepSweetDNN(models_folder_path, "rdk", "all"),
                           # it is necessary to insert the gpu number because it is a torch model and the device needs
diff --git a/src/reactea/io_streams/writers.py b/src/reactea/io_streams/writers.py
index 0ee1733..e3f64c0 100644
--- a/src/reactea/io_streams/writers.py
+++ b/src/reactea/io_streams/writers.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
 from typing import List
 
 import yaml
@@ -7,8 +7,6 @@
 
 from reactea.optimization.solution import ChemicalSolution
 
-ROOT_DIR = os.path.dirname(__file__)[:-10]
-
 
 class Writers:
     """
@@ -16,7 +14,7 @@ class Writers:
     """
 
     @staticmethod
-    def set_up_folders(path: str):
+    def set_up_folders(path: Path):
         """
         Creates folder to output results.
 
@@ -25,8 +23,7 @@ def set_up_folders(path: str):
         path: str
             path to folder to create
         """
-        if not os.path.exists(path):
-            os.makedirs(path)
+        path.mkdir(parents=True, exist_ok=True)
 
     @staticmethod
     def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names: str):
@@ -43,9 +40,9 @@ def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names
             names of the evaluation functions
         """
         # save all solutions
-        destFile = os.path.join(configs['output_dir'], f"FINAL_{configs['time']}.csv")
+        destFile = configs['output_dir'] / f"FINAL_{configs['time']}.csv"
         configs["final_population_path"] = destFile
-        with open(destFile, 'w') as f:
+        with destFile.open('w') as f:
             f.write("SMILES;" + feval_names + "\n")
             for i, solution in enumerate(final_pop):
                 f.write(str(solution.variables.smiles) + ";" +
@@ -54,8 +51,9 @@ def save_final_pop(final_pop: List[ChemicalSolution], configs: dict, feval_names
         # save unique solutions
         df = pd.read_csv(destFile, sep=';', header=0)
         df = df.drop_duplicates()
-        df.to_csv(destFile[:-4] + '_UNIQUE_SOLUTIONS.csv', index=False)
-        configs["final_population_unique_solutions_path"] = destFile[:-4] + '_UNIQUE_SOLUTIONS.csv'
+        unique_solutions_path = destFile.stem + '_UNIQUE_SOLUTIONS.csv'
+        df.to_csv(destFile.parent / unique_solutions_path, index=False)
+        configs["final_population_unique_solutions_path"] = destFile.parent / unique_solutions_path
 
     @staticmethod
     def save_intermediate_transformations(pop: List[ChemicalSolution], configs: dict):
@@ -70,9 +68,9 @@ def save_intermediate_transformations(pop: List[ChemicalSolution], configs: dict
         configs: dict
             configurations of the experiment
         """
-        destFile = os.path.join(f"{configs['output_dir']}", f"FINAL_TRANSFORMATIONS_{configs['time']}.csv")
+        destFile = Path(configs['output_dir']) / f"FINAL_TRANSFORMATIONS_{configs['time']}.csv"
         configs["transformations_path"] = destFile
-        with open(destFile, 'w') as f:
+        with destFile.open('w') as f:
             f.write(f"FINAL_SMILES;INTERMEDIATE_SMILES;RULE_IDS\n")
 
             for sol in pop:
@@ -93,8 +91,8 @@ def save_configs(configs: dict):
         configs: dict
             configurations of the experiment
         """
-        destFile = os.path.join(f"{configs['output_dir']}", f"configs.yaml")
-        with open(destFile, 'w') as outfile:
+        destFile = configs['output_dir'] / 'configs.yaml'
+        with destFile.open('w') as outfile:
             yaml.dump(configs, outfile)
 
     @staticmethod
@@ -114,9 +112,7 @@ def update_operators_logs(configs: dict, solution: ChemicalSolution, mutant: str
         rule_id: str
             reaction rule id
         """
-        destFile = os.path.join(f"{configs['output_dir']}", f"ReactionMutationLogs.txt")
-        objectives = []
-        for obj in solution.objectives:
-            objectives.append(str(round(obj, 3)*-1))
-        with open(destFile, 'a+') as log:
+        destFile = configs['output_dir'] / 'ReactionMutationLogs.txt'
+        objectives = [str(round(obj, 3) * -1) for obj in solution.objectives]
+        with destFile.open('a+') as log:
             log.write(f"{solution.variables.smiles},{mutant},{rule_id},{','.join(objectives)}\n")
diff --git a/tests/__init__.py b/tests/__init__.py
index 2923b7b..cd31d23 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,3 +1,3 @@
-import os
+from pathlib import Path
 
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DIR = Path(__file__).resolve().parent
diff --git a/tests/base_test_cases.py b/tests/base_test_cases.py
index 53223d7..540b3f0 100644
--- a/tests/base_test_cases.py
+++ b/tests/base_test_cases.py
@@ -1,6 +1,6 @@
-import os
 import shutil
 from abc import ABC, abstractmethod
+from pathlib import Path
 
 from rdkit import RDLogger
 
@@ -15,13 +15,17 @@ def setUp(self):
         # Mute RDKit logs
         RDLogger.DisableLog("rdApp.*")
 
-        config_path = os.path.join(TEST_DIR, 'configs/base_config.yaml')
+        config_path = TEST_DIR / 'configs' / 'base_config.yaml'
         self.configs = Loaders.get_config_from_yaml(config_path)
-        self.output_folder = f"{TEST_DIR}/outputs/{self.configs['exp_name']}/"
+        self.output_folder = TEST_DIR / 'outputs' / self.configs['exp_name']
+        self.configs['output_dir'] = self.output_folder
+        init_pop_path = TEST_DIR / 'data' / 'compounds' / 'compounds_sample.tsv'
+        self.configs['init_pop_path'] = init_pop_path.as_posix()
 
     def tearDown(self):
-        if os.path.exists(self.output_folder):
-            shutil.rmtree(self.output_folder)
+        output_folder_path = Path(self.output_folder)
+        if output_folder_path.exists():
+            shutil.rmtree(output_folder_path)
 
     @abstractmethod
     def test_case_study(self):
@@ -34,13 +38,17 @@ def setUp(self):
         # Mute RDKit logs
         RDLogger.DisableLog("rdApp.*")
 
-        config_path = os.path.join(TEST_DIR, 'configs/base_config.yaml')
+        config_path = TEST_DIR / 'configs' / 'base_config.yaml'
         self.configs = Loaders.get_config_from_yaml(config_path)
-        self.output_folder = f"{TEST_DIR}/outputs/{self.configs['exp_name']}/"
+        self.output_folder = TEST_DIR / 'outputs' / self.configs['exp_name']
+        self.configs['output_dir'] = self.output_folder
+        init_pop_path = TEST_DIR / 'data' / 'compounds' / 'compounds_sample.tsv'
+        self.configs['init_pop_path'] = init_pop_path
 
     def tearDown(self):
-        if os.path.exists(self.output_folder):
-            shutil.rmtree(self.output_folder)
+        output_folder_path = Path(self.output_folder)
+        if output_folder_path.exists():
+            shutil.rmtree(output_folder_path)
 
     @abstractmethod
     def test_algorithms(self):
diff --git a/tests/integration_tests/algorithms/test_mo_algorithms.py b/tests/integration_tests/algorithms/test_mo_algorithms.py
index 846125a..dbfe8a7 100644
--- a/tests/integration_tests/algorithms/test_mo_algorithms.py
+++ b/tests/integration_tests/algorithms/test_mo_algorithms.py
@@ -1,11 +1,12 @@
-import os
 from unittest import TestCase
 
-from base_test_cases import AlgorithmsBaseTestCase
+
 from reactea.case_studies.compound_quality import CompoundQuality
 from reactea.io_streams import Loaders, Writers
 from reactea.optimization.jmetal.ea import ChemicalEA
 
+from tests.base_test_cases import AlgorithmsBaseTestCase
+
 
 class TestMOAlgorithms(AlgorithmsBaseTestCase, TestCase):
 
@@ -14,7 +15,7 @@ def run_algorithm(self, algorithm):
         self.configs['algorithm'] = algorithm
 
         # set up output folder
-        self.output_folder = os.path.join(self.output_folder, algorithm)
+        self.output_folder = self.output_folder / algorithm
         self.configs['output_dir'] = self.output_folder
 
         # define number of molecules to use to only 1 in the case of RandomSearch

From da9055e3ae658788e2242a1e30ac95d16005a585 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:53:48 +0000
Subject: [PATCH 02/10] [REMOVE] unused imports

---
 src/reactea/constants.py       | 2 +-
 src/reactea/optimization/ea.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/reactea/constants.py b/src/reactea/constants.py
index 265d761..f4c6c09 100644
--- a/src/reactea/constants.py
+++ b/src/reactea/constants.py
@@ -7,7 +7,7 @@
 from reactea.chem import ChEMBLStandardizer
 from reactea.optimization.comparators import ParetoDominanceComparator
 from reactea.optimization.jmetal.operators import ReactorPseudoCrossover, ReactorMutation
-from reactea.optimization.jmetal.terminators import StoppingByEvaluationsOrImprovement, StoppingByEvaluations
+from reactea.optimization.jmetal.terminators import StoppingByEvaluations
 
 
 class ExperimentConstants:
diff --git a/src/reactea/optimization/ea.py b/src/reactea/optimization/ea.py
index 55ef993..401c1ee 100644
--- a/src/reactea/optimization/ea.py
+++ b/src/reactea/optimization/ea.py
@@ -3,7 +3,6 @@
 
 from reactea.chem.compounds import Compound
 from reactea.optimization.problem import Problem
-from reactea.constants import EAConstants
 
 
 class AbstractEA(ABC):

From 74cde657bec9a0c6159be1376afd5d73cd382126 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:53:59 +0000
Subject: [PATCH 03/10] [ADD] example notebook

---
 examples/example.ipynb | 143 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 examples/example.ipynb

diff --git a/examples/example.ipynb b/examples/example.ipynb
new file mode 100644
index 0000000..2fd19c4
--- /dev/null
+++ b/examples/example.ipynb
@@ -0,0 +1,143 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2025-02-12T12:51:01.579361Z",
+     "start_time": "2025-02-12T12:51:00.188Z"
+    }
+   },
+   "source": [
+    "from rdkit.Chem.QED import qed\n",
+    "from reactea import evaluation_functions_wrapper\n",
+    "\n",
+    "# EVALUATION FUNCTIONS\n",
+    "\n",
+    "# evaluation function returning the number of rings in a molecule\n",
+    "def number_of_rings(mol):\n",
+    "    ri = mol.GetRingInfo()\n",
+    "    n_rings = len(ri.AtomRings())\n",
+    "    return n_rings\n",
+    "\n",
+    "n_rigs_feval = evaluation_functions_wrapper(number_of_rings,\n",
+    "                                            maximize=False,\n",
+    "                                            worst_fitness=100,\n",
+    "                                            name='n_rings')\n",
+    "\n",
+    "# evaluation function returning the drug-likeness score (QED) of a molecule\n",
+    "def qed_score(mol):\n",
+    "    return qed(mol)\n",
+    "\n",
+    "qed_feval = evaluation_functions_wrapper(qed_score,\n",
+    "                                         maximize=True,\n",
+    "                                         worst_fitness=0.0,\n",
+    "                                         name='qed')\n",
+    "\n",
+    "# CASE STUDY\n",
+    "\n",
+    "from reactea import case_study_wrapper\n",
+    "\n",
+    "# SINGLE OBJECTIVE CASE STUDY\n",
+    "# case study to optimize a single objective `f1` (minimize number of rings in a molecule)\n",
+    "minimize_rings = case_study_wrapper(n_rigs_feval,\n",
+    "                                    multi_objective=False,\n",
+    "                                    name='minimize_rings')\n",
+    "\n",
+    "# SINGLE-OBJECTIVE CASE STUDY WITH MULTIPLE EVALUATION FUNCTIONS\n",
+    "# case study to optimize a single objective but with multiple evaluation functions `f1` and `f2` (minimize number of rings in a molecule and maximize qed)\n",
+    "# the number of evaluation functions must be the same as the number of values in weights and the sum of the weights must be 1\n",
+    "minimize_rings_maximize_qed = case_study_wrapper([n_rigs_feval, qed_feval],\n",
+    "                                                 multi_objective=False,\n",
+    "                                                 name='minimize_rings_maximize_qed',\n",
+    "                                                 weights=[0.3, 0.7])\n",
+    "\n",
+    "# MULTI-OBJECTIVE CASE STUDY\n",
+    "# case study to optimize multiple objectives simultaneously\n",
+    "minimize_rings_maximize_qed_mo = case_study_wrapper([n_rigs_feval, qed_feval],\n",
+    "                                                    multi_objective=True,\n",
+    "                                                    name='minimize_rings_maximize_qed_mo')"
+   ],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[12:51:00] Initializing Normalizer\n"
+     ]
+    }
+   ],
+   "execution_count": 1
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-12T12:51:33.381067Z",
+     "start_time": "2025-02-12T12:51:01.642195Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from reactea import run_reactea\n",
+    "\n",
+    "case_study_rings = minimize_rings_maximize_qed_mo\n",
+    "# provide path to configuration file and case study\n",
+    "run_reactea(configs_path = 'config_files/NSGAII.yaml',\n",
+    "            case_study = case_study_rings)"
+   ],
+   "id": "e217633e149b67e5",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running NSGAII\n",
+      "Eval(s)|     Worst      Best    Median   Average   Std Dev|     Worst      Best    Median   Average   Std Dev|\n",
+      "     11|  4.000000  0.000000  0.000000  1.000000  1.348400|  0.041206  0.508241  0.427593  0.370435  0.139558|\n",
+      "     22|  2.000000  0.000000  0.000000  0.545455  0.782030|  0.301591  0.662767  0.456096  0.440253  0.098033|\n",
+      "     33|  3.000000  0.000000  0.000000  0.727273  1.052349|  0.396890  0.788971  0.501522  0.530570  0.110155|\n",
+      "     44|  3.000000  0.000000  0.000000  0.818182  1.113404|  0.456096  0.788971  0.571526  0.564990  0.096966|\n",
+      "     55|  3.000000  0.000000  0.000000  0.727273  1.052349|  0.461845  0.788971  0.571526  0.580778  0.085874|\n",
+      "     66|  3.000000  0.000000  2.000000  1.454545  1.372697|  0.571526  0.788971  0.591504  0.628655  0.068347|\n",
+      "     77|  3.000000  0.000000  1.000000  1.363636  1.226431|  0.571526  0.788971  0.638008  0.649290  0.065461|\n",
+      "     88|  3.000000  0.000000  1.000000  1.363636  1.226431|  0.571526  0.788971  0.638008  0.649290  0.065461|\n",
+      "     99|  3.000000  0.000000  1.000000  1.454545  1.157084|  0.579890  0.788971  0.671357  0.664836  0.062069|\n",
+      "    110|  3.000000  0.000000  2.000000  1.636364  1.226431|  0.579890  0.788971  0.679245  0.677558  0.058601|\n"
+     ]
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "fdcbdfb72eec84c0"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 67aabcc286f16f1f7cd25e416e0360754169cdbc Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:54:14 +0000
Subject: [PATCH 04/10] [ADD] fix jmetalpy version to 1.5.5

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b873039..94216a9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ rdkit-pypi==2022.03.1
 numpy==1.21.5
 pandas==1.3.5
 cytoolz==0.11.2
-jmetalpy
+jmetalpy==1.5.5
 PyYAML==6.0
 matplotlib==3.5.1
 chembl_structure_pipeline

From 20ed20b09d77c7346239e80475523598aa5c7456 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:54:21 +0000
Subject: [PATCH 05/10] [ADD] example data

---
 examples/data/seed_compounds.csv | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 examples/data/seed_compounds.csv

diff --git a/examples/data/seed_compounds.csv b/examples/data/seed_compounds.csv
new file mode 100644
index 0000000..973fb3a
--- /dev/null
+++ b/examples/data/seed_compounds.csv
@@ -0,0 +1,12 @@
+compound_id	smiles
+id_0	N=C(O)NC(O)C(=O)O
+id_1	Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OC(=O)c2cccc(O)c2O)C(O)C1O
+id_2	CC1(O)OCC(O)C1(O)O
+id_3	CC1(O)OCC(O)C1=O
+id_4	CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(O)=NCCC(O)=NCCSC(=O)CC(O)CCC(=O)O
+id_5	CC(C)(O)C(O)C(=O)O
+id_6	CCC(C)(O)C(O)C(=O)O
+id_7	N=C(N)N=C(O)c1nc(Cl)c(N2CCCCCC2)nc1N
+id_8	O=C(O)C(O)CO
+id_9	CC(C)(CO)C(O)C(=O)O
+id_10	CC(C)(CO)C(O)C(O)=NCCC(=O)O
\ No newline at end of file

From 2da3c431103fc26efafd1e0936eddf2dbe6a2ecd Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 12:54:33 +0000
Subject: [PATCH 06/10] [UPDATE] configs info

---
 examples/config_files/ES.yaml      |  4 ++--
 examples/config_files/GA.yaml      |  6 +++---
 examples/config_files/IBEA.yaml    |  4 ++--
 examples/config_files/LS.yaml      | 22 ----------------------
 examples/config_files/NSGAII.yaml  |  8 ++++----
 examples/config_files/NSGAIII.yaml |  4 ++--
 examples/config_files/SA.yaml      |  4 ++--
 examples/config_files/SPEA2.yaml   |  4 ++--
 8 files changed, 17 insertions(+), 39 deletions(-)
 delete mode 100644 examples/config_files/LS.yaml

diff --git a/examples/config_files/ES.yaml b/examples/config_files/ES.yaml
index 9e4335d..0f6f283 100644
--- a/examples/config_files/ES.yaml
+++ b/examples/config_files/ES.yaml
@@ -1,6 +1,6 @@
 exp_name: "ES_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
+init_pop_path: "data/seed_compounds.csv"
 init_pop_size: 100
 standardize: True
 
@@ -11,4 +11,4 @@ generations: 100
 algorithm: "ES"
 elitist: True
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/GA.yaml b/examples/config_files/GA.yaml
index 8797dfa..8fd93be 100644
--- a/examples/config_files/GA.yaml
+++ b/examples/config_files/GA.yaml
@@ -1,7 +1,7 @@
 exp_name: "GA_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
-init_pop_size: 100
+init_pop_path: "data/seed_compounds.csv"
+init_pop_size: 11
 standardize: True
 
 max_rules_by_iter: 22949
@@ -10,4 +10,4 @@ tolerance: 0.1
 generations: 100
 algorithm: "GA"
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/IBEA.yaml b/examples/config_files/IBEA.yaml
index fb8489d..167de89 100644
--- a/examples/config_files/IBEA.yaml
+++ b/examples/config_files/IBEA.yaml
@@ -1,6 +1,6 @@
 exp_name: "IBEA_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
+init_pop_path: "data/seed_compounds.csv"
 init_pop_size: 100
 standardize: True
 
@@ -11,4 +11,4 @@ generations: 100
 algorithm: "IBEA"
 kappa: 1.0
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/LS.yaml b/examples/config_files/LS.yaml
deleted file mode 100644
index 0fcc6dd..0000000
--- a/examples/config_files/LS.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-exp_name: "INTEGRATION_TESTS"
-
-case_study: "CompoundQuality"
-
-init_pop_path: "data/compounds/compounds_sample.tsv"
-init_pop_size: 10
-standardize: True
-
-max_rules_by_iter: 1000
-tolerance: 0.15
-
-mutation_probability: 1.0
-crossover_probability: 1.0
-
-multi_objective: True
-batched: True
-generations: 10
-algorithm: "NSGAIII"
-verbose: False
-visualize: False
-
-output_path: "data/outputs/"
\ No newline at end of file
diff --git a/examples/config_files/NSGAII.yaml b/examples/config_files/NSGAII.yaml
index 9c62849..fb0a410 100644
--- a/examples/config_files/NSGAII.yaml
+++ b/examples/config_files/NSGAII.yaml
@@ -1,13 +1,13 @@
 exp_name: "NSGAII_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
-init_pop_size: 100
+init_pop_path: "data/seed_compounds.csv"
+init_pop_size: 11
 standardize: True
 
 max_rules_by_iter: 22949
 tolerance: 0.1
 
-generations: 100
+generations: 10
 algorithm: "NSGAII"
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/NSGAIII.yaml b/examples/config_files/NSGAIII.yaml
index 5ae4b55..9b7dd9b 100644
--- a/examples/config_files/NSGAIII.yaml
+++ b/examples/config_files/NSGAIII.yaml
@@ -1,6 +1,6 @@
 exp_name: "NSGAIII_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
+init_pop_path: "data/seed_compounds.csv"
 init_pop_size: 100
 standardize: True
 
@@ -10,4 +10,4 @@ tolerance: 0.1
 generations: 100
 algorithm: "NSGAIII"
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/SA.yaml b/examples/config_files/SA.yaml
index 4646513..878f74d 100644
--- a/examples/config_files/SA.yaml
+++ b/examples/config_files/SA.yaml
@@ -1,6 +1,6 @@
 exp_name: "SA_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
+init_pop_path: "data/seed_compounds.csv"
 init_pop_size: 100
 standardize: True
 
@@ -13,4 +13,4 @@ temperature: 1.0
 minimum_temperature: 0.000001
 alpha: 0.95
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file
diff --git a/examples/config_files/SPEA2.yaml b/examples/config_files/SPEA2.yaml
index 09d6504..aa6d062 100644
--- a/examples/config_files/SPEA2.yaml
+++ b/examples/config_files/SPEA2.yaml
@@ -1,6 +1,6 @@
 exp_name: "SPEA2_EXAMPLE_CONFIG"
 
-init_pop_path: ".../.../path_to_seed_compounds.tsv"
+init_pop_path: "data/seed_compounds.csv"
 init_pop_size: 100
 standardize: True
 
@@ -10,4 +10,4 @@ tolerance: 0.1
 generations: 100
 algorithm: "SPEA2"
 
-output_path: ".../output_dir_path/"
\ No newline at end of file
+output_path: "output_dir_path/"
\ No newline at end of file

From 24298902f1c4d45133d03afbe37ad5b47732b629 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 13:03:24 +0000
Subject: [PATCH 07/10] [ADD] path validation

---
 src/reactea/io_streams/writers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/reactea/io_streams/writers.py b/src/reactea/io_streams/writers.py
index e3f64c0..70cd0c7 100644
--- a/src/reactea/io_streams/writers.py
+++ b/src/reactea/io_streams/writers.py
@@ -23,6 +23,8 @@ def set_up_folders(path: Path):
         path: str
             path to folder to create
         """
+        if isinstance(path, str):
+            path = Path(path)
         path.mkdir(parents=True, exist_ok=True)
 
     @staticmethod

From 411073d431f4558dccfa1a3f3c7bacf9805e6383 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 13:12:17 +0000
Subject: [PATCH 08/10] [FIX] path

---
 src/reactea/io_streams/readers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/reactea/io_streams/readers.py b/src/reactea/io_streams/readers.py
index de9e645..8e6c903 100644
--- a/src/reactea/io_streams/readers.py
+++ b/src/reactea/io_streams/readers.py
@@ -41,6 +41,7 @@ def get_config_from_yaml(yaml_file: Path):
 
         output_path = Path(config_dict['output_path']).resolve()
         config_dict['output_dir'] = output_path / config_dict['exp_name']
+        config_dict['output_path'] = output_path
 
         init_pop_path = Path(config_dict['init_pop_path']).resolve()
         config_dict['init_pop_path'] = init_pop_path

From cec216cf993d99ef69ec9e2015459ce0e3ecbf21 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 14:33:02 +0000
Subject: [PATCH 09/10] [FIX] tests

---
 tests/integration_tests/algorithms/test_so_algorithms.py      | 4 ++--
 tests/integration_tests/case_studies/test_compound_quality.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration_tests/algorithms/test_so_algorithms.py b/tests/integration_tests/algorithms/test_so_algorithms.py
index 44fa5cc..2447c6a 100644
--- a/tests/integration_tests/algorithms/test_so_algorithms.py
+++ b/tests/integration_tests/algorithms/test_so_algorithms.py
@@ -1,7 +1,7 @@
 import os
 from unittest import TestCase
 
-from base_test_cases import AlgorithmsBaseTestCase
+from tests.base_test_cases import AlgorithmsBaseTestCase
 from reactea.case_studies.compound_quality import CompoundQuality
 from reactea.io_streams import Loaders, Writers
 from reactea.optimization.jmetal.ea import ChemicalEA
@@ -15,7 +15,7 @@ def run_algorithm(self, algorithm):
         self.configs['multi_objective'] = False
 
         # set up output folder
-        self.output_folder = os.path.join(self.output_folder, algorithm)
+        self.output_folder = self.output_folder / algorithm
         self.configs['output_dir'] = self.output_folder
 
         # define number of molecules to use to only 1 in the case of RandomSearch
diff --git a/tests/integration_tests/case_studies/test_compound_quality.py b/tests/integration_tests/case_studies/test_compound_quality.py
index db41b5d..014f659 100644
--- a/tests/integration_tests/case_studies/test_compound_quality.py
+++ b/tests/integration_tests/case_studies/test_compound_quality.py
@@ -1,7 +1,7 @@
 import os
 from unittest import TestCase
 
-from base_test_cases import CaseStudiesBaseTestCase
+from tests.base_test_cases import CaseStudiesBaseTestCase
 from reactea.case_studies.compound_quality import CompoundQuality
 from reactea.io_streams import Loaders, Writers
 
@@ -20,7 +20,7 @@ def run_case_study(self, mo=True):
             self.configs['algorithm'] = 'GA'
 
         # set up output folder
-        self.output_folder = os.path.join(self.output_folder, self.configs['algorithm'])
+        self.output_folder = self.output_folder / self.configs['algorithm']
         self.configs['output_dir'] = self.output_folder
 
         # initialize population and initialize population smiles

From aa5585024e5aa71b29a68c82618bc0eff8b4df59 Mon Sep 17 00:00:00 2001
From: jcorreia11 <jfscorreia95@gmail.com>
Date: Wed, 12 Feb 2025 14:47:43 +0000
Subject: [PATCH 10/10] [FIX] tests

---
 src/reactea/vizualization/plot_results.py           | 9 +++++++--
 tests/unit_tests/visualization/test_plot_results.py | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/reactea/vizualization/plot_results.py b/src/reactea/vizualization/plot_results.py
index 33b4ecd..0f2ae7c 100644
--- a/src/reactea/vizualization/plot_results.py
+++ b/src/reactea/vizualization/plot_results.py
@@ -190,9 +190,14 @@ def crop_image_with_transparency(img):
 
         # Applying transparency
         # (https://stackoverflow.com/questions/4379978/python-pil-how-to-make-area-transparent-in-png)
-        for transparent_zone in [(0, 0, l - epsilon, h), (0, 0, w, u - epsilon), (r + epsilon, 0, w, h),
-                                 (0, b + epsilon, w, h)]:
+        for transparent_zone in [
+            (0, 0, max(0, l - epsilon), h),
+            (0, 0, w, max(0, u - epsilon)),
+            (min(w, r + epsilon), 0, w, h),
+            (0, min(h, b + epsilon), w, h)
+        ]:
             draw = ImageDraw.Draw(mask)
+            print(transparent_zone)
             draw.rectangle(transparent_zone, fill=0)
             img.putalpha(mask)
 
diff --git a/tests/unit_tests/visualization/test_plot_results.py b/tests/unit_tests/visualization/test_plot_results.py
index 3920a47..bf1f50e 100644
--- a/tests/unit_tests/visualization/test_plot_results.py
+++ b/tests/unit_tests/visualization/test_plot_results.py
@@ -1,4 +1,3 @@
-import os.path
 from unittest import TestCase
 
 from reactea.vizualization.plot_results import PlotResults
@@ -9,7 +8,8 @@
 class TestPlotResults(TestCase):
 
     def test_plot_results(self):
-        mock_output_configs = {'transformations_path': os.path.join(TEST_DIR, 'data/output_example/GA_rr_600esc_400gen_0.6sweet_0.4_caloric/FINAL_TRANSFORMATIONS_04-29_16-51-54.csv')}
+        path = TEST_DIR / 'data' / 'output_example' / 'GA_rr_600esc_400gen_0.6sweet_0.4_caloric' / 'FINAL_TRANSFORMATIONS_04-29_16-51-54.csv'
+        mock_output_configs = {'transformations_path': path}
         PlotResults(mock_output_configs, solution_index=0).plot_results(save_fig=False)
         PlotResults(mock_output_configs, solution_index=4).plot_results(save_fig=False)
         PlotResults(mock_output_configs, solution_index=6).plot_results(save_fig=False)