Skip to content

Commit 4a7ec85

Browse files
authored
Merge pull request #344 from datamol-io/cleanup
Cleanup of dependencies and files
2 parents f4f8e2a + a3a9c09 commit 4a7ec85

File tree

10 files changed

+7
-133937
lines changed

10 files changed

+7
-133937
lines changed
File renamed without changes.

docs/design.md

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -101,24 +101,3 @@ Some loss functions are already implemented in the PredictorModule, including `m
101101
Our current code is compatible with the metrics defined by _pytorch-lightning_, which include a great set of metrics. We also added the PearsonR and SpearmanR as they are important correlation metrics. You can define any new metric in the file `graphium/trainer/metrics.py`. The metric must inherit from `TensorMetric` and must be added to the dictionary `graphium.trainer.metrics.METRICS_DICT`.
102102

103103
To use the metric, you can easily add its name from `METRICS_DICT` in the yaml configuration file, at the address `metrics.metrics_dict`. Each metric has an underlying dictionary with a mandatory `threshold` key containing information on how to threshold the prediction/target before computing the metric. Any `kwargs` arguments of the metric must also be added.
104-
105-
## (OLD) Running a hyper-parameter search
106-
107-
In the current repository, we use `hydra-core` to launch multiple experiments in a grid-search manner. It works by specifying the parameters that we want to change from a given YAML file.
108-
109-
Below is an example of running a set of 3\*2\*2\*2=24 experiments, 3 variations of the gnn type _layer_name_, 2 variations of the learning rate _lr_, 2 variations of the hidden dimension _hidden_dim_, 2 variations of the network depth _hidden_depth_. All parameters not mentionned in the code below are unchanged from the file `expts/main_micro_ZINC.py`.
110-
111-
python expts/main_micro_ZINC.py --multirun \
112-
model.layer_name=gin,gcn,pna-conv3 \
113-
constants.exp_name="testing_hydra" \
114-
constants.device="cuda:0" \
115-
constants.ignore_train_error=true \
116-
predictor.lr=1e-4,1e-3 \
117-
model.gnn_kwargs.hidden_dim=32,64 \
118-
model.gnn_kwargs.hidden_depth=4,8
119-
120-
The results of the run will be available in the folder `multirun/[CURRENT-DATE]/[CURRENT-TIME]`. To open the results in tensorflow, run the following command using _bash_ or _powershell_
121-
122-
`tensorboard --logdir 'multirun/[CURRENT-DATE]/[CURRENT-TIME]/' --port 8000`
123-
124-
Then open a web-browser and enter the address `http://localhost:8000/`.

env.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,12 @@ dependencies:
3030
# ML packages
3131
- cudatoolkit # works also with CPU-only system.
3232
- pytorch >=1.10.2,<2.0
33-
- tensorboard
33+
- torchvision
3434
- pytorch-lightning >=1.9
3535
- torchmetrics >=0.7.0,<0.11
36-
- hydra-core >=1.0
3736
- ogb
3837
- pytorch_geometric >=2.0 # Use `pyg` for Windows instead of `pytorch_geometric`
3938
- wandb
40-
- optuna
4139
- mup
4240
- pytorch_sparse >=0.6
4341
- pytorch_cluster >=1.5

expts/optuna/optuna_base_config.py

Lines changed: 0 additions & 16 deletions
This file was deleted.

graphium/data/QM9/qm9.csv

Lines changed: 0 additions & 133886 deletions
This file was deleted.

graphium/trainer/predictor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ def _general_epoch_end(self, outputs: Dict[str, Any], step_name: str) -> None:
523523
metrics_logs = self.task_epoch_summary.get_metrics_logs()
524524
self.task_epoch_summary.set_results(task_metrics=metrics_logs)
525525

526-
return metrics_logs # Consider returning concatenated dict for tensorboard
526+
return metrics_logs # Consider returning concatenated dict for logging
527527

528528
def on_train_epoch_start(self) -> None:
529529
self.epoch_start_time = time.time()

graphium/trainer/predictor_options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ class EvalOptions:
132132
133133
metrics:
134134
A dictionary of metrics to compute on the prediction, other than the loss function.
135-
These metrics will be logged into TensorBoard.
135+
These metrics will be logged into WandB or other loggers.
136136
137137
metrics_on_progress_bar:
138138
The metrics names from `metrics` to display also on the progress bar of the training

graphium/trainer/predictor_summaries.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def __init__(
5151
5252
metrics:
5353
A dictionary of metrics to compute on the prediction, other than the loss function.
54-
These metrics will be logged into TensorBoard.
54+
These metrics will be logged into WandB or similar.
5555
5656
metrics_on_training_set:
5757
The metrics names from `metrics` to be computed on the training set for each iteration.
@@ -118,7 +118,7 @@ def set_results(
118118
metrics: a dictionary of metrics
119119
"""
120120

121-
# Include the task_name in the loss for tensorboard, and similarly for other metrics
121+
# Include the task_name in the loss for logging, and similarly for other metrics
122122
metrics[self.metric_log_name(self.task_name, "loss", self.step_name)] = self.loss
123123
self.summaries[self.step_name] = Summary.Results(
124124
targets=self.targets,
@@ -144,7 +144,7 @@ def is_best_epoch(self, step_name: str, loss: Tensor, metrics: Dict[str, Tensor]
144144
if not (step_name in self.best_summaries.keys()):
145145
return True
146146

147-
# Include the task_name in the loss for tensorboard, and similarly for other metrics
147+
# Include the task_name in the loss for logging, and similarly for other metrics
148148
metrics[self.metric_log_name(self.task_name, "loss", self.step_name)] = loss
149149
monitor_name = f"{self.monitor}/{step_name}" # Include task_name?
150150
if (

mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ docs_dir: "docs"
1111

1212
nav:
1313
- Overview: index.md
14-
- Benchmark: benchmark.md
14+
- Baseline: baseline.md
1515
- API:
1616
- graphium.nn:
1717
- graphium.nn: api/graphium.nn/graphium.nn.md

pyproject.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,11 @@ dependencies = [
4747
"gcsfs >=2021.6",
4848
"platformdirs",
4949
# ML packages
50-
"tensorboard",
5150
"pytorch-lightning >=1.9",
5251
"torchmetrics >=0.7.0,<0.11",
53-
"hydra-core >=1.0",
5452
"ogb",
5553
"torch-geometric >=2.0",
5654
"wandb",
57-
"optuna",
5855
"mup",
5956
"torch_sparse >=0.6",
6057
"torch_cluster >=1.5",
@@ -98,10 +95,8 @@ minversion = "6.0"
9895
addopts = "--verbose --durations=10 -n auto"
9996
testpaths = ["tests"]
10097
filterwarnings = [
101-
"ignore::DeprecationWarning:tensorboard.*:",
10298
"ignore::DeprecationWarning:ray.*:",
10399
"ignore::DeprecationWarning:numba.*:",
104100
"ignore::DeprecationWarning:lightning_fabric.*:",
105101
"ignore::DeprecationWarning:pytorch_lightning.*:",
106-
"ignore::UserWarning:umap.*:",
107102
]

0 commit comments

Comments
 (0)