77from ray .tune .search import create_searcher , ConcurrencyLimiter , SEARCH_ALG_IMPORT
88from netpyne .batchtools import runtk
99from collections import namedtuple
10- from batchtk .raytk .search import ray_trial , LABEL_POINTER
11- from batchtk .utils import get_path
10+ from batchtk .utils import get_path , SQLiteStorage , ScriptLogger
1211from io import StringIO
1312import numpy
1413from typing import Any , Callable , Dict , List , Optional , Tuple , Union
1514from netpyne .batchtools import submits
15+ from batchtk import runtk
16+ from batchtk .runtk .trial import trial , LABEL_POINTER
17+ import datetime
1618#import signal #incompatible with signal and threading from ray
1719#import threading
1820
@@ -84,62 +86,8 @@ def ray_optuna_search(dispatcher_constructor: Callable, # constructor for the di
8486 -------
8587 Study: namedtuple('Study', ['algo', 'results'])(algo, results), # named tuple containing the created algorithm and the results of the search
8688 """
87- from ray .tune .search .optuna import OptunaSearch
88-
89- if ray_config is None :
90- ray_config = {}
91- ray_init_kwargs = ray_config #{"runtime_env": {"working_dir:": "."}} | ray_config # do not actually need to specify a working dir, can
92- ray .init (** ray_init_kwargs )# TODO needed for python import statements ?
93- if optuna_config == None :
94- optuna_config = {}
95-
96- storage_path = get_path (checkpoint_path )
97- algo = ConcurrencyLimiter (searcher = OptunaSearch (metric = metric , mode = mode , ** optuna_config ),
98- max_concurrent = max_concurrent ,
99- batch = batch ) #TODO does max_concurrent and batch work?
100-
101- #submit = submit_constructor()
102- #submit.update_templates(
103- # **run_config
104- #)
105- project_path = os .getcwd ()
106-
107- def run (config ):
108- config .update ({'saveFolder' : output_path , 'simLabel' : LABEL_POINTER })
109- data = ray_trial (config = config , label = label , dispatcher_constructor = dispatcher_constructor ,
110- project_path = project_path , output_path = output_path , submit_constructor = submit_constructor ,
111- submit_kwargs = run_config , log = None )
112- if isinstance (metric , str ):#TODO only Optuna supports multiobjective?
113- metrics = {'config' : config , 'data' : data , metric : data [metric ]}
114- session .report (metrics )
115- elif isinstance (metric , (list , tuple )):
116- metrics = {k : data [k ] for k in metric }
117- metrics ['config' ] = config
118- metrics ['data' ] = data
119- session .report (metrics )
120- else :
121- raise ValueError ("metric must be a string or a list/tuple of strings" )
122- tuner = tune .Tuner (
123- run ,
124- tune_config = tune .TuneConfig (
125- search_alg = algo ,
126- num_samples = num_samples ,
127- ),
128- run_config = RunConfig (
129- storage_path = storage_path ,
130- name = label ,
131- ),
132- param_space = params ,
133- )
134-
135- results = tuner .fit ()
136- resultsdf = results .get_dataframe ()
137- resultsdf .to_csv ("{}.csv" .format (label ))
138- #return namedtuple('Study', ['algo', 'results'])(algo, results)
139- if clean_checkpoint :
140- os .system ("rm -r {}" .format (storage_path ))
141- return namedtuple ('Study' , ['algo' , 'results' ])(algo .searcher ._ot_study , results )
142-
89+ from warnings import warn
90+ warn ("ray_optuna_search is deprecated, please use ray_search with algorithm='optuna' instead" , DeprecationWarning )
14391"""
14492Parameters
14593:
@@ -183,7 +131,7 @@ def ray_search(dispatcher_constructor: Callable, # constructor for the dispatche
183131 output_path : Optional [str ] = './batch' , # directory for storing generated files
184132 checkpoint_path : Optional [str ] = './checkpoint' , # directory for storing checkpoint files
185133 max_concurrent : Optional [int ] = 1 , # number of concurrent trials to run at one time
186- batch : Optional [bool ] = True , # whether concurrent trials should run synchronously or asynchronously
134+ batch : Optional [bool ] = True , # whether concurrent trials should run synchronously or asynchronously
187135 num_samples : Optional [int ] = 1 , # number of trials to run
188136 metric : Optional [str ] = None , # metric to optimize, if not supplied, no data will be collated.
189137 mode : Optional [str ] = "min" , # either 'min' or 'max' (whether to minimize or maximize the metric
@@ -196,18 +144,23 @@ def ray_search(dispatcher_constructor: Callable, # constructor for the dispatche
196144 prune_metadata = True , # whether to prune the metadata from the results.csv
197145 remote_dir : Optional [str ] = None , # absolute path for directory to run the search on (for submissions over SSH)
198146 host : Optional [str ] = None , # host to run the search on
199- key : Optional [str ] = None # key for TOTP generator...
147+ key : Optional [str ] = None , # key for TOTP generator...
148+ file_cleanup : Optional [bool | list | tuple ] = True , # whether to clean up accessory files after the search is completed
149+ advanced_logging : Optional [bool | str ] = True ,
200150 ) -> study :
201151
202152 expected_total = params .pop ('_expected_trials_per_sample' ) * num_samples
203153 if (dispatcher_constructor == runtk .dispatchers .SSHDispatcher ) or \
204154 (dispatcher_constructor == SSHGridDispatcher ):
205- if submit_constructor == submits .SGESubmitSFS :
155+ dispatcher_kwargs = None
156+ if submit_constructor == submits .SGESubmitSSH :
206157 from fabric import connection
207158 dispatcher_kwargs = {'connection' : connection .Connection (host )}
208159 if submit_constructor == submits .SlurmSubmitSSH :
209160 from batchtk .utils import TOTPConnection
210161 dispatcher_kwargs = {'connection' : TOTPConnection (host , key )}
162+ if dispatcher_kwargs == None :
163+ raise ValueError ("for SSH based methods, please provide either 'sftp' or None as the comm_type" )
211164 else :
212165 dispatcher_kwargs = {}
213166 if ray_config is None :
@@ -233,6 +186,18 @@ def ray_search(dispatcher_constructor: Callable, # constructor for the dispatche
233186 #TODO class this object for self calls? cleaner? vs nested functions
234187 #TODO clean up working_dir and excludes
235188 storage_path = get_path (checkpoint_path )
189+ adv_path = None
190+ timestamp = datetime .datetime .now ().strftime ("%Y%m%d_%H%M%S" )
191+ if advanced_logging :
192+ if advanced_logging is True :
193+ advanced_logging = "./" #follows from os.getcwd()
194+ adv_path = get_path ("{}/run_{}" .format (advanced_logging , timestamp ))
195+ if isinstance (advanced_logging , str ):
196+ adv_path = get_path (advanced_logging )
197+ os .makedirs (adv_path , exist_ok = True )
198+
199+ if file_cleanup is True :
200+ file_cleanup = (runtk .SGLOUT , runtk .MSGOUT )
236201 load_path = "{}/{}" .format (storage_path , label )
237202 algo = create_searcher (algorithm , ** algorithm_config ) #concurrency may not be accepted by all algo
238203 #search_alg – The search algorithm to use.
@@ -248,13 +213,29 @@ def ray_search(dispatcher_constructor: Callable, # constructor for the dispatche
248213 #submit.update_templates(
249214 # **run_config
250215 #)
def ray_trial(config, label, dispatcher_constructor, project_path, output_path, submit_constructor,
              dispatcher_kwargs=None, submit_kwargs=None, interval=60, data_storage=None, debug_log=None,
              report=('path', 'config', 'data'), cleanup=(runtk.SGLOUT, runtk.MSGOUT), check_storage=False):
    """Run a single tuning trial through batchtk's dispatcher/submit machinery.

    Wraps ``batchtk.runtk.trial.trial``: derives a per-trial id from the Ray Tune
    context, wires in the advanced-logging sinks when enabled, and forwards all
    remaining arguments unchanged.

    Parameters
    ----------
    config: dict of parameter values for this trial.
    label: search label shared by all trials.
    dispatcher_constructor / submit_constructor: batchtk dispatcher/submit classes.
    project_path / output_path: working directory and generated-file directory.
    dispatcher_kwargs / submit_kwargs: extra kwargs forwarded to the constructors.
    interval: polling interval (seconds) while waiting on the trial.
    data_storage: optional storage backend for trial results; if None and
        advanced logging is active, an SQLiteStorage under ``adv_path`` is used.
    debug_log: optional logger; if None and advanced logging is active, a
        ScriptLogger writing to ``adv_path``/trials.log is used.
    report: keys recorded for the trial.
    cleanup: accessory files removed after the trial completes.
    check_storage: whether to consult storage before re-running the trial.

    Returns
    -------
    Whatever ``batchtk.runtk.trial.trial`` returns for this trial (the trial data).
    """
    # BUGFIX: the previous version unconditionally reset debug_log and
    # data_storage to None, silently discarding caller-supplied values.
    # Now the adv_path-based defaults are used only as a fallback.
    if adv_path:  # closure variable set up by ray_search's advanced_logging handling
        if debug_log is None:
            debug_log = ScriptLogger(file_out="{}/trials.log".format(adv_path))
        if data_storage is None:
            data_storage = SQLiteStorage(label='trials', path=adv_path, entries=('path', 'config', 'data'))
    tid = tune.get_context().get_trial_id()
    tid = tid.split('_')[-1]  # trailing token uniquely identifies the trial (int/string)
    return trial(
        config=config, label=label, tid=tid, dispatcher_constructor=dispatcher_constructor,
        project_path=project_path, output_path=output_path, submit_constructor=submit_constructor,
        dispatcher_kwargs=dispatcher_kwargs, submit_kwargs=submit_kwargs, interval=interval,
        data_storage=data_storage, debug_log=debug_log, report=report, cleanup=cleanup, check_storage=check_storage)
230+
251231 project_path = remote_dir or os .getcwd () # if remote_dir is None, then use the current working directory
252232 def run (config ):
253233 config .update ({'saveFolder' : output_path , 'simLabel' : LABEL_POINTER })
254234 data = ray_trial (config = config , label = label , dispatcher_constructor = dispatcher_constructor ,
255235 project_path = project_path , output_path = output_path , submit_constructor = submit_constructor ,
256236 dispatcher_kwargs = dispatcher_kwargs , submit_kwargs = run_config ,
257- interval = sample_interval , log = None , report = report_config )
237+ interval = sample_interval , report = report_config ,
238+ cleanup = file_cleanup , check_storage = False )
258239 if metric is None :
259240 metrics = {'data' : data , '_none_placeholder' : 0 } #TODO, should include 'config' now with purge_metadata?
260241 session .report (metrics )
@@ -411,7 +392,9 @@ def shim(dispatcher_constructor: Optional[Callable] = None, # constructor for th
411392 prune_metadata : Optional [bool ] = True , # whether to prune the metadata from the results.csv
412393 remote_dir : Optional [str ] = None , # absolute path for directory to run the search on (for submissions over SSH)
413394 host : Optional [str ] = None , # host to run the search on
414- key : Optional [str ] = None # key for TOTP generator...
395+ key : Optional [str ] = None , # key for TOTP generator...
396+ file_cleanup : Optional [bool ] = True , # whether to clean up accessory files after the search is completed
397+ advanced_logging : Optional [bool | str ] = True ,
415398 ) -> Dict :
416399 kwargs = locals ()
417400 if metric is None and algorithm not in ['variant_generator' , 'random' , 'grid' ]:
@@ -464,7 +447,9 @@ def search(dispatcher_constructor: Optional[Callable] = None, # constructor for
464447 prune_metadata : Optional [bool ] = True , # whether to prune the metadata from the results.csv
465448 remote_dir : Optional [str ] = None , # absolute path for directory to run the search on (for submissions over SSH)
466449 host : Optional [str ] = None , # host to run the search on
467- key : Optional [str ] = None # key for TOTP generator.
450+ key : Optional [str ] = None , # key for TOTP generator.
451+ file_cleanup : Optional [bool ] = True , # whether to clean up accessory files after the search is completed
452+ advanced_logging : Optional [bool | str ] = True ,
468453 ) -> study : # results of the search -> study.results (raw tune.ResultGrid), study.data (pandas.DataFrame conversion)
469454 """
470455 search(...)
@@ -495,6 +480,10 @@ def search(dispatcher_constructor: Optional[Callable] = None, # constructor for
495480 remote_dir: Optional[str] = None, # absolute path for directory to run the search on (for submissions over SSH)
496481 host: Optional[str] = None, # host to run the search on (for submissions over SSH)
497482 key: Optional[str] = None # key for TOTP generator (for submissions over SSH)
483+ file_cleanup: Optional[bool] = True, # whether to clean up accessory files after the search is completed
484+ advanced_logging: Optional[bool] = True, # enables advanced logging features, checkpoint_db and log_file.
485+ checkpoint_db: Optional[str] = None, # path for checkpoint db file.
486+ log_file: Optional[str] = None, # path for the log file
498487 Creates (upon completed fitting run...)
499488 -------
500489 <label>.csv: file containing the results of the search
@@ -514,20 +503,20 @@ def search(dispatcher_constructor: Optional[Callable] = None, # constructor for
514503"""
515504SEE:
516505'variant_generator'
517- 'random' -> points to variant_generator
506+ 'random' <- deprecated (points to variant_generator)
518507'ax'
519- 'dragonfly'
520- 'skopt'
508+ 'dragonfly' <- deprecated
509+ 'skopt' <- deprecated
521510'hyperopt'
522511'bayesopt'
523512'bohb'
524513'nevergrad'
525514'optuna'
526515'zoopt'
527- 'sigopt'
516+ 'sigopt' <- deprecated
528517'hebo'
529- 'blendsearch'
530- 'cfo'
518+ 'blendsearch' <- deprecated
519+ 'cfo' <- deprecated
531520"""
532521
533522
0 commit comments