Merge pull request #7 from gperdrizet/dev

gperdrizet · web-flow · commit 0553a3115f6a · 2025-11-15T00:40:04.000-05:00
Added progress plot option to single climber
diff --git a/CITATION.cff b/CITATION.cff
@@ -21,5 +21,5 @@ keywords:
   - data-science
   - Python
 license: GPL-3.0
-version: 0.1.7
+version: 0.1.10
 date-released: 2025-11-14
diff --git a/docs/source/advanced.rst b/docs/source/advanced.rst
@@ -81,22 +81,69 @@ Replicate Noise Tuning
 - **Medium noise (0.3-0.7)**: General purpose exploration
 - **High noise (0.7-1.5)**: When you need very diverse starting points
 
-Checkpoint Strategies
----------------------
+Checkpointing
+-------------
 
-For Very Long Runs
-~~~~~~~~~~~~~~~~~~
+For long optimizations, save intermediate progress:
 
 .. code-block:: python
 
-   # Save every 10 minutes for 24-hour runs
    climber = HillClimber(
        data=data,
-       objective_func=objective,
-       max_time=1440,  # 24 hours
-       checkpoint_file='long_run.pkl',
-       save_interval=600  # 10 minutes
+       objective_func=my_objective,
+       max_time=60,
+       checkpoint_file='optimization.pkl',
+       save_interval=300  # Save every 5 minutes
+   )
+   
+   result = climber.climb()
+
+Resume from a checkpoint:
+
+.. code-block:: python
+
+   resumed = HillClimber.resume_from_checkpoint(
+       checkpoint_file='optimization.pkl',
+       objective_func=my_objective,
+       new_max_time=30  # Continue for 30 more minutes
    )
+   
+   result = resumed.climb()
+
+Progress Monitoring
+-------------------
+
+Live Progress Plots
+~~~~~~~~~~~~~~~~~~~
+
+Monitor optimization progress in real-time with automatic plotting:
+
+.. code-block:: python
+
+   climber = HillClimber(
+       data=data,
+       objective_func=my_objective,
+       max_time=60,
+       plot_progress=5  # Plot every 5 minutes
+   )
+   
+   result = climber.climb()
+
+This is particularly useful for:
+
+- Long-running optimizations (>10 minutes)
+- Interactive Jupyter notebooks
+- Debugging objective functions
+- Monitoring convergence behavior
+
+**Important Notes**:
+
+- Only works with the ``climb()`` method (single-process mode)
+- Does **not** work with ``climb_parallel()`` because worker processes don't
+  report intermediate results
+- If no steps are accepted between plot intervals, displays time information
+  instead of plotting
+- In Jupyter notebooks, each plot replaces the previous one for clean output
 
 Performance Optimization
 ------------------------
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -8,7 +8,7 @@
 project = 'Hill Climber'
 copyright = '2025, Hill Climber Contributors'
 author = 'Hill Climber Contributors'
-release = '0.1.7'
+release = '0.1.10'
 
 # -- General configuration ---------------------------------------------------
 extensions = [
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
@@ -33,6 +33,16 @@ To explore the examples, modify the code, or contribute:
 2. Open in GitHub Codespaces
 3. The development environment will be configured automatically
 
+Verify Installation
+-------------------
+
+Test that the installation was successful:
+
+.. code-block:: python
+
+   import hill_climber
+   print(f"Hill Climber {hill_climber.__version__} successfully installed!")
+
 **Option 2: Local Development**
 
 1. Clone or fork the repository:
@@ -48,16 +58,6 @@ To explore the examples, modify the code, or contribute:
 
       pip install -e .
 
-Verify Installation
--------------------
-
-Test that the installation was successful:
-
-.. code-block:: python
-
-   from hill_climber import HillClimber
-   print("Hill Climber successfully installed!")
-
 Running Tests
 -------------
 
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
@@ -42,6 +42,26 @@ Here's a simple example that creates a dataset with high Pearson correlation:
    # View results
    print(f"Final correlation: {result['Pearson correlation']:.3f}")
 
+Monitoring Progress
+-------------------
+
+For longer runs, monitor progress with live plots:
+
+.. code-block:: python
+
+   climber = HillClimber(
+       data=data,
+       objective_func=objective_high_correlation,
+       max_time=30,
+       mode='maximize',
+       plot_progress=5  # Plot every 5 minutes
+   )
+
+   result = climber.climb()
+
+.. note::
+   Progress plotting only works with ``climb()`` (not ``climb_parallel()``).
+
 Understanding the Results
 --------------------------
 
diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst
@@ -72,6 +72,16 @@ Hyperparameters
    Amount of uniform noise to add when creating replicate starting points.
    Only used in ``climb_parallel()``.
 
+**plot_progress** (default: None)
+   Interval in minutes for plotting optimization progress during a run.
+   When set, creates scatter plots showing the current best solution at
+   regular intervals. For example, ``plot_progress=5`` plots every 5 minutes.
+   
+   .. note::
+      This option only works in single-process mode (``climb()``). It does not
+      work with parallel mode (``climb_parallel()``) because results from worker
+      processes are not collected until the end of the run.
+
 Boundary Handling
 -----------------
 
diff --git a/hill_climber/__init__.py b/hill_climber/__init__.py
@@ -56,7 +56,7 @@
     - plotting_functions: Visualization utilities
 """
 
-__version__ = '0.1.7'
+__version__ = '0.1.10'
 __author__ = 'gperdrizet'
 
 from .optimizer import HillClimber
diff --git a/hill_climber/optimizer.py b/hill_climber/optimizer.py
@@ -6,6 +6,7 @@
 import time
 import os
 from multiprocessing import Pool, cpu_count
+import matplotlib.pyplot as plt
 
 from .climber_functions import perturb_vectors, calculate_objective
 from .plotting_functions import plot_input_data, plot_results as plot_results_func
@@ -45,7 +46,8 @@ def __init__(
         mode='maximize',
         target_value=None,
         checkpoint_file=None,
-        save_interval=60
+        save_interval=60,
+        plot_progress=None
     ):
         """Initialize HillClimber.
         
@@ -65,6 +67,8 @@ def __init__(
             target_value: Target objective value for target mode (default: None)
             checkpoint_file: Path to save/load checkpoints (default: None)
             save_interval: Seconds between checkpoint saves (default: 60)
+            plot_progress: Plot results every N minutes during optimization. 
+                          If None (default), no plots are drawn during optimization.
             
         Raises:
             ValueError: If mode is invalid or target_value missing for target mode
@@ -94,6 +98,7 @@ def __init__(
         self.step_size = step_size
         self.perturb_fraction = perturb_fraction
         self.temperature = temperature
+
         # Convert user-provided cooling_rate to multiplicative factor
         # User specifies 1 - multiplicative_rate, we store the multiplicative rate
         self.cooling_rate = 1 - cooling_rate
@@ -102,6 +107,7 @@ def __init__(
         self.target_value = target_value
         self.checkpoint_file = checkpoint_file
         self.save_interval = save_interval
+        self.plot_progress = plot_progress
         
         # These will be set during climb
         self.best_data = None
@@ -115,6 +121,7 @@ def __init__(
         self.temp = temperature
         self.start_time = None
         self.last_save_time = None
+        self.last_plot_time = None
 
 
     def save_checkpoint(self, force=False):
@@ -162,6 +169,7 @@ def save_checkpoint(self, force=False):
         
         # Create checkpoint directory if needed
         checkpoint_dir = os.path.dirname(self.checkpoint_file)
+
         if checkpoint_dir and not os.path.exists(checkpoint_dir):
             os.makedirs(checkpoint_dir)
         
@@ -172,6 +180,74 @@ def save_checkpoint(self, force=False):
         print(f"Checkpoint saved: {self.checkpoint_file}")
 
 
+    def plot_progress_check(self, force=False):
+        """Plot optimization progress if the plot_progress interval has elapsed.
+
+        Does nothing when plot_progress is None or the run has not started.
+        Until a first plot exists, the interval is measured from start_time.
+
+        Args:
+            force: Plot even if plot_progress interval hasn't elapsed (default: False)
+        """
+
+        if self.plot_progress is None or self.start_time is None:
+            return
+
+        current_time = time.time()
+
+        # Before the first plot, use the run start as the reference point;
+        # otherwise the interval check is skipped whenever last_plot_time is
+        # still None and progress is reported immediately.
+        has_plotted = self.last_plot_time is not None
+        last_time = self.last_plot_time if has_plotted else self.start_time
+
+        if not force and (current_time - last_time) / 60 < self.plot_progress:
+            return
+
+        # Clear any existing plots
+        plt.close('all')
+
+        # Clear output in Jupyter notebooks to replace previous plot
+        try:
+            from IPython.display import clear_output
+            clear_output(wait=True)
+
+        except ImportError:
+            # Not in IPython/Jupyter environment
+            pass
+
+        elapsed_min = (current_time - self.start_time) / 60
+        last_elapsed_min = (last_time - self.start_time) / 60
+
+        # Format elapsed time based on duration
+        def format_elapsed(minutes):
+            if minutes < 60:
+                return f"{int(minutes)} minutes"
+            return f"{minutes / 60:.1f} hours"
+
+        # Check if there are any steps to plot
+        if len(self.steps['Step']) == 0:
+            print("\nNo accepted steps since last progress update")
+            print(f"Last progress update: {format_elapsed(last_elapsed_min)}")
+            print(f"Current time: {format_elapsed(elapsed_min)}")
+
+        else:
+            # Build the single-replicate structure expected by plot_results
+            # only when there is something to draw
+            best_data_output = (
+                pd.DataFrame(self.best_data, columns=self.columns)
+                if self.is_dataframe else self.best_data
+            )
+            results = {
+                'input_data': self.data,
+                'results': [(self.data, best_data_output, pd.DataFrame(self.steps))]
+            }
+            print(f"\nPlotting progress at {format_elapsed(elapsed_min)}...")
+            plot_results_func(results, plot_type='scatter')
+
+        self.last_plot_time = current_time
+
+
     def load_checkpoint(self, checkpoint_file):
         """Load optimization state from checkpoint file.
         
@@ -359,10 +435,16 @@ def climb(self):
             
             # Save checkpoint periodically
             self.save_checkpoint()
+            
+            # Plot progress periodically
+            self.plot_progress_check()
         
         # Save final checkpoint
         self.save_checkpoint(force=True)
         
+        # Plot final results
+        self.plot_progress_check(force=True)
+        
         # Convert back to DataFrame if input was DataFrame
         best_data_output = (
             pd.DataFrame(self.best_data, columns=self.columns) 
@@ -457,7 +539,7 @@ def climb_parallel(self, replicates=4, initial_noise=0.0, output_file=None,
                 data_rep, self.objective_func, self.max_time, self.step_size,
                 self.perturb_fraction, self.temperature, self.cooling_rate,
                 self.mode, self.target_value, self.is_dataframe, self.columns,
-                checkpoint_file, self.save_interval
+                checkpoint_file, self.save_interval, None  # Disable plot_progress for parallel
             ))
         
         # Execute in parallel
@@ -509,7 +591,8 @@ def climb_parallel(self, replicates=4, initial_noise=0.0, output_file=None,
             print(f"Results saved to: {output_file}")
         
         return results
-    
+
+
     def plot_input(self, plot_type='scatter'):
         """Plot the input data distribution.
         
@@ -588,15 +671,15 @@ def _climb_wrapper(args):
     Args:
         args: Tuple of (data_numpy, objective_func, max_time, step_size, 
               perturb_fraction, temperature, cooling_rate, mode, target_value, 
-              is_dataframe, columns, checkpoint_file, save_interval)
+              is_dataframe, columns, checkpoint_file, save_interval, plot_progress)
         
     Returns:
         Result from climb(): (best_data, steps_df)
     """
 
     (data_numpy, objective_func, max_time, step_size, perturb_fraction, 
      temperature, cooling_rate, mode, target_value, is_dataframe, columns,
-     checkpoint_file, save_interval) = args
+     checkpoint_file, save_interval, plot_progress) = args
     
     # Reconstruct original data format for HillClimber
     data_input = (
@@ -615,7 +698,8 @@ def _climb_wrapper(args):
         mode=mode,
         target_value=target_value,
         checkpoint_file=checkpoint_file,
-        save_interval=save_interval
+        save_interval=save_interval,
+        plot_progress=plot_progress
     )
     
     return climber.climb()
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "parallel-hill-climber"
-version = "0.1.7"
+version = "0.1.10"
 authors = [
     {name = "gperdrizet", email = "george@perdrizet.org"},
 ]

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "parallel-hill-climber"`
`7`		`-version = "0.1.7"`
	`7`	`+version = "0.1.10"`
`8`	`8`	`authors = [`
`9`	`9`	`{name = "gperdrizet", email = "george@perdrizet.org"},`
`10`	`10`	`]`