Make initialStepSize and maxStepSize user parameters.

kjytay · kjytay · commit 4099ea15bc4d · 2023-10-13T12:46:54.000-07:00
diff --git a/dualip/src/main/scala/com/linkedin/dualip/maximizer/DualPrimalMaximizerLoader.scala b/dualip/src/main/scala/com/linkedin/dualip/maximizer/DualPrimalMaximizerLoader.scala
@@ -17,11 +17,12 @@ object DualPrimalMaximizerLoader {
     val solver: DualPrimalMaximizer = solverType match {
       case OptimizerType.LBFGSB => new LBFGSB(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
       case OptimizerType.LBFGS => new LBFGS(alpha = alpha, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
-      case OptimizerType.AGD => new AcceleratedGradientDescent(maxIter = maxIter, dualTolerance = dualTolerance,
+      case OptimizerType.AGD => new AcceleratedGradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize,
+        maxIter = maxIter, dualTolerance = dualTolerance,
         slackTolerance = slackTolerance, designInequality = designInequality, mixedDesignPivotNum = mixedDesignPivotNum,
         pivotPositionsForStepSize = pivotPositionsForStepSize)
-      case OptimizerType.GD => new GradientDescent(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
-      case OptimizerType.SUBGD => new SubgradientDescent(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
+      case OptimizerType.GD => new GradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
+      case OptimizerType.SUBGD => new SubgradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
     }
     solver
   }
@@ -33,7 +34,9 @@ object DualPrimalMaximizerLoader {
   * @param solverType                Solver type
   * @param designInequality          True if Ax <= b, false if Ax = b or have mixed constraints
   * @param mixedDesignPivotNum       The pivot number if we have mixed A_1x <= b1 and A_2x = b2, i.e. how many inequality constraints come first
-  * @param alpha                     LBFGS positivity constraint relaxation
+  * @param alpha                     LBFGS positivity constraint relaxation (optional)
+  * @param initialStepSize           Initial step-size for the AGD, GD and SUBGD solvers; not passed to LBFGS or LBFGSB (optional)
+  * @param maxStepSize               Maximum step-size for the AGD, GD and SUBGD solvers; not passed to LBFGS or LBFGSB (optional)
   * @param dualTolerance             Tolerance criteria for dual variable change
   * @param slackTolerance            Tolerance criteria for slack
   * @param maxIter                   Number of iterations
@@ -43,6 +46,8 @@ case class DualPrimalMaximizerParams(solverType: OptimizerType = OptimizerType.L
   designInequality: Boolean = true,
   mixedDesignPivotNum: Int = 0,
   alpha: Double = 1E-6,
+  initialStepSize: Double = 1E-5,
+  maxStepSize: Double = 0.1,
   dualTolerance: Double = 1E-8,
   slackTolerance: Double = 5E-6,
   maxIter: Int = 100,
@@ -63,6 +68,8 @@ object DualPrimalMaximizerParamsParser {
       opt[Boolean](s"$namespace.designInequality") optional() action { (x, c) => c.copy(designInequality = x) }
       opt[Int](s"$namespace.mixedDesignPivotNum") optional() action { (x, c) => c.copy(mixedDesignPivotNum = x) }
       opt[Double](s"$namespace.alpha") optional() action { (x, c) => c.copy(alpha = x) }
+      opt[Double](s"$namespace.initialStepSize") optional() action { (x, c) => c.copy(initialStepSize = x) }
+      opt[Double](s"$namespace.maxStepSize") optional() action { (x, c) => c.copy(maxStepSize = x) }
       opt[Double](s"$namespace.dualTolerance") required() action { (x, c) => c.copy(dualTolerance = x) }
       opt[Double](s"$namespace.slackTolerance") required() action { (x, c) => c.copy(slackTolerance = x) }
       opt[Int](s"$namespace.maxIter") required() action { (x, c) => c.copy(maxIter = x) }
diff --git a/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/AcceleratedGradientDescent.scala b/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/AcceleratedGradientDescent.scala
@@ -14,6 +14,8 @@ import scala.collection.mutable.ListBuffer
 /**
   * Implementation of accelerated gradient descent.
   *
+  * @param initialStepSize           The initial step size (default is 1e-5).
+  * @param maxStepSize               The maximum step size (default is 0.1).
   * @param maxIter                   The maximum number of iterations (default is 1000).
   * @param dualTolerance             The dual tolerance limit (default is 1e-6).
   * @param slackTolerance            The slack tolerance limit (default is 0.05).
@@ -24,7 +26,10 @@ import scala.collection.mutable.ListBuffer
   *                                  For example, if the total length of the Duals is 10 and we have three groups of
   *                                  sizes 3, 4, and 3 respectively, then pivotPositionsForStepSize must be set at [3, 7].
   */
-class AcceleratedGradientDescent(maxIter: Int = 1000,
+class AcceleratedGradientDescent(
+  initialStepSize: Double = 1e-5,
+  maxStepSize: Double = 0.1,
+  maxIter: Int = 1000,
   dualTolerance: Double = 1e-6,
   slackTolerance: Double = 0.05,
   designInequality: Boolean = true,
@@ -100,9 +105,10 @@ class AcceleratedGradientDescent(maxIter: Int = 1000,
       var stepSize = 0.0
       if (useGroupedStepSize)
         groupedStepSize = calculateGroupStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory,
-          pivotPositionsForStepSize)
+          pivotPositionsForStepSize, initialStepSize = initialStepSize, maxStepSize = maxStepSize)
       else
-        stepSize = calculateStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory)
+        stepSize = calculateStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory,
+          initialStepSize = initialStepSize, maxStepSize = maxStepSize)
 
       // log adaptive step size
       if (useGroupedStepSize) {
diff --git a/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/GradientDescent.scala b/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/GradientDescent.scala
@@ -17,11 +17,15 @@ import scala.math.abs
  * A custom implementation of Gradient Descent to solve a maximization problem with non-negativity constraints on the solution
  *
  * @see breeze.optimize.StochasticGradientDescent for the structure of an optimizer
- * @param maxIter        is the maximum number of gradient descent iterations to run
- * @param dualTolerance  change in dual (tolerance) to decide convergence
- * @param slackTolerance change in max slack (tolerance) to decide convergence
+ * @param initialStepSize The initial step size (default is 1e-5).
+ * @param maxStepSize     The maximum step size (default is 0.1).
+ * @param maxIter         The maximum number of gradient descent iterations to run
+ * @param dualTolerance   Change in dual (tolerance) to decide convergence
+ * @param slackTolerance  Change in max slack (tolerance) to decide convergence
  */
-class GradientDescent(maxIter: Int = 100,
+class GradientDescent(initialStepSize: Double = 1e-5,
+                      maxStepSize: Double = 0.1,
+                      maxIter: Int = 100,
                       dualTolerance: Double = 1e-8,
                       slackTolerance: Double = 5e-6
                      ) extends Serializable with DualPrimalMaximizer {
@@ -136,7 +140,8 @@ class GradientDescent(maxIter: Int = 100,
       val grad = state.grad
       val ff = functionFromSearchDirection(f, x, dir)
 
-      val init = SolverUtility.calculateStepSize(grad.data, x.data, GradHist, XHist)
+      val init = SolverUtility.calculateStepSize(grad.data, x.data, GradHist, XHist, initialStepSize = initialStepSize,
+        maxStepSize = maxStepSize)
       bisectionLineSearch(ff, init, 20)
     }
 
diff --git a/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/subgradientbased/SubgradientDescent.scala b/dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/subgradientbased/SubgradientDescent.scala
@@ -14,14 +14,18 @@ import scala.collection.mutable.ListBuffer
 /**
  * Implementation of subgradient descent.
  *
+ * @param initialStepSize     The initial step size (default is 1e-5).
+ * @param maxStepSize         The maximum step size (default is 0.1).
  * @param maxIter             The maximum number of iterations (default is 1000).
  * @param dualTolerance       The dual tolerance limit (default is 1e-6).
  * @param slackTolerance      The slack tolerance limit (default is 0.05).
  * @param designInequality    True if Ax <= b (default), false if Ax = b or have mixed constraints.
  * @param mixedDesignPivotNum The pivot number if we have mixed A_1x <= b1 and A_2x = b2, i.e. how many inequality
  *                            constraints come first (default is 0).
  */
-class SubgradientDescent(maxIter: Int = 1000,
+class SubgradientDescent(initialStepSize: Double = 1e-5,
+                         maxStepSize: Double = 0.1,
+                         maxIter: Int = 1000,
                          dualTolerance: Double = 1e-6,
                          slackTolerance: Double = 0.05,
                          designInequality: Boolean = true,
@@ -76,7 +80,8 @@ class SubgradientDescent(maxIter: Int = 1000,
       }
 
       // calculate step-size
-      val stepSize = calculateStepSize(result.dualGradient.data, result.lambda.data, gradientHistory, lambdaHistory)
+      val stepSize = calculateStepSize(result.dualGradient.data, result.lambda.data, gradientHistory, lambdaHistory,
+        initialStepSize = initialStepSize, maxStepSize = maxStepSize)
 
       // log adaptive step size
       iLog += ("step" -> f"$stepSize%1.2E")
diff --git a/dualip/src/main/scala/com/linkedin/dualip/util/SolverUtility.scala b/dualip/src/main/scala/com/linkedin/dualip/util/SolverUtility.scala
@@ -61,7 +61,7 @@ object SolverUtility {
     * @param gradientHistory  - The gradient history
     * @param lambdaHistory    - The dual variable history
     * @param maxHistoryLength - The length of the history
-    * @param minStepSize      - Minimum step size
+    * @param initialStepSize  - Initial step size: returned when too few Lipschitz estimates exist or the largest is NaN/infinite
     * @param maxStepSize      - Maximum step size
     * @return
     */
@@ -71,7 +71,7 @@ object SolverUtility {
     gradientHistory: ListBuffer[Array[Double]],
     lambdaHistory: ListBuffer[Array[Double]],
     maxHistoryLength: Int = 15,
-    minStepSize: Double = 1e-5,
+    initialStepSize: Double = 1e-5,
     maxStepSize: Double = 0.1
   ): Double = {
 
@@ -85,7 +85,7 @@ object SolverUtility {
           lambdaHistory(timeIndex + 1)
         )
       }
-    stepSizeFromLipschitzConstants(lipschitzConstants, maxHistoryLength, minStepSize, maxStepSize)
+    stepSizeFromLipschitzConstants(lipschitzConstants, maxHistoryLength, initialStepSize, maxStepSize)
   }
 
   /**
@@ -97,7 +97,7 @@ object SolverUtility {
     * @param lambdaHistory             - The dual variable history
     * @param pivotPositionsForStepSize - Pivot positions that mark different groups for which the step-sizes need to be tuned
     * @param maxHistoryLength          - The length of the history
-    * @param minStepSize               - Minimum step size
+    * @param initialStepSize           - Initial step size: used for a group with no Lipschitz estimates or whose largest estimate is NaN/infinite
     * @param maxStepSize               - Maximum step size
     * @return
     */
@@ -108,7 +108,7 @@ object SolverUtility {
     lambdaHistory: ListBuffer[Array[Double]],
     pivotPositionsForStepSize: Array[Int],
     maxHistoryLength: Int = 15,
-    minStepSize: Double = 1e-5,
+    initialStepSize: Double = 1e-5,
     maxStepSize: Double = 0.1
   ): Array[Double] = {
 
@@ -147,7 +147,8 @@ object SolverUtility {
     prevPivotIndex = 0
     (pivotPositionsForStepSize :+ dualLength).map { pivotIndex =>
       val lipschitzConstants = lipschitzConstantCollection(prevPivotIndex)
-      val stepSizeValuesPerGroup = stepSizeFromLipschitzConstants(lipschitzConstants, lipschitzConstants.length, minStepSize, maxStepSize)
+      val stepSizeValuesPerGroup = stepSizeFromLipschitzConstants(lipschitzConstants,
+        lipschitzConstants.length, initialStepSize, maxStepSize)
       prevPivotIndex = pivotIndex
       stepSizeValuesPerGroup
     }
@@ -213,15 +214,15 @@ object SolverUtility {
     *
     * @param lipschitzConstants
     * @param maxHistoryLength
-    * @param minStepSize
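+    * @param initialStepSize    Step size returned when there are no constants, fewer than maxHistoryLength - 1 of them, or their max is NaN/infinite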
     * @param maxStepSize
     * @return
     */
-  def stepSizeFromLipschitzConstants(lipschitzConstants: Seq[Double], maxHistoryLength: Int, minStepSize: Double,
+  def stepSizeFromLipschitzConstants(lipschitzConstants: Seq[Double], maxHistoryLength: Int, initialStepSize: Double,
     maxStepSize: Double): Double = {
     if (lipschitzConstants.isEmpty || lipschitzConstants.max.isNaN || lipschitzConstants.max.isInfinite ||
       lipschitzConstants.length < maxHistoryLength - 1)
-      minStepSize else math.min(1.0 / lipschitzConstants.max, maxStepSize)
+      initialStepSize else math.min(1.0 / lipschitzConstants.max, maxStepSize)
   }
 
   /**
diff --git a/dualip/src/test/scala/com/linkedin/dualip/objective/Objectives.scala b/dualip/src/test/scala/com/linkedin/dualip/objective/Objectives.scala
@@ -7,6 +7,25 @@ import org.testng.annotations.Test
 
 import scala.collection.mutable
 
+// This file contains a collection of simple objective functions for testing purposes.
+
+/**
+  * Just a 1-d objective function f = -(x-3)^2. We maximize it subject to x>=0.
+  * Maximum is at x=3. dualObjective = 0, there is no primalObjective.
+  */
+class Quadratic1DObjective() extends DualPrimalObjective {
+  override def dualDimensionality: Int = 1
+
+  override def calculate(lambda: BSV[Double], log: mutable.Map[String, String]=null, verbosity: Int = 1, designInequality: Boolean = true, mixedDesignPivotNum: Int = 0): DualPrimalComputationResult = {
+    val Array(x) = lambda.toArray
+    val obj = -(x - 3.0)*(x - 3.0)
+    val grad = Array(-2.0 * (x - 3.0))
+    // primal, slack and maxSlack are dummy, they are used for logging and extra convergence criteria,
+    // so they should not impact the testing of basic functionality
+    DualPrimalComputationResult(lambda, obj, obj, BSV(grad), 0.0, BSV(Array(0.0)), SlackMetadata(null, 0.0, 0.0, 0.0, 0.0))
+  }
+}
+
 /**
   * Just a simple 2-d objective function f = -(x-3)^2 - (y+5)^2
   * because we maximize subject to x>=0 and y>=0
@@ -23,14 +42,6 @@ class SimpleObjective() extends DualPrimalObjective {
     // so they should not impact the testing of basic functionality
     DualPrimalComputationResult(lambda, obj, obj, BSV(grad), 0.0, BSV(Array(0.0, 0.0)), SlackMetadata(null, 0.0, 0.0, 0.0, 0.0))
   }
-
-  @Test
-  def testObjectiveFunction(): Unit = {
-    val x = BSV(Array(1.0, 1.0))
-    val result = new SimpleObjective().calculate(x)
-    Assert.assertEquals(result.dualObjective, -40.0)
-    Assert.assertEquals(result.dualGradient, BSV(Array(4.0, -12.0)))
-  }
 }
 
 /**
diff --git a/dualip/src/test/scala/com/linkedin/dualip/solver/firstorder/gradientbased/AcceleratedGradientDescentTest.scala b/dualip/src/test/scala/com/linkedin/dualip/solver/firstorder/gradientbased/AcceleratedGradientDescentTest.scala
@@ -2,7 +2,7 @@ package com.linkedin.dualip.solver.firstorder.gradientbased
 
 import breeze.linalg.{SparseVector => BSV}
 import com.linkedin.dualip.maximizer.solver.firstorder.gradientbased.AcceleratedGradientDescent
-import com.linkedin.dualip.objective.SimpleObjective
+import com.linkedin.dualip.objective.{Quadratic1DObjective, SimpleObjective}
 import org.testng.Assert
 import org.testng.annotations.Test
 
@@ -24,4 +24,21 @@ class AcceleratedGradientDescentTest {
     Assert.assertTrue(Math.abs(x - 3.0) < 1e-3)
     Assert.assertEquals(y, 0.0)
   }
+
+  @Test
+  def testQuadratic1DFunction(): Unit = {
+    // This test checks the functionality of the initialStepSize parameter.
+    // Starting from x = 0, the gradient of Quadratic1DObjective is -2 * (0 - 3) = 6.0. So after one step (maxIter = 1),
+    // the solution should be at 6.0 * initialStepSize.
+    val initialGradient = 6.0
+    val defaultStepSize = 1E-5
+    val solverDefault = new AcceleratedGradientDescent(maxIter = 1)
+    val (solutionDefault, _, _) = solverDefault.maximize(new Quadratic1DObjective(), BSV(Array(0.0)))
+    Assert.assertEquals(solutionDefault(0), initialGradient * defaultStepSize, "Test fails for default initialStepSize")
+
+    val newStepSize = 0.1
+    val solverNewStepSize = new AcceleratedGradientDescent(maxIter = 1, initialStepSize = newStepSize)
+    val (solutionNewStepSize, _, _) = solverNewStepSize.maximize(new Quadratic1DObjective(), BSV(Array(0.0)))
+    Assert.assertEquals(solutionNewStepSize(0), initialGradient * newStepSize, "Test fails for new initialStepSize")
+  }
 }