Skip to content

Commit 4099ea1

Browse files
committed
Make initialStepSize and maxStepSize user parameters.
1 parent dc3cace commit 4099ea1

7 files changed

Lines changed: 84 additions & 32 deletions

File tree

dualip/src/main/scala/com/linkedin/dualip/maximizer/DualPrimalMaximizerLoader.scala

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ object DualPrimalMaximizerLoader {
1717
val solver: DualPrimalMaximizer = solverType match {
1818
case OptimizerType.LBFGSB => new LBFGSB(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
1919
case OptimizerType.LBFGS => new LBFGS(alpha = alpha, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
20-
case OptimizerType.AGD => new AcceleratedGradientDescent(maxIter = maxIter, dualTolerance = dualTolerance,
20+
case OptimizerType.AGD => new AcceleratedGradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize,
21+
maxIter = maxIter, dualTolerance = dualTolerance,
2122
slackTolerance = slackTolerance, designInequality = designInequality, mixedDesignPivotNum = mixedDesignPivotNum,
2223
pivotPositionsForStepSize = pivotPositionsForStepSize)
23-
case OptimizerType.GD => new GradientDescent(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
24-
case OptimizerType.SUBGD => new SubgradientDescent(maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
24+
case OptimizerType.GD => new GradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
25+
case OptimizerType.SUBGD => new SubgradientDescent(initialStepSize = initialStepSize, maxStepSize = maxStepSize, maxIter = maxIter, dualTolerance = dualTolerance, slackTolerance = slackTolerance)
2526
}
2627
solver
2728
}
@@ -33,7 +34,9 @@ object DualPrimalMaximizerLoader {
3334
* @param solverType Solver type
3435
* @param designInequality True if Ax <= b, false if Ax = b or have mixed constraints
3536
* @param mixedDesignPivotNum The pivot number if we have mixed A_1x <= b1 and A_2x = b2, i.e. how many inequality constraints come first
36-
* @param alpha LBFGS positivity constraint relaxation
37+
* @param alpha LBFGS positivity constraint relaxation (optional)
38+
* @param initialStepSize Initial step-size for the AGD, GD and SUBGD solvers (optional, defaults to 1E-5)
39+
* @param maxStepSize Maximum step-size for the AGD, GD and SUBGD solvers (optional, defaults to 0.1)
3740
* @param dualTolerance Tolerance criteria for dual variable change
3841
* @param slackTolerance Tolerance criteria for slack
3942
* @param maxIter Number of iterations
@@ -43,6 +46,8 @@ case class DualPrimalMaximizerParams(solverType: OptimizerType = OptimizerType.L
4346
designInequality: Boolean = true,
4447
mixedDesignPivotNum: Int = 0,
4548
alpha: Double = 1E-6,
49+
initialStepSize: Double = 1E-5,
50+
maxStepSize: Double = 0.1,
4651
dualTolerance: Double = 1E-8,
4752
slackTolerance: Double = 5E-6,
4853
maxIter: Int = 100,
@@ -63,6 +68,8 @@ object DualPrimalMaximizerParamsParser {
6368
opt[Boolean](s"$namespace.designInequality") optional() action { (x, c) => c.copy(designInequality = x) }
6469
opt[Int](s"$namespace.mixedDesignPivotNum") optional() action { (x, c) => c.copy(mixedDesignPivotNum = x) }
6570
opt[Double](s"$namespace.alpha") optional() action { (x, c) => c.copy(alpha = x) }
71+
opt[Double](s"$namespace.initialStepSize") optional() action { (x, c) => c.copy(initialStepSize = x) }
72+
opt[Double](s"$namespace.maxStepSize") optional() action { (x, c) => c.copy(maxStepSize = x) }
6673
opt[Double](s"$namespace.dualTolerance") required() action { (x, c) => c.copy(dualTolerance = x) }
6774
opt[Double](s"$namespace.slackTolerance") required() action { (x, c) => c.copy(slackTolerance = x) }
6875
opt[Int](s"$namespace.maxIter") required() action { (x, c) => c.copy(maxIter = x) }

dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/AcceleratedGradientDescent.scala

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import scala.collection.mutable.ListBuffer
1414
/**
1515
* Implementation of accelerated gradient descent.
1616
*
17+
* @param initialStepSize The initial step size (default is 1e-5).
18+
* @param maxStepSize The maximum step size (default is 0.1).
1719
* @param maxIter The maximum number of iterations (default is 1000).
1820
* @param dualTolerance The dual tolerance limit (default is 1e-6).
1921
* @param slackTolerance The slack tolerance limit (default is 0.05).
@@ -24,7 +26,10 @@ import scala.collection.mutable.ListBuffer
2426
* For example, if the total length of the Duals is 10 and we have three groups of
2527
* sizes 3, 4, and 3 respectively, then pivotPositionsForStepSize must be set at [3, 7].
2628
*/
27-
class AcceleratedGradientDescent(maxIter: Int = 1000,
29+
class AcceleratedGradientDescent(
30+
initialStepSize: Double = 1e-5,
31+
maxStepSize: Double = 0.1,
32+
maxIter: Int = 1000,
2833
dualTolerance: Double = 1e-6,
2934
slackTolerance: Double = 0.05,
3035
designInequality: Boolean = true,
@@ -100,9 +105,10 @@ class AcceleratedGradientDescent(maxIter: Int = 1000,
100105
var stepSize = 0.0
101106
if (useGroupedStepSize)
102107
groupedStepSize = calculateGroupStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory,
103-
pivotPositionsForStepSize)
108+
pivotPositionsForStepSize, initialStepSize = initialStepSize, maxStepSize = maxStepSize)
104109
else
105-
stepSize = calculateStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory)
110+
stepSize = calculateStepSize(result.dualGradient.data, y.data, gradientHistory, lambdaHistory,
111+
initialStepSize = initialStepSize, maxStepSize = maxStepSize)
106112

107113
// log adaptive step size
108114
if (useGroupedStepSize) {

dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/gradientbased/GradientDescent.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,15 @@ import scala.math.abs
1717
* A custom implementation of Gradient Descent to solve a maximization problem with non-negativity constraints on the solution
1818
*
1919
* @see breeze.optimize.StochasticGradientDescent for the structure of an optimizer
20-
* @param maxIter is the maximum number of gradient descent iterations to run
21-
* @param dualTolerance change in dual (tolerance) to decide convergence
22-
* @param slackTolerance change in max slack (tolerance) to decide convergence
20+
* @param initialStepSize The initial step size (default is 1e-5).
21+
* @param maxStepSize The maximum step size (default is 0.1).
22+
* @param maxIter The maximum number of gradient descent iterations to run
23+
* @param dualTolerance Change in dual (tolerance) to decide convergence
24+
* @param slackTolerance Change in max slack (tolerance) to decide convergence
2325
*/
24-
class GradientDescent(maxIter: Int = 100,
26+
class GradientDescent(initialStepSize: Double = 1e-5,
27+
maxStepSize: Double = 0.1,
28+
maxIter: Int = 100,
2529
dualTolerance: Double = 1e-8,
2630
slackTolerance: Double = 5e-6
2731
) extends Serializable with DualPrimalMaximizer {
@@ -136,7 +140,8 @@ class GradientDescent(maxIter: Int = 100,
136140
val grad = state.grad
137141
val ff = functionFromSearchDirection(f, x, dir)
138142

139-
val init = SolverUtility.calculateStepSize(grad.data, x.data, GradHist, XHist)
143+
val init = SolverUtility.calculateStepSize(grad.data, x.data, GradHist, XHist, initialStepSize = initialStepSize,
144+
maxStepSize = maxStepSize)
140145
bisectionLineSearch(ff, init, 20)
141146
}
142147

dualip/src/main/scala/com/linkedin/dualip/maximizer/solver/firstorder/subgradientbased/SubgradientDescent.scala

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,18 @@ import scala.collection.mutable.ListBuffer
1414
/**
1515
* Implementation of subgradient descent.
1616
*
17+
* @param initialStepSize The initial step size (default is 1e-5).
18+
* @param maxStepSize The maximum step size (default is 0.1).
1719
* @param maxIter The maximum number of iterations (default is 1000).
1820
* @param dualTolerance The dual tolerance limit (default is 1e-6).
1921
* @param slackTolerance The slack tolerance limit (default is 0.05).
2022
* @param designInequality True if Ax <= b (default), false if Ax = b or have mixed constraints.
2123
* @param mixedDesignPivotNum The pivot number if we have mixed A_1x <= b1 and A_2x = b2, i.e. how many inequality
2224
* constraints come first (default is 0).
2325
*/
24-
class SubgradientDescent(maxIter: Int = 1000,
26+
class SubgradientDescent(initialStepSize: Double = 1e-5,
27+
maxStepSize: Double = 0.1,
28+
maxIter: Int = 1000,
2529
dualTolerance: Double = 1e-6,
2630
slackTolerance: Double = 0.05,
2731
designInequality: Boolean = true,
@@ -76,7 +80,8 @@ class SubgradientDescent(maxIter: Int = 1000,
7680
}
7781

7882
// calculate step-size
79-
val stepSize = calculateStepSize(result.dualGradient.data, result.lambda.data, gradientHistory, lambdaHistory)
83+
val stepSize = calculateStepSize(result.dualGradient.data, result.lambda.data, gradientHistory, lambdaHistory,
84+
initialStepSize = initialStepSize, maxStepSize = maxStepSize)
8085

8186
// log adaptive step size
8287
iLog += ("step" -> f"$stepSize%1.2E")

dualip/src/main/scala/com/linkedin/dualip/util/SolverUtility.scala

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ object SolverUtility {
6161
* @param gradientHistory - The gradient history
6262
* @param lambdaHistory - The dual variable history
6363
* @param maxHistoryLength - The length of the history
64-
* @param minStepSize - Minimum step size
64+
* @param initialStepSize - Step size returned while the Lipschitz-constant history is too short or unusable (empty, NaN or infinite)
6565
* @param maxStepSize - Maximum step size
6666
* @return
6767
*/
@@ -71,7 +71,7 @@ object SolverUtility {
7171
gradientHistory: ListBuffer[Array[Double]],
7272
lambdaHistory: ListBuffer[Array[Double]],
7373
maxHistoryLength: Int = 15,
74-
minStepSize: Double = 1e-5,
74+
initialStepSize: Double = 1e-5,
7575
maxStepSize: Double = 0.1
7676
): Double = {
7777

@@ -85,7 +85,7 @@ object SolverUtility {
8585
lambdaHistory(timeIndex + 1)
8686
)
8787
}
88-
stepSizeFromLipschitzConstants(lipschitzConstants, maxHistoryLength, minStepSize, maxStepSize)
88+
stepSizeFromLipschitzConstants(lipschitzConstants, maxHistoryLength, initialStepSize, maxStepSize)
8989
}
9090

9191
/**
@@ -97,7 +97,7 @@ object SolverUtility {
9797
* @param lambdaHistory - The dual variable history
9898
* @param pivotPositionsForStepSize - Pivot positions that mark different groups for which the step-sizes need to be tuned
9999
* @param maxHistoryLength - The length of the history
100-
* @param minStepSize - Minimum step size
100+
* @param initialStepSize - Step size used for a group whose Lipschitz constants are empty, NaN or infinite
101101
* @param maxStepSize - Maximum step size
102102
* @return
103103
*/
@@ -108,7 +108,7 @@ object SolverUtility {
108108
lambdaHistory: ListBuffer[Array[Double]],
109109
pivotPositionsForStepSize: Array[Int],
110110
maxHistoryLength: Int = 15,
111-
minStepSize: Double = 1e-5,
111+
initialStepSize: Double = 1e-5,
112112
maxStepSize: Double = 0.1
113113
): Array[Double] = {
114114

@@ -147,7 +147,8 @@ object SolverUtility {
147147
prevPivotIndex = 0
148148
(pivotPositionsForStepSize :+ dualLength).map { pivotIndex =>
149149
val lipschitzConstants = lipschitzConstantCollection(prevPivotIndex)
150-
val stepSizeValuesPerGroup = stepSizeFromLipschitzConstants(lipschitzConstants, lipschitzConstants.length, minStepSize, maxStepSize)
150+
val stepSizeValuesPerGroup = stepSizeFromLipschitzConstants(lipschitzConstants,
151+
lipschitzConstants.length, initialStepSize, maxStepSize)
151152
prevPivotIndex = pivotIndex
152153
stepSizeValuesPerGroup
153154
}
@@ -213,15 +214,15 @@ object SolverUtility {
213214
*
214215
* @param lipschitzConstants Lipschitz constant estimates; the largest one determines the step size
215216
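* @param maxHistoryLength History length; initialStepSize is returned while fewer than maxHistoryLength - 1 constants are available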
216-
* @param minStepSize
217+
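* @param initialStepSize Step size returned when the constants are empty, fewer than maxHistoryLength - 1, or their maximum is NaN or infinite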
217218
* @param maxStepSize Upper bound applied to the computed step size 1 / max(lipschitzConstants)
218219
* @return initialStepSize in the fallback cases, otherwise min(1 / max(lipschitzConstants), maxStepSize)
219220
*/
220-
def stepSizeFromLipschitzConstants(lipschitzConstants: Seq[Double], maxHistoryLength: Int, minStepSize: Double,
221+
def stepSizeFromLipschitzConstants(lipschitzConstants: Seq[Double], maxHistoryLength: Int, initialStepSize: Double,
221222
maxStepSize: Double): Double = {
222223
if (lipschitzConstants.isEmpty || lipschitzConstants.max.isNaN || lipschitzConstants.max.isInfinite ||
223224
lipschitzConstants.length < maxHistoryLength - 1)
224-
minStepSize else math.min(1.0 / lipschitzConstants.max, maxStepSize)
225+
initialStepSize else math.min(1.0 / lipschitzConstants.max, maxStepSize)
225226
}
226227

227228
/**

dualip/src/test/scala/com/linkedin/dualip/objective/Objectives.scala

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,25 @@ import org.testng.annotations.Test
77

88
import scala.collection.mutable
99

10+
// This file contains a collection of simple objective functions for testing purposes.
11+
12+
/**
13+
* Just a 1-d objective function f = -(x-3)^2. We maximize it subject to x>=0.
14+
* Maximum is at x=3. dualObjective = 0, there is no primalObjective.
15+
*/
16+
class Quadratic1DObjective() extends DualPrimalObjective {
17+
override def dualDimensionality: Int = 1
18+
19+
override def calculate(lambda: BSV[Double], log: mutable.Map[String, String]=null, verbosity: Int = 1, designInequality: Boolean = true, mixedDesignPivotNum: Int = 0): DualPrimalComputationResult = {
20+
val Array(x) = lambda.toArray
21+
val obj = -(x - 3.0)*(x - 3.0)
22+
val grad = Array(-2.0 * (x - 3.0))
23+
// primal, slack and maxSlack are dummy, they are used for logging and extra convergence criteria,
24+
// so they should not impact the testing of basic functionality
25+
DualPrimalComputationResult(lambda, obj, obj, BSV(grad), 0.0, BSV(Array(0.0)), SlackMetadata(null, 0.0, 0.0, 0.0, 0.0))
26+
}
27+
}
28+
1029
/**
1130
* Just a simple 2-d objective function f = -(x-3)^2 - (y+5)^2
1231
* because we maximize subject to x>=0 and y>=0
@@ -23,14 +42,6 @@ class SimpleObjective() extends DualPrimalObjective {
2342
// so they should not impact the testing of basic functionality
2443
DualPrimalComputationResult(lambda, obj, obj, BSV(grad), 0.0, BSV(Array(0.0, 0.0)), SlackMetadata(null, 0.0, 0.0, 0.0, 0.0))
2544
}
26-
27-
@Test
28-
def testObjectiveFunction(): Unit = {
29-
val x = BSV(Array(1.0, 1.0))
30-
val result = new SimpleObjective().calculate(x)
31-
Assert.assertEquals(result.dualObjective, -40.0)
32-
Assert.assertEquals(result.dualGradient, BSV(Array(4.0, -12.0)))
33-
}
3445
}
3546

3647
/**

dualip/src/test/scala/com/linkedin/dualip/solver/firstorder/gradientbased/AcceleratedGradientDescentTest.scala

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package com.linkedin.dualip.solver.firstorder.gradientbased
22

33
import breeze.linalg.{SparseVector => BSV}
44
import com.linkedin.dualip.maximizer.solver.firstorder.gradientbased.AcceleratedGradientDescent
5-
import com.linkedin.dualip.objective.SimpleObjective
5+
import com.linkedin.dualip.objective.{Quadratic1DObjective, SimpleObjective}
66
import org.testng.Assert
77
import org.testng.annotations.Test
88

@@ -24,4 +24,21 @@ class AcceleratedGradientDescentTest {
2424
Assert.assertTrue(Math.abs(x - 3.0) < 1e-3)
2525
Assert.assertEquals(y, 0.0)
2626
}
27+
28+
@Test
29+
def testQuadratic1DFunction(): Unit = {
30+
// This test checks the functionality of the initialStepSize parameter.
31+
// For Quadratic1DObjective the gradient at the starting point x = 0 is -2 * (0 - 3) = 6.0, so after one step the solution should
32+
// be at 6.0 * initialStepSize.
33+
val initialGradient = 6.0
34+
val defaultStepSize = 1E-5
35+
val solverDefault = new AcceleratedGradientDescent(maxIter = 1)
36+
val (solutionDefault, _, _) = solverDefault.maximize(new Quadratic1DObjective(), BSV(Array(0.0)))
37+
Assert.assertEquals(solutionDefault(0), initialGradient * defaultStepSize, "Test fails for default initialStepSize")
38+
39+
val newStepSize = 0.1
40+
val solverNewStepSize = new AcceleratedGradientDescent(maxIter = 1, initialStepSize = newStepSize)
41+
val (solutionNewStepSize, _, _) = solverNewStepSize.maximize(new Quadratic1DObjective(), BSV(Array(0.0)))
42+
Assert.assertEquals(solutionNewStepSize(0), initialGradient * newStepSize, "Test fails for new initialStepSize")
43+
}
2744
}

0 commit comments

Comments
 (0)