4 | 4 | import tensorflow as tf |
5 | 5 | import tensorflow_probability as tfp |
6 | 6 | from matplotlib import pyplot |
| 7 | +from tqdm.auto import tqdm, trange |
7 | 8 | import time |
8 | 9 |
9 | 10 |
@@ -140,144 +141,147 @@ def eager_lbfgs(opfunc, x, state, maxIter=100, learningRate=1, do_verbose=True): |
140 | 141 | # optimize for a max of maxIter iterations |
141 | 142 | nIter = 0 |
142 | 143 | times = [] |
143 | | - while nIter < maxIter: |
144 | | - start_time = time.time() |
145 | | - if state.nIter == 1: |
146 | | - tmp1 = tf.abs(g) |
147 | | - t = min(1, 1 / tf.reduce_sum(tmp1)) |
148 | | - else: |
149 | | - t = learningRate |
150 | | - # keep track of nb of iterations |
151 | | - nIter = nIter + 1 |
152 | | - state.nIter = state.nIter + 1 |
153 | | - |
154 | | - ############################################################ |
155 | | - ## compute gradient descent direction |
156 | | - ############################################################ |
157 | | - if state.nIter == 1: |
158 | | - d = -g |
159 | | - old_dirs = [] |
160 | | - old_stps = [] |
161 | | - Hdiag = 1 |
162 | | - else: |
163 | | - # do lbfgs update (update memory) |
164 | | - y = g - g_old |
165 | | - s = d * t |
166 | | - ys = dot(y, s) |
167 | | - |
168 | | -        if ys > 1e-10:  # curvature condition: only update memory when ys = dot(y, s) is sufficiently positive |
169 | | - # updating memory |
170 | | - if len(old_dirs) == nCorrection: |
171 | | - # shift history by one (limited-memory) |
172 | | - del old_dirs[0] |
173 | | - del old_stps[0] |
174 | | - |
175 | | - # store new direction/step |
176 | | - old_dirs.append(s) |
177 | | - old_stps.append(y) |
178 | | - |
179 | | - # update scale of initial Hessian approximation |
180 | | - Hdiag = ys / dot(y, y) |
181 | | - |
182 | | - # compute the approximate (L-BFGS) inverse Hessian |
183 | | - # multiplied by the gradient |
184 | | - k = len(old_dirs) |
185 | | - |
186 | | -            # accessed element-by-element, so keep it a plain Python list rather than a tensor: |
187 | | - ro = [0] * nCorrection |
188 | | - for i in range(k): |
189 | | - ro[i] = 1 / dot(old_stps[i], old_dirs[i]) |
190 | | - |
191 | | - # iteration in L-BFGS loop collapsed to use just one buffer |
192 | | -            # accessed element-by-element, so keep it a plain Python list rather than a tensor: |
193 | | - al = [0] * nCorrection |
194 | | - |
195 | | - q = -g |
196 | | - for i in range(k - 1, -1, -1): |
197 | | - al[i] = dot(old_dirs[i], q) * ro[i] |
198 | | - q = q - al[i] * old_stps[i] |
199 | | - |
200 | | - # multiply by initial Hessian |
201 | | - r = q * Hdiag |
202 | | - for i in range(k): |
203 | | - be_i = dot(old_stps[i], r) * ro[i] |
204 | | - r += (al[i] - be_i) * old_dirs[i] |
205 | | - |
206 | | - d = r |
207 | | - # final direction is in r/d (same object) |
208 | | - |
209 | | - g_old = g |
210 | | - f_old = f |
211 | | - |
212 | | - ############################################################ |
213 | | - ## compute step length |
214 | | - ############################################################ |
215 | | - # directional derivative |
216 | | - gtd = dot(g, d) |
217 | | - |
218 | | - # check that progress can be made along that direction |
219 | | - if gtd > -tolX: |
220 | | -            verbose("Cannot make progress along this direction.") |
221 | | - break |
222 | | - |
223 | | - # reset initial guess for step size |
224 | | - if state.nIter == 1: |
| 144 | + with trange(maxIter) as t_: |
| 145 | + for epoch in t_: |
| 146 | + start_time = time.time() |
| 147 | + if state.nIter == 1: |
| 148 | + tmp1 = tf.abs(g) |
| 149 | + t = min(1, 1 / tf.reduce_sum(tmp1)) |
| 150 | + else: |
| 151 | + t = learningRate |
| 152 | + # keep track of nb of iterations |
| 153 | + nIter = nIter + 1 |
| 154 | + state.nIter = state.nIter + 1 |
| 155 | + |
| 156 | + ############################################################ |
| 157 | + ## compute gradient descent direction |
| 158 | + ############################################################ |
| 159 | + if state.nIter == 1: |
| 160 | + d = -g |
| 161 | + old_dirs = [] |
| 162 | + old_stps = [] |
| 163 | + Hdiag = 1 |
| 164 | + else: |
| 165 | + # do lbfgs update (update memory) |
| 166 | + y = g - g_old |
| 167 | + s = d * t |
| 168 | + ys = dot(y, s) |
| 169 | + |
| 170 | +                if ys > 1e-10:  # curvature condition: only update memory when ys = dot(y, s) is sufficiently positive |
| 171 | + # updating memory |
| 172 | + if len(old_dirs) == nCorrection: |
| 173 | + # shift history by one (limited-memory) |
| 174 | + del old_dirs[0] |
| 175 | + del old_stps[0] |
| 176 | + |
| 177 | + # store new direction/step |
| 178 | + old_dirs.append(s) |
| 179 | + old_stps.append(y) |
| 180 | + |
| 181 | + # update scale of initial Hessian approximation |
| 182 | + Hdiag = ys / dot(y, y) |
| 183 | + |
| 184 | + # compute the approximate (L-BFGS) inverse Hessian |
| 185 | + # multiplied by the gradient |
| 186 | + k = len(old_dirs) |
| 187 | + |
| 188 | +                # accessed element-by-element, so keep it a plain Python list rather than a tensor: |
| 189 | + ro = [0] * nCorrection |
| 190 | + for i in range(k): |
| 191 | + ro[i] = 1 / dot(old_stps[i], old_dirs[i]) |
| 192 | + |
| 193 | + # iteration in L-BFGS loop collapsed to use just one buffer |
| 194 | +                # accessed element-by-element, so keep it a plain Python list rather than a tensor: |
| 195 | + al = [0] * nCorrection |
| 196 | + |
| 197 | + q = -g |
| 198 | + for i in range(k - 1, -1, -1): |
| 199 | + al[i] = dot(old_dirs[i], q) * ro[i] |
| 200 | + q = q - al[i] * old_stps[i] |
| 201 | + |
| 202 | + # multiply by initial Hessian |
| 203 | + r = q * Hdiag |
| 204 | + for i in range(k): |
| 205 | + be_i = dot(old_stps[i], r) * ro[i] |
| 206 | + r += (al[i] - be_i) * old_dirs[i] |
| 207 | + |
| 208 | + d = r |
| 209 | + # final direction is in r/d (same object) |
| 210 | + |
| 211 | + g_old = g |
| 212 | + f_old = f |
| 213 | + |
| 214 | + ############################################################ |
| 215 | + ## compute step length |
| 216 | + ############################################################ |
| 217 | + # directional derivative |
| 218 | + gtd = dot(g, d) |
| 219 | + |
| 220 | + # check that progress can be made along that direction |
| 221 | + if gtd > -tolX: |
| 222 | +                verbose("Cannot make progress along this direction.") |
| 223 | + break |
| 224 | + |
| 225 | + # reset initial guess for step size |
| 226 | + if state.nIter == 1: |
| 227 | + tmp1 = tf.abs(g) |
| 228 | + t = min(1, 1 / tf.reduce_sum(tmp1)) |
| 229 | + else: |
| 230 | + t = learningRate |
| 231 | + |
| 232 | + x += t * d |
| 233 | + |
| 234 | + if nIter != maxIter: |
| 235 | + # re-evaluate function only if not in last iteration |
| 236 | + # the reason we do this: in a stochastic setting, |
| 237 | + # no use to re-evaluate that function here |
| 238 | + f, g = opfunc(x) |
| 239 | + |
| 240 | + lsFuncEval = 1 |
| 241 | + f_hist.append(f) |
| 242 | + |
| 243 | + # update func eval |
| 244 | + currentFuncEval = currentFuncEval + lsFuncEval |
| 245 | + state.funcEval = state.funcEval + lsFuncEval |
| 246 | + |
| 247 | + ############################################################ |
| 248 | + ## check conditions |
| 249 | + ############################################################ |
| 250 | + if nIter == maxIter: |
| 251 | + break |
| 252 | + |
| 253 | + if currentFuncEval >= maxEval: |
| 254 | + # max nb of function evals |
| 255 | + print('max nb of function evals') |
| 256 | + break |
| 257 | + |
225 | 258 | tmp1 = tf.abs(g) |
226 | | - t = min(1, 1 / tf.reduce_sum(tmp1)) |
227 | | - else: |
228 | | - t = learningRate |
229 | | - |
230 | | - x += t * d |
231 | | - |
232 | | - if nIter != maxIter: |
233 | | - # re-evaluate function only if not in last iteration |
234 | | - # the reason we do this: in a stochastic setting, |
235 | | - # no use to re-evaluate that function here |
236 | | - f, g = opfunc(x) |
237 | | - |
238 | | - lsFuncEval = 1 |
239 | | - f_hist.append(f) |
240 | | - |
241 | | - # update func eval |
242 | | - currentFuncEval = currentFuncEval + lsFuncEval |
243 | | - state.funcEval = state.funcEval + lsFuncEval |
244 | | - |
245 | | - ############################################################ |
246 | | - ## check conditions |
247 | | - ############################################################ |
248 | | - if nIter == maxIter: |
249 | | - break |
250 | | - |
251 | | - if currentFuncEval >= maxEval: |
252 | | - # max nb of function evals |
253 | | - print('max nb of function evals') |
254 | | - break |
255 | | - |
256 | | - tmp1 = tf.abs(g) |
257 | | - if tf.reduce_sum(tmp1) <= tolFun: |
258 | | - # check optimality |
259 | | - print('optimality condition below tolFun') |
260 | | - break |
261 | | - |
262 | | - tmp1 = tf.abs(d * t) |
263 | | - if tf.reduce_sum(tmp1) <= tolX: |
264 | | - # step size below tolX |
265 | | - print('step size below tolX') |
266 | | - break |
267 | | - |
268 | | -        if tf.abs(f - f_old) < tolX: |
269 | | -            # function value changing less than tolX |
270 | | -            print('function value changing less than tolX: ' + str(tf.abs(f - f_old))) |
271 | | - break |
272 | | - |
273 | | - if do_verbose: |
274 | | - if nIter % 100 == 0: |
275 | | - elapsed = time.time() - state.start_time |
276 | | - print("Step: %3d, loss: %9.8f, time: " % (nIter, f.numpy()), elapsed) |
277 | | - state.start_time = time.time() |
278 | | - |
279 | | - if nIter == maxIter - 1: |
280 | | - final_loss = f.numpy() |
| 259 | + if tf.reduce_sum(tmp1) <= tolFun: |
| 260 | + # check optimality |
| 261 | + print('optimality condition below tolFun') |
| 262 | + break |
| 263 | + |
| 264 | + tmp1 = tf.abs(d * t) |
| 265 | + if tf.reduce_sum(tmp1) <= tolX: |
| 266 | + # step size below tolX |
| 267 | + print('step size below tolX') |
| 268 | + break |
| 269 | + |
| 270 | +                if tf.abs(f - f_old) < tolX: |
| 271 | +                    # function value changing less than tolX |
| 272 | +                    print('function value changing less than tolX: ' + str(tf.abs(f - f_old))) |
| 273 | + break |
| 274 | + |
| 275 | + t_.set_description('L-BFGS epoch %i' % epoch) |
| 276 | + if do_verbose: |
| 277 | + if nIter % 10 == 0: |
| 278 | + t_.set_postfix(loss=f.numpy()) |
| 279 | + elapsed = time.time() - state.start_time |
| 280 | + #print("Step: %3d, loss: %9.8f, time: " % (nIter, f.numpy()), elapsed) |
| 281 | + state.start_time = time.time() |
| 282 | + |
| 283 | + if nIter == maxIter - 1: |
| 284 | + final_loss = f.numpy() |
281 | 285 |
282 | 286 | # save state |
283 | 287 | state.old_dirs = old_dirs |
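A note on the history update in the hunk above: `old_dirs` and `old_stps` keep the naming quirk of Torch's original `lbfgs.lua`, so `old_dirs` actually stores the steps `s` and `old_stps` the gradient differences `y`; the two loops over them are the standard L-BFGS two-loop recursion. A minimal standalone sketch of that recursion, with hypothetical names, in case you want to sanity-check it in isolation:

```python
import tensorflow as tf

def two_loop_direction(g, s_hist, y_hist):
    """Hypothetical helper mirroring the recursion in eager_lbfgs:
    returns d = -H g, where H approximates the inverse Hessian built
    from the stored (s, y) pairs."""
    k = len(s_hist)
    rho = [1.0 / tf.reduce_sum(y_hist[i] * s_hist[i]) for i in range(k)]
    al = [None] * k
    q = -g
    for i in range(k - 1, -1, -1):      # newest pair to oldest
        al[i] = rho[i] * tf.reduce_sum(s_hist[i] * q)
        q = q - al[i] * y_hist[i]
    # scale by gamma = dot(s, y) / dot(y, y), the usual initial Hessian guess
    gamma = (tf.reduce_sum(s_hist[-1] * y_hist[-1]) /
             tf.reduce_sum(y_hist[-1] * y_hist[-1])) if k > 0 else 1.0
    r = q * gamma
    for i in range(k):                  # oldest pair to newest
        be_i = rho[i] * tf.reduce_sum(y_hist[i] * r)
        r = r + (al[i] - be_i) * s_hist[i]
    return r
```

Seeding the first loop with `q = -g` is why the code never negates `r`: the result is already the descent direction `d`.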
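The substantive change in this hunk is progress reporting: the `while` loop becomes a `tqdm` `trange`, `set_description` and `set_postfix` take over from the periodic `print`, and the loss readout refreshes every 10 iterations instead of every 100. The same pattern in isolation, with a stand-in loss:

```python
from tqdm.auto import trange

with trange(100) as t_:
    for epoch in t_:
        loss = 1.0 / (epoch + 1)        # stand-in for f.numpy()
        t_.set_description('L-BFGS epoch %i' % epoch)
        if epoch % 10 == 0:
            t_.set_postfix(loss=loss)   # shown at the right edge of the bar
```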
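The diff does not show how `eager_lbfgs` is called, so the sketch below is an assumption rather than the project's actual driver: `opfunc` must return the `(loss, gradient)` pair at `x`, and `state` must expose at least the `nIter`, `funcEval`, and `start_time` fields this loop touches (the real state object may carry more):

```python
import time
import types
import tensorflow as tf

def opfunc(x):
    # hypothetical objective; eager_lbfgs expects (loss, gradient) back
    with tf.GradientTape() as tape:
        tape.watch(x)
        loss = tf.reduce_sum(tf.square(x - 3.0))
    return loss, tape.gradient(loss, x)

x = tf.zeros([10])
# nIter=0 so the first pass takes the state.nIter == 1 branch
state = types.SimpleNamespace(nIter=0, funcEval=0, start_time=time.time())
eager_lbfgs(opfunc, x, state, maxIter=50)
```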