Add a processing time limit, and various cleanups

cestpasphoto · cestpasphoto · commit 865270fbcc9d · 2021-02-11T10:03:03.000+01:00
diff --git a/Coach.py b/Coach.py
@@ -13,7 +13,6 @@
 
 log = logging.getLogger(__name__)
 
-
 class Coach():
     """
     This class executes the self-play + learning. It uses the functions defined
@@ -77,6 +76,8 @@ def learn(self):
         only if it wins >= updateThreshold fraction of games.
         """
 
+        start_time = time.time()
+
         for i in range(1, self.args.numIters + 1):
             # bookkeeping
             log.info(f'Starting Iter #{i} ...')
@@ -92,8 +93,6 @@ def learn(self):
                 self.trainExamplesHistory.append(iterationTrainExamples)
 
             if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
-                log.warning(
-                    f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
                 self.trainExamplesHistory.pop(0)
             # backup history to a file
             # NB! the examples were collected using the model from the previous iteration, so (i-1)  
@@ -127,6 +126,10 @@ def learn(self):
                 self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                 self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pt')
 
+            if self.args.timeIters > 0:
+                if time.time() - start_time > self.args.timeIters*3600:
+                    log.info(f'Above timelimit, stopping here after {i} iterations')
+                    break
 
     def getCheckpointFile(self, iteration):
         return 'checkpoint_' + str(iteration) + '.pt'
diff --git a/main.py b/main.py
@@ -66,7 +66,7 @@ def main():
 #   rollout = joue sur la perf et le temps...
 #	learn_rate = 0.001 ? ou bien 0.02 puis diviser à chaque raté ?
 	parser.add_argument('--numIters'        , '-N' , action='store', default=1000  , type=int  , help='')
-	# parser.add_argument('--timeIters'       , '-T' , action='store', default=0.   , type=float, help='')
+	parser.add_argument('--timeIters'       , '-T' , action='store', default=0.   , type=float, help='')
 	parser.add_argument('--numEps'          , '-s' , action='store', default=100   , type=int  , help='Number of complete self-play games to simulate during a new iteration')
 	parser.add_argument('--tempThreshold'   , '-t' , action='store', default=15    , type=int  , help='')
 	parser.add_argument('--updateThreshold' , '-u' , action='store', default=0.6   , type=float, help='During arena playoff, new neural net will be accepted if threshold or more of games are won')
@@ -88,8 +88,8 @@ def main():
 	args = parser.parse_args()
 	args.arenaCompare = 40
 	# args.maxlenOfQueue = int(2e6/(1.1*args.numItersForTrainExamplesHistory)) # at most 2GB per process, with each example weighing 1.1kB
-	# if args.timeIters > 0:
-	# 	args.numIters = 1000
+	if args.timeIters > 0:
+		args.numIters = 1000
 
 	args.load_model = (args.load_folder_file is not None)
 	if args.profile:
diff --git a/othello/OthelloPlayers.py b/othello/OthelloPlayers.py
@@ -1,58 +1,55 @@
 import numpy as np
-
+import random
 
 class RandomPlayer():
-    def __init__(self, game):
-        self.game = game
+	def __init__(self, game):
+		self.game = game
 
-    def play(self, board):
-        a = np.random.randint(self.game.getActionSize())
-        valids = self.game.getValidMoves(board, 1)
-        while valids[a]!=1:
-            a = np.random.randint(self.game.getActionSize())
-        return a
+	def play(self, board):
+		valids = self.game.getValidMoves(board, 1)
+		return random.choices(range(self.game.getActionSize()), weights=valids, k=1)[0]
 
 
 class HumanOthelloPlayer():
-    def __init__(self, game):
-        self.game = game
-
-    def play(self, board):
-        # display(board)
-        valid = self.game.getValidMoves(board, 1)
-        for i in range(len(valid)):
-            if valid[i]:
-                print("[", int(i/self.game.n), int(i%self.game.n), end="] ")
-        while True:
-            input_move = input()
-            input_a = input_move.split(" ")
-            if len(input_a) == 2:
-                try:
-                    x,y = [int(i) for i in input_a]
-                    if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n)) or \
-                            ((x == self.game.n) and (y == 0)):
-                        a = self.game.n * x + y if x != -1 else self.game.n ** 2
-                        if valid[a]:
-                            break
-                except ValueError:
-                    # Input needs to be an integer
-                    'Invalid integer'
-            print('Invalid move')
-        return a
+	def __init__(self, game):
+		self.game = game
+
+	def play(self, board):
+		# display(board)
+		valid = self.game.getValidMoves(board, 1)
+		for i in range(len(valid)):
+			if valid[i]:
+				print("[", int(i/self.game.n), int(i%self.game.n), end="] ")
+		while True:
+			input_move = input()
+			input_a = input_move.split(" ")
+			if len(input_a) == 2:
+				try:
+					x,y = [int(i) for i in input_a]
+					if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n)) or \
+							((x == self.game.n) and (y == 0)):
+						a = self.game.n * x + y if x != -1 else self.game.n ** 2
+						if valid[a]:
+							break
+				except ValueError:
+					# Input needs to be an integer
+					'Invalid integer'
+			print('Invalid move')
+		return a
 
 
 class GreedyOthelloPlayer():
-    def __init__(self, game):
-        self.game = game
-
-    def play(self, board):
-        valids = self.game.getValidMoves(board, 1)
-        candidates = []
-        for a in range(self.game.getActionSize()):
-            if valids[a]==0:
-                continue
-            nextBoard, _ = self.game.getNextState(board, 1, a)
-            score = self.game.getScore(nextBoard, 1)
-            candidates += [(-score, a)]
-        candidates.sort()
-        return candidates[0][1]
+	def __init__(self, game):
+		self.game = game
+
+	def play(self, board):
+		valids = self.game.getValidMoves(board, 1)
+		candidates = []
+		for a in range(self.game.getActionSize()):
+			if valids[a]==0:
+				continue
+			nextBoard, _ = self.game.getNextState(board, 1, a)
+			score = self.game.getScore(nextBoard, 1)
+			candidates += [(-score, a)]
+		candidates.sort()
+		return candidates[0][1]
diff --git a/othello/pytorch/NNet.py b/othello/pytorch/NNet.py
@@ -108,20 +108,30 @@ def loss_v(self, targets, outputs):
     def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
         filepath = os.path.join(folder, filename)
         if not os.path.exists(folder):
-            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
+            # print("Checkpoint Directory does not exist! Making directory {}".format(folder))
             os.mkdir(folder)
-        else:
-            print("Checkpoint Directory exists! ")
+        # else:
+        #     print("Checkpoint Directory exists! ")
+        current_uptime = get_uptime()
         torch.save({
             'state_dict': self.nnet.state_dict(),
+            'full_model': self.nnet,
+            'cumulated_uptime': self.cumulated_uptime + current_uptime-self.begin_uptime,
+            'end_uptime': current_uptime,
+            'begin': self.begin_time,
         }, filepath)
         # print(f'SAVE: {self.cumulated_uptime=} {self.begin_uptime=} ==> cumulated_uptime={self.cumulated_uptime + current_uptime-self.begin_uptime}')
 
     def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar', ongoing_experiment=False):
         # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98
         filepath = os.path.join(folder, filename)
         if not os.path.exists(filepath):
-            raise ("No model in path {}".format(filepath))
+            print("No model in path {}".format(filepath))
+            return
         map_location = None if self.args['cuda'] else 'cpu'
         checkpoint = torch.load(filepath, map_location=map_location)
-        self.nnet.load_state_dict(checkpoint['state_dict'])
+        self.nnet = checkpoint['full_model']
+        self.cumulated_uptime = checkpoint.get('cumulated_uptime', 0)
+        self.begin_time = checkpoint.get('begin', int(time.time()))
+        self.begin_uptime = checkpoint.get('end_uptime', 0) if ongoing_experiment else get_uptime()
+            
diff --git a/othello/pytorch/OthelloNNet.py b/othello/pytorch/OthelloNNet.py
@@ -1,14 +1,6 @@
-import sys
-sys.path.append('..')
-from utils import *
-
-import argparse
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.optim as optim
-from torchvision import datasets, transforms
-from torch.autograd import Variable
 
 class OthelloNNet(nn.Module):
     def __init__(self, game, args):