diff --git a/gpu_backend.py b/gpu_backend.py
new file mode 100644
index 0000000..d8009fb
--- /dev/null
+++ b/gpu_backend.py
@@ -0,0 +1,16 @@
+from neon.backends import gen_backend
+import numpy as np
+
+global backend
+backend = None
+
+def initialize_backend(args):
+  global backend
+  if backend is None:
+    backend = gen_backend(backend=args.backend,
+                          batch_size=args.batch_size,
+                          rng_seed=args.random_seed,
+                          device_id=args.device_id,
+                          datatype=np.dtype(args.datatype).type,
+                          stochastic_round=args.stochastic_round)
+  return backend
\ No newline at end of file
diff --git a/src/deepqnetwork.py b/src/deepqnetwork.py
index 0e75e9d..d9e5e7e 100644
--- a/src/deepqnetwork.py
+++ b/src/deepqnetwork.py
@@ -8,6 +8,7 @@
 from neon.transforms import SumSquared
 from neon.util.persist import save_obj
 import numpy as np
+import gpu_backend
 import os
 import logging
 logger = logging.getLogger(__name__)
@@ -26,16 +27,12 @@ def __init__(self, num_actions, args):
     self.batch_norm = args.batch_norm
 
     # create Neon backend
-    self.be = gen_backend(backend = args.backend,
-                          batch_size = args.batch_size,
-                          rng_seed = args.random_seed,
-                          device_id = args.device_id,
-                          datatype = np.dtype(args.datatype).type,
-                          stochastic_round = args.stochastic_round)
+    self.be = gpu_backend.initialize_backend(args)
 
     # prepare tensors once and reuse them
     self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
     self.input = self.be.empty(self.input_shape)
+    self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
     self.input.lshape = self.input_shape # HACK: needed for convolutional networks
     self.targets = self.be.empty((self.num_actions, self.batch_size))
 
@@ -93,11 +90,13 @@ def _createLayers(self, num_actions):
 
   def _setInput(self, states):
     # change order of axes to match what Neon expects
-    states = np.transpose(states, axes = (1, 2, 3, 0))
+    #states = states.get()
+    self.be.copy_transpose(states, self.input_uint8, axes=(1, 2, 3, 0))
     # copy() shouldn't be necessary here, but Neon doesn't work otherwise
-    self.input.set(states.copy())
+    #self.input.set(states.copy())
     # normalize network input between 0 and 1
-    self.be.divide(self.input, 255, self.input)
+    #self.be.divide(self.input, 255, self.input)
+    self.input[:] = self.input_uint8 / 255
 
   def train(self, minibatch, epoch):
     # expand components of minibatch
diff --git a/src/gpu_backend.py b/src/gpu_backend.py
new file mode 100644
index 0000000..d8009fb
--- /dev/null
+++ b/src/gpu_backend.py
@@ -0,0 +1,16 @@
+from neon.backends import gen_backend
+import numpy as np
+
+global backend
+backend = None
+
+def initialize_backend(args):
+  global backend
+  if backend is None:
+    backend = gen_backend(backend=args.backend,
+                          batch_size=args.batch_size,
+                          rng_seed=args.random_seed,
+                          device_id=args.device_id,
+                          datatype=np.dtype(args.datatype).type,
+                          stochastic_round=args.stochastic_round)
+  return backend
\ No newline at end of file
diff --git a/src/replay_memory.py b/src/replay_memory.py
index 75d08a6..b2ff3c3 100644
--- a/src/replay_memory.py
+++ b/src/replay_memory.py
@@ -1,4 +1,5 @@
 import numpy as np
+import gpu_backend
 import random
 import logging
 logger = logging.getLogger(__name__)
@@ -6,6 +7,7 @@
 class ReplayMemory:
   def __init__(self, size, args):
     self.size = size
+    self.be = gpu_backend.initialize_backend(args)
     # preallocate memory
     self.actions = np.empty(self.size, dtype = np.uint8)
     self.rewards = np.empty(self.size, dtype = np.integer)
@@ -18,8 +20,10 @@ def __init__(self, size, args):
     self.current = 0
 
     # pre-allocate prestates and poststates for minibatch
-    self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.uint8)
-    self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.uint8)
+    self.prestates = self.be.empty((self.batch_size, self.history_length,) + self.dims, dtype=np.uint8)
+    self.poststates = self.be.empty((self.batch_size, self.history_length,) + self.dims, dtype=np.uint8)
+    self.prestates_view = [self.prestates[i, ...] for i in xrange(self.batch_size)]
+    self.poststates_view = [self.poststates[i, ...] for i in xrange(self.batch_size)]
 
     logger.info("Replay memory size: %d" % self.size)
 
@@ -50,7 +54,7 @@ def getState(self, index):
 
   def getCurrentState(self):
     # reuse first row of prestates in minibatch to minimize memory consumption
-    self.prestates[0, ...] = self.getState(self.current - 1)
+    self.prestates_view[0][:] = self.getState(self.current - 1)
     return self.prestates
 
   def getMinibatch(self):
@@ -74,8 +78,8 @@ def getMinibatch(self):
         break
 
       # NB! having index first is fastest in C-order matrices
-      self.prestates[len(indexes), ...] = self.getState(index - 1)
-      self.poststates[len(indexes), ...] = self.getState(index)
+      self.prestates_view[len(indexes)][:] = self.getState(index - 1)
+      self.poststates_view[len(indexes)][:] = self.getState(index)
       indexes.append(index)
 
     # copy actions, rewards and terminals with direct slicing
diff --git a/src/state_buffer.py b/src/state_buffer.py
index ee39fe3..850ee76 100644
--- a/src/state_buffer.py
+++ b/src/state_buffer.py
@@ -1,16 +1,18 @@
 import numpy as np
+import gpu_backend
 
 class StateBuffer:
   def __init__(self, args):
+    self.be = gpu_backend.initialize_backend(args)
     self.history_length = args.history_length
     self.dims = (args.screen_height, args.screen_width)
     self.batch_size = args.batch_size
-    self.buffer = np.zeros((self.batch_size, self.history_length) + self.dims, dtype=np.uint8)
+    self.buffer = self.be.zeros((self.batch_size, self.history_length) + self.dims, dtype=np.uint8)
 
   def add(self, observation):
     assert observation.shape == self.dims
-    self.buffer[0, :-1] = self.buffer[0, 1:]
-    self.buffer[0, -1] = observation
+    self.buffer[0, :-1][:] = self.buffer[0, 1:]
+    self.buffer[0, -1][:] = observation
 
   def getState(self):
     return self.buffer[0]
@@ -19,7 +21,7 @@ def getStateMinibatch(self):
     return self.buffer
 
   def reset(self):
-    self.buffer *= 0
+    self.buffer[:] = 0
 
 if __name__ == '__main__':
   import argparse
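For context, a minimal sketch of how the shared backend added above is meant to be reused (assuming neon is installed; the Namespace values below are illustrative and simply mirror the options the project's argparse setup already exposes):

# Usage sketch, not part of the diff: every module that calls
# gpu_backend.initialize_backend(args) after the first call gets back the
# same Neon backend instance instead of constructing a new one.
from argparse import Namespace
import gpu_backend

# Illustrative values only; in the real scripts these come from argparse.
args = Namespace(backend="gpu", batch_size=32, random_seed=42, device_id=0,
                 datatype="float32", stochastic_round=False)

be_first = gpu_backend.initialize_backend(args)  # first call creates the backend
be_again = gpu_backend.initialize_backend(args)  # later calls return the cached one
assert be_first is be_again  # DeepQNetwork, ReplayMemory and StateBuffer share it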