Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions gpu_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from neon.backends import gen_backend
import numpy as np

# Process-wide singleton holding the shared Neon backend.  It is created
# lazily on the first call to initialize_backend() and reused afterwards so
# that every component (network, replay memory, state buffer) shares one
# device context.  NOTE: a bare `global backend` statement at module scope
# is a no-op in Python, so it was dropped.
backend = None

def initialize_backend(args):
    """Return the shared Neon backend, creating it on first use.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must expose ``backend``, ``batch_size``, ``random_seed``,
        ``device_id``, ``datatype`` and ``stochastic_round``.  These
        attributes are only read on the very first call.

    Returns
    -------
    The object produced by ``neon.backends.gen_backend()``.

    NOTE(review): on every call after the first, ``args`` is ignored
    entirely — callers passing different settings silently receive the
    backend built from the first caller's arguments.
    """
    global backend
    if backend is None:
        backend = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)
    return backend
17 changes: 8 additions & 9 deletions src/deepqnetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from neon.transforms import SumSquared
from neon.util.persist import save_obj
import numpy as np
import gpu_backend
import os
import logging
logger = logging.getLogger(__name__)
Expand All @@ -26,16 +27,12 @@ def __init__(self, num_actions, args):
self.batch_norm = args.batch_norm

# create Neon backend
self.be = gen_backend(backend = args.backend,
batch_size = args.batch_size,
rng_seed = args.random_seed,
device_id = args.device_id,
datatype = np.dtype(args.datatype).type,
stochastic_round = args.stochastic_round)
self.be = gpu_backend.initialize_backend(args)

# prepare tensors once and reuse them
self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
self.input = self.be.empty(self.input_shape)
self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
self.input.lshape = self.input_shape # HACK: needed for convolutional networks
self.targets = self.be.empty((self.num_actions, self.batch_size))

Expand Down Expand Up @@ -93,11 +90,13 @@ def _createLayers(self, num_actions):

def _setInput(self, states):
# change order of axes to match what Neon expects
states = np.transpose(states, axes = (1, 2, 3, 0))
#states = states.get()
self.be.copy_transpose(states, self.input_uint8, axes=(1, 2, 3, 0))
# copy() shouldn't be necessary here, but Neon doesn't work otherwise
self.input.set(states.copy())
#self.input.set(states.copy())
# normalize network input between 0 and 1
self.be.divide(self.input, 255, self.input)
#self.be.divide(self.input, 255, self.input)
self.input[:] = self.input_uint8 / 255

def train(self, minibatch, epoch):
# expand components of minibatch
Expand Down
16 changes: 16 additions & 0 deletions src/gpu_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from neon.backends import gen_backend
import numpy as np

# Lazily-created, process-wide Neon backend shared by all callers of
# initialize_backend().  (The original module-level `global backend`
# statement was removed: `global` only has meaning inside a function,
# so at module scope it did nothing.)
backend = None

def initialize_backend(args):
    """Create the Neon backend on first call and return it thereafter.

    Parameters
    ----------
    args : argparse.Namespace-like
        Read only on the first call; must provide ``backend``,
        ``batch_size``, ``random_seed``, ``device_id``, ``datatype``
        and ``stochastic_round``.

    Returns
    -------
    The object returned by ``neon.backends.gen_backend()``.

    NOTE(review): subsequent calls ignore ``args`` completely, so a
    second caller with different settings silently gets the first
    backend.
    """
    global backend
    if backend is None:
        backend = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)
    return backend
14 changes: 9 additions & 5 deletions src/replay_memory.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import numpy as np
import gpu_backend
import random
import logging
logger = logging.getLogger(__name__)

class ReplayMemory:
def __init__(self, size, args):
self.size = size
self.be = gpu_backend.initialize_backend(args)
# preallocate memory
self.actions = np.empty(self.size, dtype = np.uint8)
self.rewards = np.empty(self.size, dtype = np.integer)
Expand All @@ -18,8 +20,10 @@ def __init__(self, size, args):
self.current = 0

# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.uint8)
self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.uint8)
self.prestates = self.be.empty((self.batch_size, self.history_length,) + self.dims, dtype=np.uint8)
self.poststates = self.be.empty((self.batch_size, self.history_length,) + self.dims, dtype=np.uint8)
self.prestates_view = [self.prestates[i, ...] for i in xrange(self.batch_size)]
self.poststates_view = [self.poststates[i, ...] for i in xrange(self.batch_size)]

logger.info("Replay memory size: %d" % self.size)

Expand Down Expand Up @@ -50,7 +54,7 @@ def getState(self, index):

def getCurrentState(self):
# reuse first row of prestates in minibatch to minimize memory consumption
self.prestates[0, ...] = self.getState(self.current - 1)
self.prestates_view[0][:] = self.getState(self.current - 1)
return self.prestates

def getMinibatch(self):
Expand All @@ -74,8 +78,8 @@ def getMinibatch(self):
break

# NB! having index first is fastest in C-order matrices
self.prestates[len(indexes), ...] = self.getState(index - 1)
self.poststates[len(indexes), ...] = self.getState(index)
self.prestates_view[len(indexes)][:] = self.getState(index - 1)
self.poststates_view[len(indexes)][:] = self.getState(index)
indexes.append(index)

# copy actions, rewards and terminals with direct slicing
Expand Down
10 changes: 6 additions & 4 deletions src/state_buffer.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import numpy as np
import gpu_backend

class StateBuffer:
def __init__(self, args):
self.be = gpu_backend.initialize_backend(args)
self.history_length = args.history_length
self.dims = (args.screen_height, args.screen_width)
self.batch_size = args.batch_size
self.buffer = np.zeros((self.batch_size, self.history_length) + self.dims, dtype=np.uint8)
self.buffer = self.be.zeros((self.batch_size, self.history_length) + self.dims, dtype=np.uint8)

def add(self, observation):
assert observation.shape == self.dims
self.buffer[0, :-1] = self.buffer[0, 1:]
self.buffer[0, -1] = observation
self.buffer[0, :-1][:] = self.buffer[0, 1:]
self.buffer[0, -1][:] = observation

def getState(self):
    # Return the current agent state: the stack of the most recent
    # screens held in the first slot of the buffer.  Given the buffer
    # allocation of (batch_size, history_length) + dims, this yields a
    # (history_length, screen_height, screen_width) view — presumably
    # consumed for single-state inference; confirm against callers.
    return self.buffer[0]
Expand All @@ -19,7 +21,7 @@ def getStateMinibatch(self):
return self.buffer

def reset(self):
self.buffer *= 0
self.buffer[:] = 0

if __name__ == '__main__':
import argparse
Expand Down