-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcollector_srs.pyx
40 lines (34 loc) · 1.35 KB
/
collector_srs.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
Collects (state, immediate rewards) tuples, trying each action at every step
of a level playthrough.
"""
from cython import ccall, cclass, locals, returns
from numpy import empty
from collector_base import BaseCollector
from interface cimport c_do_action, c_get_score, c_get_state
@cclass
class Collector(BaseCollector):
    """Collector that samples the immediate reward of every action per step."""

    @ccall
    @returns('dict')
    @locals(n_steps='int', t='int', n_actions='int', a='int',
            n_features='int', f='int', raw_state='float*', snapshot='int',
            state_log='float[:, :]', reward_log='float[:, :]',
            base_score='float', delta='float')
    def collect(self):
        """
        Record the state and the per-action score change at each step.

        The step, action and feature counts are read from ``self.level``.
        At every step the current state is copied out, a checkpoint is
        created, and each action is tried in turn; after each try the
        checkpoint is loaded again.

        Returns a dict with two entries:
          'states'  -- (steps, features) buffer of 'f4' values
          'rewards' -- (steps, actions) buffer of 'f4' score differences
        """
        n_steps = self.level['steps']
        n_actions = self.level['actions']
        n_features = self.level['features']

        # Output buffers; every cell is written below, so `empty` suffices.
        state_log = empty((n_steps, n_features), dtype='f4')
        reward_log = empty((n_steps, n_actions), dtype='f4')

        for t in range(n_steps):
            # c_get_state() hands back a raw float pointer, so the features
            # are copied one element at a time into row t.
            raw_state = c_get_state()
            for f in range(n_features):
                state_log[t, f] = raw_state[f]

            # Score before any action is tried; rewards are relative to it.
            base_score = c_get_score()
            snapshot = self.create_checkpoint()

            for a in range(n_actions):
                c_do_action(a)
                delta = c_get_score() - base_score
                reward_log[t, a] = delta
                # Undo the trial so the next action starts from the same point.
                self.load_checkpoint(snapshot)

            self.clear_checkpoints()
            # Presumably advances the real playthrough by one step via the
            # bot -- confirm against BaseCollector / the bot implementation.
            self.bot.act(1)

        return {'states': state_log, 'rewards': reward_log}