Commit eebe6ea7 authored by Wen Yao Jin
Browse files

go

parent 0a6fc010
......@@ -11,7 +11,7 @@ class Action(IntEnum):
RIGHT = 4
class afterstateAgent:
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2):
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2):
self.state_per_tile = 12
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.alpha = alpha
......@@ -143,8 +143,8 @@ class afterstateAgent:
else:
self._reset_trace()
for tr, ind in zip(self.trace, self.index):
v = tr.get(ind,0)
v += 1
v = tr.get(ind,0.0)
v += 1.0
tr[ind] = v
# print(self.trace[0])
# print(np.sum(self.trace,axis=1))
......@@ -164,7 +164,7 @@ class afterstateAgent:
return -1
if done== 1:
done = 0 #1 means no reward
phi_array = np.array([w[i] for w,i in zip(self.W, self._index(state))])
phi_array = np.array([w[i] for w,i in zip(self.W, self._index(afterstate))])
if act:
# in the act phase, explore when there is 0
if 0 in phi_array:
......@@ -203,7 +203,7 @@ class afterstateAgent:
return
s,r = self.test_next(self._action_index,self.state)
n = next_state
if self.symmetric>0:
if self.symmetric>0.1:
for i in range(4):
s = transpose(s)
n = transpose(n)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment