diff --git a/__pycache__/agent_afterstate.cpython-35.pyc b/__pycache__/agent_afterstate.cpython-35.pyc index c98f07408ce6dd5b46f6601976789ee53da6ad7a..072919ca89ffc8dd8cc1e54a537e19784fcef4e9 100644 Binary files a/__pycache__/agent_afterstate.cpython-35.pyc and b/__pycache__/agent_afterstate.cpython-35.pyc differ diff --git a/__pycache__/logic.cpython-35.pyc b/__pycache__/logic.cpython-35.pyc index b6f2e6f8362795f1630ed908724d5b72ef96b480..dd61672a13e9ee6b4c277e0eb230bfb7d2b4d481 100644 Binary files a/__pycache__/logic.cpython-35.pyc and b/__pycache__/logic.cpython-35.pyc differ diff --git a/agent_afterstate.py b/agent_afterstate.py index 49533e23638c546fe6c8e9bb0bb2002d07516387..312991a04ab0d005103964554d1c04d594a04a8b 100644 --- a/agent_afterstate.py +++ b/agent_afterstate.py @@ -11,7 +11,7 @@ class Action(IntEnum): RIGHT = 4 class afterstateAgent: - def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2): + def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2): self.state_per_tile = 12 self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} self.alpha = alpha @@ -143,8 +143,8 @@ class afterstateAgent: else: self._reset_trace() for tr, ind in zip(self.trace, self.index): - v = tr.get(ind,0) - v += 1 + v = tr.get(ind,0.0) + v += 1.0 tr[ind] = v # print(self.trace[0]) # print(np.sum(self.trace,axis=1)) @@ -164,7 +164,7 @@ class afterstateAgent: return -1 if done== 1: done = 0 #1 means no reward - phi_array = np.array([w[i] for w,i in zip(self.W, self._index(state))]) + phi_array = np.array([w[i] for w,i in zip(self.W, self._index(afterstate))]) if act: # in the act phase, explore when there is 0 if 0 in phi_array: @@ -203,7 +203,7 @@ class afterstateAgent: return s,r = self.test_next(self._action_index,self.state) n = next_state - if self.symmetric>0: + if self.symmetric>0.1: for i in range(4): s = transpose(s) n = transpose(n)