go

eebe6ea7 · Wen Yao Jin · 0a6fc010 · eebe6ea7 · eebe6ea7 · eebe6ea7
Commit eebe6ea7 authored 8 years ago by Wen Yao Jin
--- a/__pycache__/agent_afterstate.cpython-35.pyc
+++ b/__pycache__/agent_afterstate.cpython-35.pyc
--- a/__pycache__/logic.cpython-35.pyc
+++ b/__pycache__/logic.cpython-35.pyc
--- a/agent_afterstate.py
+++ b/agent_afterstate.py
@@ -11,7 +11,7 @@ class Action(IntEnum):
    RIGHT = 4

 class afterstateAgent:
-    def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2):
+    def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2):
        self.state_per_tile = 12
        self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
        self.alpha = alpha
@@ -143,8 +143,8 @@ class afterstateAgent:
        else:
            self._reset_trace()
        for tr, ind in zip(self.trace, self.index):
-            v = tr.get(ind,0)
-            v += 1 
+            v = tr.get(ind,0.0)
+            v += 1.0
            tr[ind] = v
        # print(self.trace[0])
        # print(np.sum(self.trace,axis=1))
@@ -164,7 +164,7 @@ class afterstateAgent:
            return -1
        if done== 1:
            done = 0 #1 means no reward
-        phi_array = np.array([w[i] for w,i in zip(self.W, self._index(state))])
+        phi_array = np.array([w[i] for w,i in zip(self.W, self._index(afterstate))])
        if act:
            # in the act phase, explore when there is 0
            if 0 in phi_array:
@@ -203,7 +203,7 @@ class afterstateAgent:
            return
        s,r = self.test_next(self._action_index,self.state)
        n = next_state
-        if self.symmetric>0:
+        if self.symmetric>0.1:
            for i in range(4):
                s = transpose(s)
                n = transpose(n)