diff --git a/agent_afterstate.py b/agent_afterstate.py index 0e98b4083d1049ebd9de9f0360d237afd94e7285..49533e23638c546fe6c8e9bb0bb2002d07516387 100644 --- a/agent_afterstate.py +++ b/agent_afterstate.py @@ -115,6 +115,8 @@ class afterstateAgent: comb += [int(np.log2(state[r][l]))] else: comb += [0] + # if sum(comb) > sum(list(self._max_ind)) + # self._max_ind = tuple(comb) return tuple(comb) # def _size(self, mat): @@ -122,6 +124,7 @@ class afterstateAgent: def act(self): self.forget = self.TD_lambda + # self._max_ind = (0,0,0,0) action_index = np.argmax(np.array([self._afterstate(a,act=True) for a in range(len(Action))])) self._action_index = action_index return Action(action_index+1)