diff --git a/__pycache__/agent_afterstate.cpython-35.pyc b/__pycache__/agent_afterstate.cpython-35.pyc index 76885ca5ecc92570533a76bddbf7a6eb8628c6ea..868c7462c3ecc9b239e39999006428952923ed71 100644 Binary files a/__pycache__/agent_afterstate.cpython-35.pyc and b/__pycache__/agent_afterstate.cpython-35.pyc differ diff --git a/agent_afterstate.py b/agent_afterstate.py index 11ab05fa3ad53a3b2cce53bcc8fe76243eb02d8e..0717424b47a94cdcce6f7974802dc87d62710e5c 100644 --- a/agent_afterstate.py +++ b/agent_afterstate.py @@ -11,7 +11,7 @@ class Action(IntEnum): RIGHT = 4 class afterstateAgent: - def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=True): + def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=True, tuple = 2): self.state_per_tile = 12 self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} self.alpha = alpha @@ -21,11 +21,12 @@ class afterstateAgent: self.TD_lambda = TD_lambda self.forget = self.TD_lambda self.symmetric = symmetric - if self.symmetric: - # self.tuple = self._tuple_advance() + if tuple==0: + self.tuple = self._tuple() + elif tuple == 1: self.tuple = self._tuple_advance() else: - self.tuple = self._tuple() + self.tuple = self._tuple_advance_plus() if verbose: print(len(self.tuple)) self.W = self._generate_dict() @@ -58,7 +59,14 @@ class afterstateAgent: print(list) return list + def _tuple_advance(self): + return [[(0,0),(1,0),(2,0),(3,0)],\ + [(0,1),(1,1),(2,1),(3,1)],\ + [(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\ + [(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]] + + def _tuple_advance_plus(self): return [[(0,0),(1,0),(2,0),(3,0),(3,1),(2,1)],\ [(0,1),(1,1),(2,1),(3,1),(3,2),(2,2)],\ [(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\ diff --git a/puzzle.py b/puzzle.py index bd2f6151ef9f57c36500d3d294200d3a1e38580e..c32e3f85426ffc54878ece317c856021cf6bcf08 100755 --- a/puzzle.py +++ b/puzzle.py @@ -176,7 +176,8 @@ if __name__ == '__main__': parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate") parser.add_option("-t", "--train", dest="train", help ="training episodes") parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling") - parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration") + parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration") + parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use") (options,args)= parser.parse_args() print(vars(options)) start_time = time.time()