Skip to content
Snippets Groups Projects
Commit 31a4f82d authored by Wen Yao Jin
Browse files

go

parent 9ff0c414
No related branches found
No related tags found
No related merge requests found
No preview for this file type
...@@ -11,7 +11,7 @@ class Action(IntEnum): ...@@ -11,7 +11,7 @@ class Action(IntEnum):
RIGHT = 4 RIGHT = 4
class afterstateAgent: class afterstateAgent:
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=True): def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, gamma = 0.95, epsilon = 0.01, verbose= True, symmetric=True, tuple = 2):
self.state_per_tile = 12 self.state_per_tile = 12
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.alpha = alpha self.alpha = alpha
...@@ -21,11 +21,12 @@ class afterstateAgent: ...@@ -21,11 +21,12 @@ class afterstateAgent:
self.TD_lambda = TD_lambda self.TD_lambda = TD_lambda
self.forget = self.TD_lambda self.forget = self.TD_lambda
self.symmetric = symmetric self.symmetric = symmetric
if self.symmetric: if tuple==0:
# self.tuple = self._tuple_advance() self.tuple = self._tuple()
elif tuple == 1:
self.tuple = self._tuple_advance() self.tuple = self._tuple_advance()
else: else:
self.tuple = self._tuple() self.tuple = self._tuple_advance_plus()
if verbose: if verbose:
print(len(self.tuple)) print(len(self.tuple))
self.W = self._generate_dict() self.W = self._generate_dict()
...@@ -58,7 +59,14 @@ class afterstateAgent: ...@@ -58,7 +59,14 @@ class afterstateAgent:
print(list) print(list)
return list return list
def _tuple_advance(self): def _tuple_advance(self):
return [[(0,0),(1,0),(2,0),(3,0)],\
[(0,1),(1,1),(2,1),(3,1)],\
[(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
[(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]]
def _tuple_advance_plus(self):
return [[(0,0),(1,0),(2,0),(3,0),(3,1),(2,1)],\ return [[(0,0),(1,0),(2,0),(3,0),(3,1),(2,1)],\
[(0,1),(1,1),(2,1),(3,1),(3,2),(2,2)],\ [(0,1),(1,1),(2,1),(3,1),(3,2),(2,2)],\
[(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\ [(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
......
...@@ -177,6 +177,7 @@ if __name__ == '__main__': ...@@ -177,6 +177,7 @@ if __name__ == '__main__':
parser.add_option("-t", "--train", dest="train", help ="training episodes") parser.add_option("-t", "--train", dest="train", help ="training episodes")
parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling") parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling")
parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration") parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration")
parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use")
(options,args)= parser.parse_args() (options,args)= parser.parse_args()
print(vars(options)) print(vars(options))
start_time = time.time() start_time = time.time()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment