Skip to content
Snippets Groups Projects
Commit d7f9c8b7 authored by Wen Yao Jin's avatar Wen Yao Jin
Browse files

go

parent a1ba0170
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -36,10 +36,7 @@ class qLearningAgent:
self.gamma = gamma
self.epsilon = epsilon # e-greedy
self.TD_lambda = TD_lambda # TD(lamdba)
self.tuple = [[(0,0),(1,0),(2,0),(3,0)],\
[(0,1),(1,1),(2,1),(3,1)],\
[(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
[(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]]
self.tuple = self._tuple()
self.W = self._generate_dict()
print(sum([len(w.keys()) for w in self.W]))
self.feature_size = sum([self.state_per_tile**len(k) for k in self.tuple])
......@@ -47,6 +44,30 @@ class qLearningAgent:
print(self.feature_size)
self.reset()
# [[(0,0),(1,0),(2,0),(3,0)],\
# [(0,1),(1,1),(2,1),(3,1)],\
# [(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
# [(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]]
def _tuple(self):
    """Build the groups of grid cells that the constructor stores in ``self.tuple``.

    Returns:
        A list of 17 groups, each a list of four ``(i, j)`` index pairs on
        a 4x4 grid, in this order:

        * 4 groups that fix the first index and run the second over 0..3;
        * 4 groups that fix the second index and run the first over 0..3;
        * 9 overlapping 2x2 squares, one for every corner ``(i, j)`` with
          both indices in 0..2 (ordered by ``i``, then ``j``).

    The list is also printed to stdout before being returned.
    """
    size = 4  # side length of the grid; 2x2 squares start at 0..size-2

    # Lines along the second index, then lines along the first index.
    groups = [[(i, j) for j in range(size)] for i in range(size)]
    groups += [[(j, i) for j in range(size)] for i in range(size)]

    # Every 2x2 square, identified by its smallest-index corner (i, j).
    groups += [
        [(i, j), (i, j + 1), (i + 1, j), (i + 1, j + 1)]
        for i in range(size - 1)
        for j in range(size - 1)
    ]

    print(groups)
    return groups
def reset(self):
self._reset_trace() #eligibility trace
self.first_step = True# used to avoid update the first time
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment