Skip to content
Snippets Groups Projects
Commit b3eb2a1a authored by Wen Yao Jin
Browse files

go

parent 876cc25c
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -30,8 +30,8 @@ class RandomAgent():
pass
class qLearningAgent:
def __init__(self, mat, TD_lambda = 0.8, alpha = 0.05, gamma = 0.9, epsilon = 0.01):
self.state_per_tile = 11
def __init__(self, mat, TD_lambda = 0.8, alpha = 0.05, gamma = 0.95, epsilon = 0.005):
self.state_per_tile = 12
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon # e-greedy
......@@ -161,7 +161,7 @@ class qLearningAgent:
n[k] += tr[k]
index = np.where(n[k]!=0)# can't divide by zeros :/
# print(n[k])
w[k][index] += target*tr[k][index]/n[k][index]
w[k][index] += target*tr[k][index]/np.sqrt(n[k][index])
# w[k] += target*tr[k]
# print(w[k])
# print(self._target(next_state,reward) \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment