Skip to content
Snippets Groups Projects
Commit dc88ace1 authored by Wen Yao Jin
Browse files

go

parent 695ea879
Branches
No related tags found
No related merge requests found
No preview for this file type
...@@ -10,6 +10,9 @@ from agent_afterstate import * ...@@ -10,6 +10,9 @@ from agent_afterstate import *
import numpy as np import numpy as np
import pickle import pickle
import time import time
import sys
from optparse import OptionParser
import os
TRAIN = 2000 TRAIN = 2000
...@@ -28,8 +31,18 @@ CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \ ...@@ -28,8 +31,18 @@ CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
FONT = ("Verdana", 40, "bold") FONT = ("Verdana", 40, "bold")
class GameGrid(Frame): class GameGrid(Frame):
def __init__(self): def __init__(self,args=None):
for k in list(args.keys()):
if args[k] == None:
args.pop(k)
else :
args[k] = float(args[k])
if "train" in args.keys():
self.train = args["train"]
args.pop("train")
else:
self.train = 10000
self.DISPLAY = False self.DISPLAY = False
if self.DISPLAY: if self.DISPLAY:
Frame.__init__(self) Frame.__init__(self)
...@@ -44,12 +57,12 @@ class GameGrid(Frame): ...@@ -44,12 +57,12 @@ class GameGrid(Frame):
self.history = [] self.history = []
self.count = 0 self.count = 0
# self.agent = RandomAgent() # self.agent = RandomAgent()
self.agent = afterstateAgent(self.matrix) self.agent = afterstateAgent(self.matrix,**args)
if self.DISPLAY: if self.DISPLAY:
self.key_down() self.key_down()
self.mainloop() self.mainloop()
else: else:
while self.count<=TRAIN: while self.count<=self.train:
self.key_down() self.key_down()
def reset(self): def reset(self):
...@@ -114,7 +127,6 @@ class GameGrid(Frame): ...@@ -114,7 +127,6 @@ class GameGrid(Frame):
if game_state(self.matrix)=='win': if game_state(self.matrix)=='win':
reward = 1024
print("win") print("win")
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY) # self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY) # self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
...@@ -129,11 +141,11 @@ class GameGrid(Frame): ...@@ -129,11 +141,11 @@ class GameGrid(Frame):
if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'): if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
# print(self.agent.W) # print(self.agent.W)
if (self.count == TRAIN): if (self.count == self.train):
f = open("train_result_after_"+str(self.count)+".txt",'wb') f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ " _result_after_"+str(self.count)+".txt",'wb')
pickle.dump(self.agent.W ,f) pickle.dump(self.agent.W ,f)
f.close() f.close()
f = open("train_history_after_"+str(self.count)+".txt",'wb') f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ "_history_after_"+str(self.count)+".txt",'wb')
np.savetxt(f, self.history) np.savetxt(f, self.history)
f.close() f.close()
self.history += [np.max(self.matrix)] self.history += [np.max(self.matrix)]
...@@ -156,6 +168,13 @@ class GameGrid(Frame): ...@@ -156,6 +168,13 @@ class GameGrid(Frame):
index = (self.gen(), self.gen()) index = (self.gen(), self.gen())
self.matrix[index[0]][index[1]] = 2 self.matrix[index[0]][index[1]] = 2
if __name__ == '__main__':
parser = OptionParser()
parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient")
parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate")
parser.add_option("-t", "--train", dest="train", help ="training episodes")
(options,args)= parser.parse_args()
print(vars(options))
start_time = time.time() start_time = time.time()
gamegrid = GameGrid() gamegrid = GameGrid(vars(options))
print("--- %s seconds ---" % (time.time() - start_time)) print("--- %s seconds ---" % (time.time() - start_time))
File deleted
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment