diff --git a/__pycache__/agent_afterstate.cpython-35.pyc b/__pycache__/agent_afterstate.cpython-35.pyc
index 422c71110586341870108dab6ccae79cd800d8e9..0502c89ce3c26e333eaaae771771fdfec0375b9a 100644
Binary files a/__pycache__/agent_afterstate.cpython-35.pyc and b/__pycache__/agent_afterstate.cpython-35.pyc differ
diff --git a/puzzle.py b/puzzle.py
index c77eed1d3aa08d1dd974c2e3e0b917328539ed92..0c730de5aee955e9dd6728876c48a0cccd5e6867 100755
--- a/puzzle.py
+++ b/puzzle.py
@@ -10,6 +10,9 @@ from agent_afterstate import *
 import numpy as np
 import pickle
 import time
+import sys
+from optparse import OptionParser
+import os
 
 TRAIN = 2000
 
@@ -28,8 +31,18 @@ CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
 
 FONT = ("Verdana", 40, "bold")
 
 class GameGrid(Frame):
-    def __init__(self):
+    def __init__(self,args=None):
+        for k in list(args.keys()):
+            if args[k] == None:
+                args.pop(k)
+            else :
+                args[k] = float(args[k])
+        if "train" in args.keys():
+            self.train = args["train"]
+            args.pop("train")
+        else:
+            self.train = 10000
         self.DISPLAY = False
         if self.DISPLAY:
             Frame.__init__(self)
@@ -44,12 +57,12 @@ class GameGrid(Frame):
         self.history = []
         self.count = 0
         # self.agent = RandomAgent()
-        self.agent = afterstateAgent(self.matrix)
+        self.agent = afterstateAgent(self.matrix,**args)
         if self.DISPLAY:
             self.key_down()
             self.mainloop()
         else:
-            while self.count<=TRAIN:
+            while self.count<=self.train:
                 self.key_down()
 
     def reset(self):
@@ -114,7 +127,6 @@ class GameGrid(Frame):
 
 
         if game_state(self.matrix)=='win':
-            reward = 1024
            print("win")
             # self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
             # self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
@@ -129,11 +141,11 @@ class GameGrid(Frame):
 
         if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
             # print(self.agent.W)
-            if (self.count == TRAIN):
-                f = open("train_result_after_"+str(self.count)+".txt",'wb')
+            if (self.count == self.train):
+                f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ " _result_after_"+str(self.count)+".txt",'wb')
                 pickle.dump(self.agent.W ,f)
                 f.close()
-                f = open("train_history_after_"+str(self.count)+".txt",'wb')
+                f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ "_history_after_"+str(self.count)+".txt",'wb')
                 np.savetxt(f, self.history)
                 f.close()
             self.history += [np.max(self.matrix)]
@@ -156,6 +168,13 @@ class GameGrid(Frame):
         index = (self.gen(), self.gen())
         self.matrix[index[0]][index[1]] = 2
 
-start_time = time.time()
-gamegrid = GameGrid()
-print("--- %s seconds ---" % (time.time() - start_time))
+if __name__ == '__main__':
+    parser = OptionParser()
+    parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient")
+    parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate")
+    parser.add_option("-t", "--train", dest="train", help ="training episodes")
+    (options,args)= parser.parse_args()
+    print(vars(options))
+    start_time = time.time()
+    gamegrid = GameGrid(vars(options))
+    print("--- %s seconds ---" % (time.time() - start_time))
diff --git a/train_history_after_0.txt b/train_history_after_0.txt
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/train_result_after_0.txt b/train_result_after_0.txt
deleted file mode 100644
index 7efff79600f13cc373f5a6ddedf3bf34769d873c..0000000000000000000000000000000000000000
Binary files a/train_result_after_0.txt and /dev/null differ
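
Note (not part of the patch): the new `__main__` block moves the training hyper-parameters from hard-coded constants to command-line flags. Below is a minimal standalone sketch of how the parsed option dict is consumed by `GameGrid.__init__`; the invocation and the values are made-up examples, not defaults taken from the patch. Flags that were not passed parse as `None` and are dropped, the remaining strings are cast to `float`, `train` is kept on the grid for the episode loop, and everything else is forwarded to `afterstateAgent` as keyword arguments.

    # Hypothetical invocation (illustrative values only):
    #   python puzzle.py --alpha 0.0025 --TD 0.5
    opts = {"alpha": "0.0025", "TD_lambda": "0.5", "train": None}  # -t not passed

    for k in list(opts):              # list() so we can pop while iterating
        if opts[k] is None:
            opts.pop(k)               # unset flag: keep the agent's own default
        else:
            opts[k] = float(opts[k])  # OptionParser returns strings

    train = opts.pop("train", 10000)  # episode count stays in GameGrid
    print(train, opts)                # 10000 {'alpha': 0.0025, 'TD_lambda': 0.5}
    # afterstateAgent(self.matrix, **opts) then receives only the flags that were set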