go

dc88ace1 · Wen Yao Jin · 695ea879 · dc88ace1 · dc88ace1 · 695ea879
Commit dc88ace1 authored 8 years ago by Wen Yao Jin
--- a/__pycache__/agent_afterstate.cpython-35.pyc
+++ b/__pycache__/agent_afterstate.cpython-35.pyc
--- a/puzzle.py
+++ b/puzzle.py
@@ -10,6 +10,9 @@ from agent_afterstate import *
 import numpy as np
 import pickle
 import time
+import sys
+from optparse import OptionParser
+import os
 TRAIN = 2000
@@ -28,8 +31,18 @@ CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
 FONT = ("Verdana", 40, "bold")
 class GameGrid(Frame):
-    def __init__(self):
+    def __init__(self,args=None):
+        for k in list(args.keys()):
+            if args[k] == None:
+                args.pop(k)
+            else :
+                args[k] = float(args[k])
+        if "train" in args.keys():
+            self.train = args["train"]
+            args.pop("train")
+        else:
+            self.train = 10000
        self.DISPLAY = False
        if self.DISPLAY:
            Frame.__init__(self)
@@ -44,12 +57,12 @@ class GameGrid(Frame):
        self.history = []
        self.count = 0
        # self.agent = RandomAgent()  
-        self.agent = afterstateAgent(self.matrix) 
+        self.agent = afterstateAgent(self.matrix,**args) 
        if self.DISPLAY:     
            self.key_down()
            self.mainloop()
        else:
-            while self.count<=TRAIN:
+            while self.count<=self.train:
                self.key_down()
    def reset(self):
@@ -114,7 +127,6 @@ class GameGrid(Frame):
        if game_state(self.matrix)=='win':
-            reward = 1024
            print("win")
                # self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
                # self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
@@ -129,11 +141,11 @@ class GameGrid(Frame):
        if (game_state(self.matrix)=='win' ) or  (game_state(self.matrix)=='lose'):
            # print(self.agent.W)
-            if (self.count == TRAIN):
+            if (self.count == self.train):
-                f = open("train_result_after_"+str(self.count)+".txt",'wb')
+                f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ " _result_after_"+str(self.count)+".txt",'wb')
                pickle.dump(self.agent.W ,f)
                f.close()
-                f = open("train_history_after_"+str(self.count)+".txt",'wb')
+                f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+ "_history_after_"+str(self.count)+".txt",'wb')
                np.savetxt(f, self.history)
                f.close()
            self.history += [np.max(self.matrix)]
@@ -156,6 +168,13 @@ class GameGrid(Frame):
            index = (self.gen(), self.gen())
        self.matrix[index[0]][index[1]] = 2
+if __name__ == '__main__':
+    parser = OptionParser()
+    parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient")
+    parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate")
+    parser.add_option("-t", "--train", dest="train", help ="training episodes") 
+    (options,args)= parser.parse_args()
+    print(vars(options))
    start_time = time.time()
-gamegrid = GameGrid()
+    gamegrid = GameGrid(vars(options))
    print("--- %s seconds ---" % (time.time() - start_time))
--- a/train_history_after_0.txt
+++ b/train_history_after_0.txt
--- a/train_result_after_0.txt
+++ b/train_result_after_0.txt