Skip to content
Snippets Groups Projects
Select Git revision
  • 6b576a8a6ad96182d570a6b4871d63143bb324d2
  • sansdocker default
  • master
3 results

main.py

Blame
  • test.py 6.96 KiB
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
    
    from tkinter import *
    from logic import *
    from random import *
    from agent import *
    from agent_afterstate import *
    import numpy as np
    import pickle
    import time
    import sys
    from optparse import OptionParser
    import os
    
    
    # Default episode index at which GameGrid saves its results (used when no
    # "train" option is supplied).
    TRAIN = 2000
    # Width and height, in pixels, requested for the board's background frame.
    SIZE = 500
    # Number of cells along each side of the board.
    GRID_LEN = 4
    # Padding, in pixels, placed around every cell.
    GRID_PADDING = 10

    # Colours of the board background and of an empty cell.
    BACKGROUND_COLOR_GAME = "#92877d"
    BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"

    # Cell background colour, keyed by tile value.
    BACKGROUND_COLOR_DICT = {
        2: "#eee4da",
        4: "#ede0c8",
        8: "#f2b179",
        16: "#f59563",
        32: "#f67c5f",
        64: "#f65e3b",
        128: "#edcf72",
        256: "#edcc61",
        512: "#edc850",
        1024: "#edc53f",
        2048: "#edc22e",
    }

    # Text colour, keyed by tile value: one colour for the two smallest tiles,
    # another for every larger tile.
    CELL_COLOR_DICT = dict.fromkeys((2, 4), "#776e65")
    CELL_COLOR_DICT.update(
        dict.fromkeys((8, 16, 32, 64, 128, 256, 512, 1024, 2048), "#f9f6f2")
    )

    # Font used for the number drawn inside each cell.
    FONT = ("Verdana", 40, "bold")
    
    class GameGrid(Frame):
        """Tkinter frame in which a pre-trained afterstate agent plays 2048.

        Constructing an instance builds the window, loads pickled weights into
        the agent and immediately starts playing: in display mode the
        constructor enters the Tk main loop and only returns when that loop
        ends.
        """

        def __init__(self, args=None):
            """Build the board, create the agent, load its weights and start playing.

            Args:
                args: optional mapping of option name to value. Entries whose
                    value is ``None`` are ignored and every other value is
                    converted with ``float``. The ``"train"`` entry (default
                    ``TRAIN``) is kept on the instance; all remaining entries
                    are forwarded to ``afterstateAgent`` as keyword arguments.
                    The mapping itself is left untouched.
            """
            # Work on a cleaned copy: tolerates args=None and avoids mutating
            # the caller's dict.
            args = {k: float(v) for k, v in (args or {}).items() if v is not None}
            self.train = args.pop("train", TRAIN)

            self.DISPLAY = True
            if self.DISPLAY:
                super().__init__()
            # Maps the action chosen by the agent to the move function applied
            # to the board.
            self.commands = {
                Action.UP: up,
                Action.DOWN: down,
                Action.LEFT: left,
                Action.RIGHT: right,
            }

            self.grid_cells = []
            if self.DISPLAY:
                self.grid()
                self.master.title('2048')
                self.init_grid()
            self.reset()
            self.history = []  # max tile of every finished game
            self.count = 0     # number of finished games
            self.agent = afterstateAgent(self.matrix, **args)
            # NOTE: pickle.load can execute arbitrary code stored in the file;
            # only load weight files you produced yourself.
            with open("train_0.0025_0.5_0.0_result_after_2000.txt", 'rb') as f:
                self.agent.W = pickle.load(f)

            if self.DISPLAY:
                # key_down re-schedules itself through Tk's `after`.
                self.key_down()
                self.mainloop()
            else:
                while self.count <= self.train:
                    self.key_down()

        def reset(self):
            """Start a new game and, in display mode, redraw the board."""
            self.init_matrix()
            if self.DISPLAY:
                self.update_grid_cells()

        def init_grid(self):
            """Create the GRID_LEN x GRID_LEN label widgets and store them in ``grid_cells``."""
            background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
            background.grid()
            for i in range(GRID_LEN):
                grid_row = []
                for j in range(GRID_LEN):
                    cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY,
                                 width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
                    cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
                    t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY,
                              justify=CENTER, font=FONT, width=4, height=2)
                    t.grid()
                    grid_row.append(t)

                self.grid_cells.append(grid_row)

        def gen(self):
            """Return a random row/column index in ``[0, GRID_LEN - 1]``."""
            return randint(0, GRID_LEN - 1)

        def init_matrix(self):
            """Create a fresh board and apply ``add_two`` to it twice."""
            self.matrix = new_game(GRID_LEN)

            self.matrix = add_two(self.matrix)
            self.matrix = add_two(self.matrix)

        def update_grid_cells(self):
            """Redraw every cell label from the current ``self.matrix``."""
            for i in range(GRID_LEN):
                for j in range(GRID_LEN):
                    new_number = self.matrix[i][j]
                    label = self.grid_cells[i][j]
                    if new_number == 0:
                        label.configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
                    else:
                        # Values missing from the colour tables reuse the 2048
                        # colours instead of raising KeyError.
                        label.configure(
                            text=str(new_number),
                            bg=BACKGROUND_COLOR_DICT.get(new_number, BACKGROUND_COLOR_DICT[2048]),
                            fg=CELL_COLOR_DICT.get(new_number, CELL_COLOR_DICT[2048]),
                        )
            self.update_idletasks()

        def key_down(self):
            """Let the agent play one move; in display mode, schedule the next one.

            When the game ends (win or lose) the max tile is recorded in
            ``self.history``, results are saved if ``self.count`` equals
            ``self.train``, and a new game is started.
            """
            if self.count >= 1:
                self.agent.verbose = False
            if self.agent.count > 10000:
                self.agent.verbose = True
            self.agent.set_state(self.matrix)
            key = self.agent.act()
            self.matrix, done = self.commands[key](self.matrix)
            # The reward is still computed although the learning update is
            # disabled in this script (weights are loaded, not trained).
            reward = 0
            if done:
                self.matrix = add_two(self.matrix)
                if self.DISPLAY:
                    self.update_grid_cells()
                if done != 1:
                    reward += done

            # The board does not change below, so query the state only once.
            state = game_state(self.matrix)
            if state == 'win':
                print("win")
            if state == 'lose':
                if self.agent.explore > 0:
                    print("explore: " + str(self.agent.explore))
                print(str(self.count) + " : " + str(np.max(self.matrix)))

            if state == 'win' or state == 'lose':
                if self.count == self.train:
                    self._save_results()
                self.history.append(np.max(self.matrix))
                self.agent.reset()
                self.count += 1
                self.reset()

            if self.DISPLAY:
                # Ask Tkinter to run the next move in 50 ms.
                self.after(50, self.key_down)

        def _save_results(self):
            """Write the agent weights (pickle) and the score history to disk."""
            prefix = (f"train_{self.agent.alpha!s}_{self.agent.TD_lambda!s}"
                      f"_{self.agent.symmetric!s}")
            suffix = f"_after_{self.count!s}.txt"
            with open(prefix + "_result" + suffix, 'wb') as f:
                pickle.dump(self.agent.W, f)
            with open(prefix + "_history" + suffix, 'wb') as f:
                np.savetxt(f, self.history)

        def generate_next(self):
            """Write a 2 into a randomly chosen empty cell of ``self.matrix``.

            Does nothing when the board has no empty cell (the random search
            below would otherwise never terminate).
            """
            if all(self.matrix[i][j] != 0
                   for i in range(GRID_LEN) for j in range(GRID_LEN)):
                return
            index = (self.gen(), self.gen())
            while self.matrix[index[0]][index[1]] != 0:
                index = (self.gen(), self.gen())
            self.matrix[index[0]][index[1]] = 2
    
    if __name__ == '__main__':
        # Command-line options. Values arrive as strings (or None when the
        # option is absent) and are passed to GameGrid as a dict.
        parser = OptionParser()
        parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient")
        parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate")
        parser.add_option("-t", "--train", dest="train", help ="training episodes")
        parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling")
        parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration")
        parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use")
        (options, args) = parser.parse_args()
        print(vars(options))

        # Plot the saved score history first; plt.show() blocks until the
        # plot window is closed. The context manager closes the file even if
        # loading fails.
        with open("train_0.0025_0.5_0.0_history_after_2000.txt", 'rb') as f:
            history = np.loadtxt(f)
        plt.plot(history)
        plt.show()

        # Run the game and report how long the GameGrid constructor took.
        start_time = time.time()
        gamegrid = GameGrid(vars(options))
        print("--- %s seconds ---" % (time.time() - start_time))