"""Tkinter front end in which a pre-trained after-state agent plays 2048.

Running the file as a script first plots a previously saved training history,
then opens the game window and lets the agent play, one move every 50 ms.
"""
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
from tkinter import *
from logic import *
from random import *
from agent import *
from agent_afterstate import *

import numpy as np
import pickle
import time
import sys
from optparse import OptionParser
import os

# Default number of episodes after which weights and history are written out.
TRAIN = 2000

# Board geometry (pixels / cells).
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10

BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
# Tile value -> cell background colour.
BACKGROUND_COLOR_DICT = {
    2: "#eee4da", 4: "#ede0c8", 8: "#f2b179", 16: "#f59563",
    32: "#f67c5f", 64: "#f65e3b", 128: "#edcf72", 256: "#edcc61",
    512: "#edc850", 1024: "#edc53f", 2048: "#edc22e",
}
# Tile value -> text colour.
CELL_COLOR_DICT = {
    2: "#776e65", 4: "#776e65", 8: "#f9f6f2", 16: "#f9f6f2",
    32: "#f9f6f2", 64: "#f9f6f2", 128: "#f9f6f2", 256: "#f9f6f2",
    512: "#f9f6f2", 1024: "#f9f6f2", 2048: "#f9f6f2",
}
FONT = ("Verdana", 40, "bold")

# Artifacts of an earlier training run that this script loads.
PRETRAINED_WEIGHTS_FILE = "train_0.0025_0.5_0.0_result_after_2000.txt"
PRETRAINED_HISTORY_FILE = "train_0.0025_0.5_0.0_history_after_2000.txt"


class GameGrid(Frame):
    """2048 board widget that is played by an ``afterstateAgent``.

    Constructing an instance builds the window, loads the pre-trained agent
    weights and immediately starts playing (the constructor enters the Tk
    main loop when the display is enabled).
    """

    def __init__(self, args=None):
        """Build the board, load the agent weights and start playing.

        Args:
            args: Optional mapping of option name to value (as produced by
                ``vars(options)``). Entries whose value is ``None`` are
                ignored, the rest are converted to ``float``. ``"train"`` is
                consumed here as the episode count; every remaining entry is
                forwarded to ``afterstateAgent`` as a keyword argument. The
                mapping passed in is not modified.
        """
        # Work on a filtered copy: tolerate the ``None`` default and avoid
        # mutating the caller's dictionary.
        args = {
            key: float(value)
            for key, value in (args or {}).items()
            if value is not None
        }
        self.train = args.pop("train", TRAIN)

        self.DISPLAY = True
        if self.DISPLAY:
            Frame.__init__(self)

        # Agent action -> board transformation imported from ``logic``.
        self.commands = {
            Action.UP: up,
            Action.DOWN: down,
            Action.LEFT: left,
            Action.RIGHT: right,
        }

        self.grid_cells = []
        if self.DISPLAY:
            self.grid()
            self.master.title('2048')
            self.init_grid()

        self.reset()
        self.history = []
        self.count = 0

        self.agent = afterstateAgent(self.matrix, **args)
        # SECURITY NOTE: pickle can execute arbitrary code while loading;
        # only ever point this at a weights file you produced yourself.
        with open(PRETRAINED_WEIGHTS_FILE, 'rb') as weights_file:
            self.agent.W = pickle.load(weights_file)

        if self.DISPLAY:
            # key_down re-schedules itself through ``after``.
            self.key_down()
            self.mainloop()
        else:
            while self.count <= self.train:
                self.key_down()

    def reset(self):
        """Start a fresh game and redraw the board when it is displayed."""
        self.init_matrix()
        if self.DISPLAY:
            self.update_grid_cells()

    def init_grid(self):
        """Create the GRID_LEN x GRID_LEN label widgets into ``grid_cells``."""
        background = Frame(self, bg=BACKGROUND_COLOR_GAME,
                           width=SIZE, height=SIZE)
        background.grid()
        for i in range(GRID_LEN):
            grid_row = []
            for j in range(GRID_LEN):
                cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY,
                             width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
                cell.grid(row=i, column=j,
                          padx=GRID_PADDING, pady=GRID_PADDING)
                t = Label(master=cell, text="",
                          bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER,
                          font=FONT, width=4, height=2)
                t.grid()
                grid_row.append(t)
            self.grid_cells.append(grid_row)

    def gen(self):
        """Return a random row/column index in ``[0, GRID_LEN - 1]``."""
        return randint(0, GRID_LEN - 1)

    def init_matrix(self):
        """Create a new board and place the two starting tiles on it."""
        self.matrix = new_game(4)
        self.matrix = add_two(self.matrix)
        self.matrix = add_two(self.matrix)

    def update_grid_cells(self):
        """Copy the values of ``self.matrix`` into the label widgets."""
        for i in range(GRID_LEN):
            for j in range(GRID_LEN):
                new_number = self.matrix[i][j]
                label = self.grid_cells[i][j]
                if new_number == 0:
                    label.configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
                else:
                    label.configure(text=str(new_number),
                                    bg=BACKGROUND_COLOR_DICT[new_number],
                                    fg=CELL_COLOR_DICT[new_number])
        self.update_idletasks()

    def _output_path(self, kind):
        """Return the file name used to save ``kind`` ("result"/"history")."""
        return (f"train_{self.agent.alpha}_{self.agent.TD_lambda}"
                f"_{self.agent.symmetric}_{kind}_after_{self.count}.txt")

    def _save_progress(self):
        """Write the agent weights (pickle) and the score history (text)."""
        with open(self._output_path("result"), 'wb') as f:
            pickle.dump(self.agent.W, f)
        with open(self._output_path("history"), 'wb') as f:
            np.savetxt(f, self.history)

    def key_down(self):
        """Let the agent play one move and handle the end of an episode.

        When the display is enabled the method re-schedules itself, so a
        single call keeps the game running indefinitely.
        """
        if self.count >= 1:
            self.agent.verbose = False
        if self.agent.count > 10000:
            self.agent.verbose = True

        self.agent.set_state(self.matrix)
        key = self.agent.act()
        self.matrix, done = self.commands[key](self.matrix)

        # NOTE: ``reward`` is only consumed by the agent update below, which
        # is disabled in this script (the weights are loaded, not trained).
        reward = 0
        if done:
            self.matrix = add_two(self.matrix)
            if self.DISPLAY:
                self.update_grid_cells()
            # Presumably ``done`` carries the merge score when it is not the
            # plain "board changed" flag 1 -- confirm against ``logic``.
            if done != 1:
                reward += done

        # Evaluated once per move; assumes game_state() is a pure function
        # of the board.
        state = game_state(self.matrix)

        if done and state == 'win':
            print("win")
        if done and state == 'lose':
            if self.agent.explore > 0:
                print("explore: " + str(self.agent.explore))
            print(str(self.count) + " : " + str(np.max(self.matrix)))

        # self.agent.update(self.matrix, reward)

        if state in ('win', 'lose'):
            if self.count == self.train:
                # Saved before the current episode is appended to history.
                self._save_progress()
            self.history += [np.max(self.matrix)]
            self.agent.reset()
            self.count += 1
            self.reset()

        if self.DISPLAY:
            # Ask Tkinter to run the next move in 50 milliseconds.
            self.after(50, self.key_down)

    def generate_next(self):
        """Put a 2 on a randomly chosen empty cell of the board.

        Keeps drawing random cells until an empty one is found, so it must
        not be called on a full board.
        """
        index = (self.gen(), self.gen())
        while self.matrix[index[0]][index[1]] != 0:
            index = (self.gen(), self.gen())
        self.matrix[index[0]][index[1]] = 2


if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient")
    parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate")
    parser.add_option("-t", "--train", dest="train", help ="training episodes")
    parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling")
    parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration")
    parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use")
    (options, args) = parser.parse_args()
    print(vars(options))

    # Show the learning curve of the stored training run before playing.
    with open(PRETRAINED_HISTORY_FILE, 'rb') as f:
        history = np.loadtxt(f)
    plt.plot(history)
    plt.show()

    start_time = time.time()
    gamegrid = GameGrid(vars(options))
    print("--- %s seconds ---" % (time.time() - start_time))