diff --git a/__pycache__/agent_afterstate.cpython-35.pyc b/__pycache__/agent_afterstate.cpython-35.pyc index 868c7462c3ecc9b239e39999006428952923ed71..c98f07408ce6dd5b46f6601976789ee53da6ad7a 100644 Binary files a/__pycache__/agent_afterstate.cpython-35.pyc and b/__pycache__/agent_afterstate.cpython-35.pyc differ diff --git a/agent_afterstate.py b/agent_afterstate.py index 642b33c8ecfa25974cb1ea7d29b658f8fa2eeef5..0e98b4083d1049ebd9de9f0360d237afd94e7285 100644 --- a/agent_afterstate.py +++ b/agent_afterstate.py @@ -16,6 +16,7 @@ class afterstateAgent: self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} self.alpha = alpha # self.gamma = gamma + self.epsilon_origin = epsilon self.epsilon = epsilon # e-greedy # self.TD_lambda = 1-epsilon # TD(lamdba) self.TD_lambda = TD_lambda @@ -77,6 +78,7 @@ class afterstateAgent: self.count = 0 self.first_step = True# used to avoid update the first time self.explore = 0 + self.epsilon -= self.epsilon_origin/2000 return def _reset_trace(self): @@ -168,6 +170,7 @@ class afterstateAgent: i = np.random.rand(); if i < self.epsilon: #explore self.explore += 1 + self.forget = 0.0 return sum(phi_array) + 10000 return sum(phi_array) + done @@ -200,11 +203,9 @@ class afterstateAgent: if self.symmetric>0: for i in range(4): s = transpose(s) - self.set_state(s) n = transpose(n) self.one_side_update(n,reward,s) s = reverse(s) - self.set_state(s) n = reverse(n) self.one_side_update(n,reward,s) #one loop is one rotation diff --git a/test.py b/test.py deleted file mode 100644 index e4dc0fa87de3c2a3d7572230dcd4de4f2cb35fea..0000000000000000000000000000000000000000 --- a/test.py +++ /dev/null @@ -1,135 +0,0 @@ -from tkinter import * -from logic import * -from random import * -from agent import * -from agent_afterstate import * - -import numpy as np -import pickle -import time - - -TRAIN = 100000 -SIZE = 500 -GRID_LEN = 4 -GRID_PADDING = 10 - -BACKGROUND_COLOR_GAME = "#92877d" -BACKGROUND_COLOR_CELL_EMPTY = "#9e948a" -BACKGROUND_COLOR_DICT = { 2:"#eee4da", 4:"#ede0c8", 8:"#f2b179", 16:"#f59563", \ - 32:"#f67c5f", 64:"#f65e3b", 128:"#edcf72", 256:"#edcc61", \ - 512:"#edc850", 1024:"#edc53f", 2048:"#edc22e" } -CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \ - 32:"#f9f6f2", 64:"#f9f6f2", 128:"#f9f6f2", 256:"#f9f6f2", \ - 512:"#f9f6f2", 1024:"#f9f6f2", 2048:"#f9f6f2" } -FONT = ("Verdana", 40, "bold") - -class GameGrid(Frame): - def __init__(self): - - self.DISPLAY = True - if self.DISPLAY: - Frame.__init__(self) - self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} - - self.grid_cells = [] - if self.DISPLAY: - self.grid() - self.master.title('2048') - self.init_grid() - self.reset() - self.history = [] - self.count = 0 - self.agent = afterstateAgent(self.matrix) - f = open("train_0.0025_0.0_result_after_2000.txt",'rb') - self.agent.W = pickle.load(f) - f.close() - print(self.agent.W[0]) - if self.DISPLAY: - self.key_down() - self.mainloop() - else: - while self.count<=TRAIN: - self.key_down() - - def reset(self): - self.init_matrix() - if self.DISPLAY: - self.update_grid_cells() - - def init_grid(self): - background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE) - background.grid() - for i in range(GRID_LEN): - grid_row = [] - for j in range(GRID_LEN): - cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN) - cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING) - # font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT) - t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2) - t.grid() - grid_row.append(t) - - self.grid_cells.append(grid_row) - - def gen(self): - return randint(0, GRID_LEN - 1) - - def init_matrix(self): - self.matrix = new_game(4) - - self.matrix=add_two(self.matrix) - self.matrix=add_two(self.matrix) - - def update_grid_cells(self): - for i in range(GRID_LEN): - for j in range(GRID_LEN): - new_number = self.matrix[i][j] - if new_number == 0: - self.grid_cells[i][j].configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY) - else: - self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number]) - self.update_idletasks() - - def key_down(self): - - self.agent.set_state(self.matrix) - key = self.agent.act() - self.matrix,done = self.commands[key](self.matrix) - reward = 0 - if done: - self.matrix = add_two(self.matrix) - if self.DISPLAY: - self.update_grid_cells() - if done!=1: - reward = done - # else: - # reward = -10 - - if game_state(self.matrix)=='win': - print("win") - if game_state(self.matrix)=='lose': - print(np.max(self.matrix)) - - # self.agent.update(self.matrix, reward) - - if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'): - # print(self.agent.W) - self.history += [np.max(self.matrix)] - self.agent.reset() - self.count += 1 - self.reset() - - if (self.DISPLAY): - # Tell Tkinter to wait DELTA_TIME seconds before next iteration - self.after(100, self.key_down) - - def generate_next(self): - index = (self.gen(), self.gen()) - while self.matrix[index[0]][index[1]] != 0: - index = (self.gen(), self.gen()) - self.matrix[index[0]][index[1]] = 2 - -start_time = time.time() -gamegrid = GameGrid() -print("--- %s seconds ---" % (time.time() - start_time)) diff --git a/train_0.0025_0.0_True_result_after_2000.txt b/train_0.0025_0.0_True_result_after_2000.txt deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000