Commit 8b244b33 authored by Wen Yao Jin's avatar Wen Yao Jin
Browse files

go

parents 0f5dd0a0 beae7c53
......@@ -16,6 +16,7 @@ class afterstateAgent:
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.alpha = alpha
# self.gamma = gamma
self.epsilon_origin = epsilon
self.epsilon = epsilon # e-greedy
# self.TD_lambda = 1-epsilon # TD(lamdba)
self.TD_lambda = TD_lambda
......@@ -77,6 +78,7 @@ class afterstateAgent:
self.count = 0
self.first_step = True# used to avoid update the first time
self.explore = 0
self.epsilon -= self.epsilon_origin/2000
return
def _reset_trace(self):
......@@ -168,6 +170,7 @@ class afterstateAgent:
i = np.random.rand();
if i < self.epsilon: #explore
self.explore += 1
self.forget = 0.0
return sum(phi_array) + 10000
return sum(phi_array) + done
......@@ -200,11 +203,9 @@ class afterstateAgent:
if self.symmetric>0:
for i in range(4):
s = transpose(s)
self.set_state(s)
n = transpose(n)
self.one_side_update(n,reward,s)
s = reverse(s)
self.set_state(s)
n = reverse(n)
self.one_side_update(n,reward,s)
#one loop is one rotation
......
from tkinter import *
from logic import *
from random import *
from agent import *
from agent_afterstate import *
import numpy as np
import pickle
import time
# Number of finished games to play when the GUI is disabled.
TRAIN = 100000

# Board geometry handed to the tkinter widgets.
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10

# Colours for the board background and for empty cells.
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"

# Cell background colour, keyed by tile value.
BACKGROUND_COLOR_DICT = {
    2: "#eee4da",
    4: "#ede0c8",
    8: "#f2b179",
    16: "#f59563",
    32: "#f67c5f",
    64: "#f65e3b",
    128: "#edcf72",
    256: "#edcc61",
    512: "#edc850",
    1024: "#edc53f",
    2048: "#edc22e",
}

# Text colour, keyed by tile value: the 2 and 4 tiles use "#776e65",
# every larger tile uses "#f9f6f2".
CELL_COLOR_DICT = {
    tile: ("#776e65" if tile <= 4 else "#f9f6f2")
    for tile in BACKGROUND_COLOR_DICT
}

# Font used for the number shown in each cell.
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
    """2048 board that is played by a pre-trained ``afterstateAgent``.

    With ``DISPLAY`` set the board is drawn in a tkinter window and one move
    is played every 100 ms; otherwise moves are played in a plain loop until
    more than ``TRAIN`` games have finished.

    Attributes:
        DISPLAY: whether the tkinter GUI is created and refreshed.
        commands: maps each ``Action`` to the move function applied to the board.
        grid_cells: rows of ``Label`` widgets, one label per board cell.
        matrix: current board, as returned by ``new_game`` / ``add_two``.
        history: largest board value of every finished game.
        count: number of finished games.
        agent: the agent that picks the moves.
    """

    def __init__(self):
        """Build the board, load the agent weights and start playing.

        Note that construction does not return until play has ended: it
        either enters the tkinter main loop or runs the headless game loop.
        """
        self.DISPLAY = True
        if self.DISPLAY:
            Frame.__init__(self)

        self.commands = {
            Action.UP: up,
            Action.DOWN: down,
            Action.LEFT: left,
            Action.RIGHT: right,
        }

        self.grid_cells = []
        if self.DISPLAY:
            self.grid()
            self.master.title('2048')
            self.init_grid()

        self.reset()

        self.history = []
        self.count = 0

        self.agent = afterstateAgent(self.matrix)
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # weight files that were produced by a trusted training run.
        # The context manager closes the file even if unpickling fails.
        with open("train_0.0025_0.0_result_after_2000.txt", 'rb') as f:
            self.agent.W = pickle.load(f)
        print(self.agent.W[0])

        if self.DISPLAY:
            # key_down re-schedules itself through ``after``.
            self.key_down()
            self.mainloop()
        else:
            while self.count <= TRAIN:
                self.key_down()

    def reset(self):
        """Start a new game and, when the GUI is on, redraw the board."""
        self.init_matrix()
        if self.DISPLAY:
            self.update_grid_cells()

    def init_grid(self):
        """Create the GRID_LEN x GRID_LEN label widgets inside a background frame."""
        background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
        background.grid()
        for i in range(GRID_LEN):
            grid_row = []
            for j in range(GRID_LEN):
                cell = Frame(
                    background,
                    bg=BACKGROUND_COLOR_CELL_EMPTY,
                    width=SIZE/GRID_LEN,
                    height=SIZE/GRID_LEN,
                )
                cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
                t = Label(
                    master=cell,
                    text="",
                    bg=BACKGROUND_COLOR_CELL_EMPTY,
                    justify=CENTER,
                    font=FONT,
                    width=4,
                    height=2,
                )
                t.grid()
                grid_row.append(t)
            self.grid_cells.append(grid_row)

    def gen(self):
        """Return a random row/column index in ``[0, GRID_LEN - 1]``."""
        return randint(0, GRID_LEN - 1)

    def init_matrix(self):
        """Create a fresh board and apply ``add_two`` to it twice."""
        self.matrix = new_game(4)
        self.matrix = add_two(self.matrix)
        self.matrix = add_two(self.matrix)

    def update_grid_cells(self):
        """Copy the values in ``self.matrix`` onto the label widgets."""
        for i in range(GRID_LEN):
            for j in range(GRID_LEN):
                new_number = self.matrix[i][j]
                if new_number == 0:
                    self.grid_cells[i][j].configure(
                        text="", bg=BACKGROUND_COLOR_CELL_EMPTY
                    )
                else:
                    self.grid_cells[i][j].configure(
                        text=str(new_number),
                        bg=BACKGROUND_COLOR_DICT[new_number],
                        fg=CELL_COLOR_DICT[new_number],
                    )
        self.update_idletasks()

    def key_down(self):
        """Let the agent play one move and handle the end of a game.

        When the GUI is on, the method schedules itself to run again after
        100 ms.
        """
        self.agent.set_state(self.matrix)
        key = self.agent.act()
        # ``done`` is truthy when the move function reports that the move took
        # effect; a value other than 1 is treated as the reward of the move
        # (presumably the merge score -- confirm against ``logic``).
        self.matrix, done = self.commands[key](self.matrix)

        # Learning is switched off in this script, so ``reward`` is currently
        # only informational; it is the value a call such as
        # ``self.agent.update(self.matrix, reward)`` would consume.
        reward = 0
        if done:
            self.matrix = add_two(self.matrix)
            if self.DISPLAY:
                self.update_grid_cells()
            if done != 1:
                reward = done

        # Evaluate the board once; it does not change between the checks below.
        state = game_state(self.matrix)
        if state == 'win':
            print("win")
        if state == 'lose':
            print(np.max(self.matrix))

        if state == 'win' or state == 'lose':
            self.history += [np.max(self.matrix)]
            self.agent.reset()
            self.count += 1
            self.reset()

        if self.DISPLAY:
            # Ask tkinter to run the next move in 100 ms.
            self.after(100, self.key_down)

    def generate_next(self):
        """Put a 2 on a uniformly chosen empty cell.

        Does nothing when the board has no empty cell (the previous
        rejection-sampling loop would never terminate in that case).
        """
        empty_cells = [
            (row, col)
            for row in range(GRID_LEN)
            for col in range(GRID_LEN)
            if self.matrix[row][col] == 0
        ]
        if not empty_cells:
            return
        row, col = empty_cells[randint(0, len(empty_cells) - 1)]
        self.matrix[row][col] = 2
# Script entry: time the whole session. Constructing GameGrid runs its
# __init__, which starts play and only returns once play has ended, so the
# figure printed below covers the complete run.
start_time = time.time()
gamegrid = GameGrid()
# Report the elapsed wall-clock time in seconds.
print("--- %s seconds ---" % (time.time() - start_time))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment