Skip to content
Snippets Groups Projects
Commit 8b244b33 authored by Wen Yao Jin's avatar Wen Yao Jin
Browse files

go

parents 0f5dd0a0 beae7c53
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -16,6 +16,7 @@ class afterstateAgent:
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.alpha = alpha
# self.gamma = gamma
self.epsilon_origin = epsilon
self.epsilon = epsilon # e-greedy
# self.TD_lambda = 1-epsilon # TD(lamdba)
self.TD_lambda = TD_lambda
......@@ -77,6 +78,7 @@ class afterstateAgent:
self.count = 0
self.first_step = True# used to avoid update the first time
self.explore = 0
self.epsilon -= self.epsilon_origin/2000
return
def _reset_trace(self):
......@@ -168,6 +170,7 @@ class afterstateAgent:
i = np.random.rand();
if i < self.epsilon: #explore
self.explore += 1
self.forget = 0.0
return sum(phi_array) + 10000
return sum(phi_array) + done
......@@ -200,11 +203,9 @@ class afterstateAgent:
if self.symmetric>0:
for i in range(4):
s = transpose(s)
self.set_state(s)
n = transpose(n)
self.one_side_update(n,reward,s)
s = reverse(s)
self.set_state(s)
n = reverse(n)
self.one_side_update(n,reward,s)
#one loop is one rotation
......
from tkinter import *
from logic import *
from random import *
from agent import *
from agent_afterstate import *
import numpy as np
import pickle
import time
# --- Run configuration -------------------------------------------------------
# Upper bound on the game counter used by the headless (non-display) loop.
TRAIN = 100000

# --- Board geometry (pixels / cells) -----------------------------------------
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10

# --- Colours -----------------------------------------------------------------
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"

# Tile value -> background colour of the cell showing that value.
BACKGROUND_COLOR_DICT = {
    2: "#eee4da",
    4: "#ede0c8",
    8: "#f2b179",
    16: "#f59563",
    32: "#f67c5f",
    64: "#f65e3b",
    128: "#edcf72",
    256: "#edcc61",
    512: "#edc850",
    1024: "#edc53f",
    2048: "#edc22e",
}

# Tile value -> text colour. Only 2 and 4 use the dark colour; every larger
# tile shares the same light one.
CELL_COLOR_DICT = {
    2: "#776e65",
    4: "#776e65",
    **dict.fromkeys((8, 16, 32, 64, 128, 256, 512, 1024, 2048), "#f9f6f2"),
}

# (family, size, weight) font tuple used for the tile labels.
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
    """Drive a 2048 board with a pre-trained ``afterstateAgent``.

    With ``display`` enabled the board is rendered in a Tk window and one move
    is played every 100 ms; otherwise games are played back-to-back without any
    GUI until more than ``TRAIN`` games have finished.

    NOTE: the constructor does not return until the session is over -- it
    enters ``mainloop()`` (display mode) or the headless loop itself.
    """

    def __init__(self, *, display=True,
                 weights_path="train_0.0025_0.0_result_after_2000.txt"):
        """Build the board, load the agent's weights and run the session.

        Args:
            display: When true (the default), create the Tk widgets and animate
                the game. When false, ``Frame.__init__`` is never called and
                no widget method is used.
            weights_path: Pickle file whose content is assigned to
                ``self.agent.W``.

        Raises:
            OSError: If ``weights_path`` cannot be opened.
        """
        self.DISPLAY = display
        if self.DISPLAY:
            Frame.__init__(self)

        # Maps an agent action to the board-transforming function for it.
        self.commands = {Action.UP: up, Action.DOWN: down,
                         Action.LEFT: left, Action.RIGHT: right}

        self.grid_cells = []
        if self.DISPLAY:
            self.grid()
            self.master.title('2048')
            self.init_grid()

        self.reset()
        self.history = []  # highest tile reached in each finished game
        self.count = 0     # number of finished games

        self.agent = afterstateAgent(self.matrix)
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only ever point weights_path at a file you produced yourself.
        # The context manager closes the handle even if unpickling fails.
        with open(weights_path, 'rb') as f:
            self.agent.W = pickle.load(f)
        print(self.agent.W[0])

        if self.DISPLAY:
            # key_down() re-schedules itself through Tk's after().
            self.key_down()
            self.mainloop()
        else:
            while self.count <= TRAIN:
                self.key_down()

    def reset(self):
        """Start a fresh game and, in display mode, redraw the board."""
        self.init_matrix()
        if self.DISPLAY:
            self.update_grid_cells()

    def init_grid(self):
        """Create the GRID_LEN x GRID_LEN label widgets in ``self.grid_cells``."""
        background = Frame(self, bg=BACKGROUND_COLOR_GAME,
                           width=SIZE, height=SIZE)
        background.grid()
        for i in range(GRID_LEN):
            grid_row = []
            for j in range(GRID_LEN):
                cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY,
                             width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
                cell.grid(row=i, column=j,
                          padx=GRID_PADDING, pady=GRID_PADDING)
                t = Label(master=cell, text="",
                          bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER,
                          font=FONT, width=4, height=2)
                t.grid()
                grid_row.append(t)
            self.grid_cells.append(grid_row)

    def gen(self):
        """Return a random valid row/column index (0 .. GRID_LEN - 1)."""
        return randint(0, GRID_LEN - 1)

    def init_matrix(self):
        """Replace ``self.matrix`` with a new board after two ``add_two`` calls."""
        self.matrix = new_game(4)
        self.matrix = add_two(self.matrix)
        self.matrix = add_two(self.matrix)

    def update_grid_cells(self):
        """Copy ``self.matrix`` into the label widgets and refresh the window.

        Raises:
            KeyError: If a non-zero tile value has no entry in the colour
                tables (i.e. anything other than 2 .. 2048).
        """
        for i in range(GRID_LEN):
            for j in range(GRID_LEN):
                new_number = self.matrix[i][j]
                if new_number == 0:
                    self.grid_cells[i][j].configure(
                        text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
                else:
                    self.grid_cells[i][j].configure(
                        text=str(new_number),
                        bg=BACKGROUND_COLOR_DICT[new_number],
                        fg=CELL_COLOR_DICT[new_number])
        self.update_idletasks()

    def key_down(self):
        """Play one agent move and handle the end of a game.

        The agent chooses an action for the current board, the matching command
        is applied, and when the command reports a truthy ``done`` a new tile
        is added. If the resulting board is won or lost, the highest tile is
        recorded in ``self.history``, the agent and the board are reset and
        ``self.count`` is incremented. In display mode the method schedules
        itself to run again after 100 ms.
        """
        self.agent.set_state(self.matrix)
        key = self.agent.act()
        # Each command returns (new_matrix, done).
        # NOTE(review): `done` looks like "falsy = board unchanged, 1 = moved
        # without a score, anything else = score of the move" -- confirm
        # against the logic module.
        self.matrix, done = self.commands[key](self.matrix)

        reward = 0
        if done:
            self.matrix = add_two(self.matrix)
            if self.DISPLAY:
                self.update_grid_cells()
            if done != 1:
                reward = done
        # NOTE: learning is switched off in this player; `reward` is computed
        # only so that `self.agent.update(self.matrix, reward)` can be
        # re-enabled at this point.

        # Evaluate the board once instead of once per comparison.
        state = game_state(self.matrix)
        if state == 'win':
            print("win")
        elif state == 'lose':
            print(np.max(self.matrix))

        if state in ('win', 'lose'):
            self.history += [np.max(self.matrix)]
            self.agent.reset()
            self.count += 1
            self.reset()

        if self.DISPLAY:
            # Ask Tk to play the next move in 100 ms.
            self.after(100, self.key_down)

    def generate_next(self):
        """Write a 2 into a uniformly random empty cell of ``self.matrix``.

        The board is modified in place. If there is no empty cell the board is
        left untouched (previously this case looped forever).
        """
        if all(cell != 0 for row in self.matrix for cell in row):
            return
        # Rejection sampling: redraw coordinates until an empty cell is hit.
        index = (self.gen(), self.gen())
        while self.matrix[index[0]][index[1]] != 0:
            index = (self.gen(), self.gen())
        self.matrix[index[0]][index[1]] = 2
# Script entry point. Constructing GameGrid runs the entire session from
# inside its __init__ (Tk main loop in display mode, headless loop otherwise),
# so the elapsed time printed below covers the whole run, not just set-up.
start_time = time.time()
gamegrid = GameGrid()
print("--- %s seconds ---" % (time.time() - start_time))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment