Commit 80351eef authored by Wen Yao Jin

push

.DS_Store 0 → 100644
File added
: 0 → 100644
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
from tkinter import *
from logic import *
from random import *
from agent import *
import numpy as np
TRAIN = 5
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
BACKGROUND_COLOR_DICT = { 2:"#eee4da", 4:"#ede0c8", 8:"#f2b179", 16:"#f59563", \
32:"#f67c5f", 64:"#f65e3b", 128:"#edcf72", 256:"#edcc61", \
512:"#edc850", 1024:"#edc53f", 2048:"#edc22e" }
CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
32:"#f9f6f2", 64:"#f9f6f2", 128:"#f9f6f2", 256:"#f9f6f2", \
512:"#f9f6f2", 1024:"#f9f6f2", 2048:"#f9f6f2" }
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
def __init__(self):
Frame.__init__(self)
self.DISPLAY = False
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.grid_cells = []
if self.DISPLAY:
self.grid()
self.master.title('2048')
self.init_grid()
self.reset()
self.history = []
self.count = 0
self.agent = qLearningAgent2(self.matrix)
if self.DISPLAY:
self.key_down()
self.mainloop()
else:
            while self.count < TRAIN:
self.key_down()
def reset(self):
self.init_matrix()
if self.DISPLAY:
self.update_grid_cells()
def init_grid(self):
background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
background.grid()
for i in range(GRID_LEN):
grid_row = []
for j in range(GRID_LEN):
cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
# font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)
t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)
t.grid()
grid_row.append(t)
self.grid_cells.append(grid_row)
def gen(self):
return randint(0, GRID_LEN - 1)
def init_matrix(self):
self.matrix = new_game(4)
self.matrix=add_two(self.matrix)
self.matrix=add_two(self.matrix)
def update_grid_cells(self):
for i in range(GRID_LEN):
for j in range(GRID_LEN):
new_number = self.matrix[i][j]
if new_number == 0:
self.grid_cells[i][j].configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
else:
self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])
self.update_idletasks()
def key_down(self):
self.agent.set_state(self.matrix)
key = self.agent.act()
self.matrix,done = self.commands[key](self.matrix)
reward = 0
if done:
self.matrix = add_two(self.matrix)
if self.DISPLAY:
self.update_grid_cells()
if done!=1:
reward = done
# print(reward)
else:
reward = -10
if game_state(self.matrix)=='win':
reward = 1024
print("win")
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
if game_state(self.matrix)=='lose':
# reward = -10
# reward = np.log(np.max(self.matrix))
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Lose!",bg=BACKGROUND_COLOR_CELL_EMPTY)
print(np.max(self.matrix))
self.agent.update(self.matrix, reward)
if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
# print(self.agent.W)
self.history += [np.max(self.matrix)]
self.agent.reset()
self.count += 1
self.reset()
if (self.count == TRAIN):
np.savetxt("train_result_after_"+str(TRAIN)+".txt", self.agent.W)
plt.plot(self.history)
plt.show()
# print(reward)
# self.matrix
if (self.DISPLAY):
            # Tell Tkinter to wait 100 ms before the next call to key_down
self.after(100, self.key_down)
def generate_next(self):
index = (self.gen(), self.gen())
while self.matrix[index[0]][index[1]] != 0:
index = (self.gen(), self.gen())
self.matrix[index[0]][index[1]] = 2
gamegrid = GameGrid()
LICENSE 0 → 100755
The MIT License (MIT)
Copyright (c) 2014 Tay Yang Shun
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
2048-python
===========
Based on the popular game [2048](https://github.com/gabrielecirulli/2048) by Gabriele Cirulli, this is a Python version that uses Tkinter.
![screenshot](img/screenshot.png)
To start the game, run:
$ python3 puzzle.py
Contributors:
==
- [Tay Yang Shun](http://github.com/yangshun)
- [Emmanuel Goh](http://github.com/emman27)
\ No newline at end of file
agent.py 0 → 100644
from enum import IntEnum, unique
import numpy as np
@unique
class Action(IntEnum):
UP = 1
DOWN = 2
LEFT = 3
RIGHT = 4
class RandomAgent():
def __init__(self):
"""
Initialize your internal state
"""
pass
def act(self):
"""
Choose action depending on your internal state
"""
return Action(np.random.randint(4)+1)
def update(self, next_state, reward):
"""
Update your internal state
"""
pass
class qLearningAgent:
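    """
    TD(lambda) Q-learning with a linear function approximator: the state is the
    flattened board of log2-scaled tile values and Q(s, a) = phi(s) . W[:, a].
    Actions are chosen epsilon-greedily, and an eligibility trace spreads each
    TD error over recently visited features.
    """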
def __init__(self, mat, TD_lambda = 0.9, alpha = 0.1, gamma = 0.8, epsilon = 0.1):
self.set_state(mat)
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon # e-greedy
        self.TD_lambda = TD_lambda # TD(lambda) trace-decay parameter
self.W = np.zeros((len(self.state),len(Action))) #weight(Theta)
self.reset()
def reset(self):
self.trace = np.zeros((len(self.state),len(Action))) #eligibility trace
        self.first_step = True  # skip the update on the very first step
pass
    def _phi(self, state = None):
        # feature vector: the flattened board of log2 tile values, scaled into [0, 1]
        if state is None:
            state = self.state
        return state/11.0
def _size(self, mat):
return len(mat)*len(mat)
# def _grid(self):
# X = np.linspace(1, 4, 4)
# Y = np.linspace(1, 4, 4)
# X,Y = np.meshgrid(X ,Y)
# r,c = X.shape
# X = np.reshape(X,r*c)
# Y = np.reshape(Y,r*c)
# return X,Y
def act(self):
        i = np.random.rand()
if i > self.epsilon:
#e-greedy
#exploitation
self.forget = self.TD_lambda
action_index = np.argmax(np.dot(np.transpose(self.W), self._phi()))
else:
# exploration
self.forget = 0.0
action_index = np.random.randint(0, len(Action))
self._action_index = action_index
return Action(action_index+1)
def _update_trace(self):
self.trace *= self.forget*self.gamma
self.trace[:,self._action_index] += self._phi()
pass
def _target(self,next_state,reward):
#q learning target function
return reward + self.gamma * np.max(np.dot(np.transpose(self.W), self._phi(next_state)))
def update(self, next_state, reward):
next_state = np.reshape(next_state,self._size(next_state))
next_state = np.where(next_state!=0,np.log2(next_state),0)
# print(next_state)
        if self.first_step:
            # don't update on the very first step: there is no completed transition yet
            self.first_step = False
            return
self._update_trace()
self.W += self.alpha * (self._target(next_state,reward) \
- np.dot(np.transpose(self.W[:,self._action_index]), self._phi()))\
* self.trace
# #game stops, reset the agent
# self._reset()
pass
def set_state(self, state):
self.state = np.reshape(state,self._size(state))
# print(self.state)
self.state = np.where(self.state!=0,np.log2(self.state),0)
# print(self.state)
pass
class qLearningAgent2:
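    """
    N-tuple network variant: the board is described by four overlapping groups of
    cells (the two left-most columns and two 3x2 blocks, see self.tuple below).
    Each group indexes its own block of the weight table by the log2 values of its
    tiles, and Q(s, a) is the sum of the selected entries in column a of W. Action
    selection and the TD(lambda) update mirror qLearningAgent above.
    """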
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.5, gamma = 0.8, epsilon = 0.01):
self.state_per_tile = 10
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon # e-greedy
        self.TD_lambda = TD_lambda # TD(lambda) trace-decay parameter
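        # Overlapping cell groups used as features: the first two tuples are the two
        # left-most columns, the last two are 3x2 blocks; each tuple contributes
        # state_per_tile**len(tuple) rows to the weight table W.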
self.tuple = [[(0,0),(1,0),(2,0),(3,0)],\
[(0,1),(1,1),(2,1),(3,1)],\
[(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
[(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]]
self.feature_size = sum([self.state_per_tile**len(k) for k in self.tuple])
self.W = np.zeros((self.feature_size,len(Action))) #weight(Theta)
self.set_state(mat)
print(self.feature_size)
self.reset()
def reset(self):
self.trace = np.zeros((self.feature_size,len(Action))) #eligibility trace
        self.first_step = True  # skip the update on the very first step
pass
    def _index(self, state):
        # map the board to one active feature index per tuple, offset into the shared table
        offset = 0
        list_index = []
        for t in self.tuple:
            index = self._calculate_index(state, t)
            # assert offset + index < self.feature_size, "bad calculation of feature index"
            list_index += [offset + index]
            offset += self.state_per_tile**len(t)
        return list_index
    def _phi(self, state = None):
        # Q-values of all actions: sum of the weights of the active tuple features
        if state is None:
            return np.sum(self.W[self.index,:], axis=0)
        else:
            # print(self.W[self._index(state),:])
            return np.sum(self.W[self._index(state),:], axis=0)
    def _phi_gradient(self):
        # gradient of Q with respect to W: a 0/1 indicator over the active feature indices
        res = np.zeros(self.feature_size)
        res[self.index] = 1
        return res
    def _calculate_index(self, state, tile_tuple):
        # encode the tiles of one tuple as a base-(state_per_tile) integer
        # (note: a tile of 2**state_per_tile or larger would overflow its digit)
        index = 0
        for r, c in tile_tuple:
            index *= self.state_per_tile
            if state[r][c] != 0:
                index += int(np.log2(state[r][c]))
        return index
def _size(self, mat):
return len(mat)*len(mat)
def act(self):
        i = np.random.rand()
if i > self.epsilon:
#e-greedy
#exploitation
self.forget = self.TD_lambda
action_index = np.argmax(self._phi())
# print(self._phi())
else:
# exploration
self.forget = 0.0
action_index = np.random.randint(0, len(Action))
self._action_index = action_index
return Action(action_index+1)
def _update_trace(self):
self.trace *= self.forget*self.gamma
self.trace[:,self._action_index] += self._phi_gradient()
# print(np.sum(self.trace,axis=1))
pass
def _target(self,next_state,reward):
#q learning target function
return reward + self.gamma * np.max(self._phi(next_state))
def update(self, next_state, reward):
# print(next_state)
        if self.first_step:
            # don't update on the very first step: there is no completed transition yet
            self.first_step = False
            return
self._update_trace()
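        # Semi-gradient TD(lambda) step: W <- W + alpha * delta * trace, where
        # delta = (reward + gamma * max_a Q(next_state, a)) - Q(state, chosen action).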
self.W += self.alpha * (self._target(next_state,reward) \
- self._phi()[self._action_index])\
* self.trace
# print(self._target(next_state,reward) \
# - self._phi()[self._action_index])
# #game stops, reset the agent
# self._reset()
pass
def set_state(self, state):
self.state = state
# print(self.state)
self.index = self._index(self.state)
# assert len(self.phi) ==4,"wrong calculation of phi"
# print(self.index)
pass
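# Minimal usage sketch (this mirrors how puzzle.py drives the agent; the helpers
# come from logic.py, and a full loop would also check game_state and reset):
#
#   from logic import new_game, add_two, up, down, left, right
#   moves = {Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
#   board = add_two(add_two(new_game(4)))
#   agent = qLearningAgent2(board)
#   agent.set_state(board)               # tell the agent the current board
#   action = agent.act()                 # epsilon-greedy choice of an Action
#   board, score = moves[action](board)  # apply the move; score is the merge total
#   agent.update(board, score)           # TD update toward score + gamma * max Q(board, .)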
img/screenshot.png (binary image, 103 KiB)
logic.py 0 → 100755
#
# CS1010FC --- Programming Methodology
#
# Mission N Solutions
#
# Note that written answers are commented out to allow us to run your
# code easily while grading your problem set.
from random import *
#######
#Task 1a#
#######
# [Marking Scheme]
# Points to note:
# Matrix elements must be equal but not identical
# 1 mark for creating the correct matrix
def new_game(n):
matrix = []
for i in range(n):
matrix.append([0] * n)
return matrix
###########
# Task 1b #
###########
# [Marking Scheme]
# Points to note:
# Must ensure that it is created on a zero entry
# 1 mark for creating the correct loop
def add_two(mat):
a=randint(0,len(mat)-1)
b=randint(0,len(mat)-1)
while(mat[a][b]!=0):
a=randint(0,len(mat)-1)
b=randint(0,len(mat)-1)
mat[a][b]=2
return mat
###########
# Task 1c #
###########
# [Marking Scheme]
# Points to note:
# Matrix elements must be equal but not identical
# 0 marks for completely wrong solutions
# 1 mark for getting only one condition correct
# 2 marks for getting two of the three conditions
# 3 marks for correct checking
def game_state(mat):
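    # returns 'win' if a 2048 tile exists, 'not over' if a move is still possible
    # (an empty cell or a mergeable adjacent pair), and 'lose' otherwise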
for i in range(len(mat)):
for j in range(len(mat[0])):
if mat[i][j]==2048:
return 'win'
for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below
for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution
if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:
return 'not over'
for i in range(len(mat)): #check for any zero entries
for j in range(len(mat[0])):
if mat[i][j]==0:
return 'not over'
for k in range(len(mat)-1): #to check the left/right entries on the last row
if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:
return 'not over'
for j in range(len(mat)-1): #check up/down entries on last column
if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:
return 'not over'
return 'lose'
###########
# Task 2a #
###########
# [Marking Scheme]
# Points to note:
# 0 marks for completely incorrect solutions
# 1 mark for solutions that show general understanding
# 2 marks for correct solutions that work for all sizes of matrices
def reverse(mat):
new=[]
for i in range(len(mat)):
new.append([])
for j in range(len(mat[0])):
new[i].append(mat[i][len(mat[0])-j-1])
return new
###########
# Task 2b #
###########
# [Marking Scheme]
# Points to note:
# 0 marks for completely incorrect solutions
# 1 mark for solutions that show general understanding
# 2 marks for correct solutions that work for all sizes of matrices
def transpose(mat):
new=[]
for i in range(len(mat[0])):
new.append([])
for j in range(len(mat)):
new[i].append(mat[j][i])
return new
##########
# Task 3 #
##########
# [Marking Scheme]
# Points to note:
# The way to do movement is compress -> merge -> compress again
# Basically if they can solve one side, and use transpose and reverse correctly they should
# be able to solve the entire thing just by flipping the matrix around
# No idea how to grade this one at the moment. I have it pegged to 8 (which gives you like,
# 2 per up/down/left/right?) But if you get one correct likely to get all correct so...
# Check the down one. Reverse/transpose if ordered wrongly will give you wrong result.
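# In the move functions below every direction is reduced to a left move:
#   left  : cover_up -> merge -> cover_up
#   right : reverse, do a left move, reverse back
#   up    : transpose, do a left move, transpose back
#   down  : reverse(transpose(mat)), do a left move, transpose(reverse(mat)) back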
def cover_up(mat):
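    # slide the non-zero tiles of each row to the left; done becomes 1 if any tile moved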
new=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
done=0
for i in range(4):
count=0
for j in range(4):
if mat[i][j]!=0:
new[i][count]=mat[i][j]
if j!=count:
done=1
count+=1
return (new,done)
def merge(mat):
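    # merge equal adjacent tiles from left to right; done accumulates the values of the
    # merged tiles and doubles as the move score (used as the reward in puzzle.py)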
done=0
for i in range(4):
for j in range(3):
if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:
mat[i][j]*=2
mat[i][j+1]=0
done += mat[i][j]
return (mat,done)
def up(game):
# print("up")
# return matrix after shifting up
game=transpose(game)
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=transpose(game)
return (game,done)
def down(game):
# print("down")
game=reverse(transpose(game))
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=transpose(reverse(game))
return (game,done)
def left(game):
# print("left")
# return matrix after shifting left
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
return (game,done)
def right(game):
# print("right")
# return matrix after shifting right
game=reverse(game)
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=reverse(game)
return (game,done)
puzzle.py 0 → 100755
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
from tkinter import *
from logic import *
from random import *
from agent import *
import numpy as np
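# TRAIN is the number of games played in headless training mode; the learned weights
# are saved to train_result_after_<TRAIN>.txt and the score history is plotted at the end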
TRAIN = 5
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
BACKGROUND_COLOR_DICT = { 2:"#eee4da", 4:"#ede0c8", 8:"#f2b179", 16:"#f59563", \
32:"#f67c5f", 64:"#f65e3b", 128:"#edcf72", 256:"#edcc61", \
512:"#edc850", 1024:"#edc53f", 2048:"#edc22e" }
CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
32:"#f9f6f2", 64:"#f9f6f2", 128:"#f9f6f2", 256:"#f9f6f2", \
512:"#f9f6f2", 1024:"#f9f6f2", 2048:"#f9f6f2" }
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
def __init__(self):
self.DISPLAY = False
if self.DISPLAY:
Frame.__init__(self)
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.grid_cells = []
if self.DISPLAY:
self.grid()
self.master.title('2048')
self.init_grid()
self.reset()
self.history = []
self.count = 0
self.agent = qLearningAgent2(self.matrix)
if self.DISPLAY:
self.key_down()
self.mainloop()
else:
            while self.count < TRAIN:
self.key_down()
def reset(self):
self.init_matrix()
if self.DISPLAY:
self.update_grid_cells()
def init_grid(self):
background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
background.grid()
for i in range(GRID_LEN):
grid_row = []
for j in range(GRID_LEN):
cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
# font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)
t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)
t.grid()
grid_row.append(t)
self.grid_cells.append(grid_row)
def gen(self):
return randint(0, GRID_LEN - 1)
def init_matrix(self):
self.matrix = new_game(4)
self.matrix=add_two(self.matrix)
self.matrix=add_two(self.matrix)
def update_grid_cells(self):
for i in range(GRID_LEN):
for j in range(GRID_LEN):
new_number = self.matrix[i][j]
if new_number == 0:
self.grid_cells[i][j].configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
else:
self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])
self.update_idletasks()
def key_down(self):
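        # One training step: ask the agent for a move, apply it, and compute the reward
        # (the merge score of the move, a flat -10 penalty otherwise, plus a 1024 bonus
        # on a win). The agent then updates its weights. When a game ends, the highest
        # tile is recorded in self.history and the board is reset; after TRAIN games the
        # weights are saved and the history is plotted.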
self.agent.set_state(self.matrix)
key = self.agent.act()
self.matrix,done = self.commands[key](self.matrix)
reward = 0
if done:
self.matrix = add_two(self.matrix)
if self.DISPLAY:
self.update_grid_cells()
if done!=1:
reward = done
# print(reward)
else:
reward = -10
if game_state(self.matrix)=='win':
reward = 1024
print("win")
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
if game_state(self.matrix)=='lose':
# reward = -10
# reward = np.log(np.max(self.matrix))
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Lose!",bg=BACKGROUND_COLOR_CELL_EMPTY)
print(np.max(self.matrix))
self.agent.update(self.matrix, reward)
if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
# print(self.agent.W)
self.history += [np.max(self.matrix)]
self.agent.reset()
self.count += 1
self.reset()
if (self.count == TRAIN):
np.savetxt("train_result_after_"+str(TRAIN)+".txt", self.agent.W)
plt.plot(self.history)
plt.show()
# print(reward)
# self.matrix
if (self.DISPLAY):
            # Tell Tkinter to wait 100 ms before the next call to key_down
self.after(100, self.key_down)
def generate_next(self):
index = (self.gen(), self.gen())
while self.matrix[index[0]][index[1]] != 0:
index = (self.gen(), self.gen())
self.matrix[index[0]][index[1]] = 2
gamegrid = GameGrid()