Commit 80351eef authored by Wen Yao Jin

push

.DS_Store 0 → 100644
File added
: 0 → 100644
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
from tkinter import *
from logic import *
from random import *
from agent import *
import numpy as np
TRAIN = 5
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
BACKGROUND_COLOR_DICT = { 2:"#eee4da", 4:"#ede0c8", 8:"#f2b179", 16:"#f59563", \
32:"#f67c5f", 64:"#f65e3b", 128:"#edcf72", 256:"#edcc61", \
512:"#edc850", 1024:"#edc53f", 2048:"#edc22e" }
CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
32:"#f9f6f2", 64:"#f9f6f2", 128:"#f9f6f2", 256:"#f9f6f2", \
512:"#f9f6f2", 1024:"#f9f6f2", 2048:"#f9f6f2" }
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
def __init__(self):
Frame.__init__(self)
self.DISPLAY = False
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.grid_cells = []
if self.DISPLAY:
self.grid()
self.master.title('2048')
self.init_grid()
self.reset()
self.history = []
self.count = 0
self.agent = qLearningAgent2(self.matrix)
if self.DISPLAY:
self.key_down()
self.mainloop()
else:
            while self.count < TRAIN:
self.key_down()
def reset(self):
self.init_matrix()
if self.DISPLAY:
self.update_grid_cells()
def init_grid(self):
background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
background.grid()
for i in range(GRID_LEN):
grid_row = []
for j in range(GRID_LEN):
cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
# font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)
t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)
t.grid()
grid_row.append(t)
self.grid_cells.append(grid_row)
def gen(self):
return randint(0, GRID_LEN - 1)
def init_matrix(self):
self.matrix = new_game(4)
self.matrix=add_two(self.matrix)
self.matrix=add_two(self.matrix)
def update_grid_cells(self):
for i in range(GRID_LEN):
for j in range(GRID_LEN):
new_number = self.matrix[i][j]
if new_number == 0:
self.grid_cells[i][j].configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
else:
self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])
self.update_idletasks()
def key_down(self):
self.agent.set_state(self.matrix)
key = self.agent.act()
self.matrix,done = self.commands[key](self.matrix)
reward = 0
if done:
self.matrix = add_two(self.matrix)
if self.DISPLAY:
self.update_grid_cells()
if done!=1:
reward = done
# print(reward)
else:
reward = -10
if game_state(self.matrix)=='win':
reward = 1024
print("win")
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
if game_state(self.matrix)=='lose':
# reward = -10
# reward = np.log(np.max(self.matrix))
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Lose!",bg=BACKGROUND_COLOR_CELL_EMPTY)
print(np.max(self.matrix))
self.agent.update(self.matrix, reward)
if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
# print(self.agent.W)
self.history += [np.max(self.matrix)]
self.agent.reset()
self.count += 1
self.reset()
if (self.count == TRAIN):
np.savetxt("train_result_after_"+str(TRAIN)+".txt", self.agent.W)
plt.plot(self.history)
plt.show()
# print(reward)
# self.matrix
if (self.DISPLAY):
            # Tell Tkinter to wait 100 ms before the next call to key_down
self.after(100, self.key_down)
def generate_next(self):
index = (self.gen(), self.gen())
while self.matrix[index[0]][index[1]] != 0:
index = (self.gen(), self.gen())
self.matrix[index[0]][index[1]] = 2
gamegrid = GameGrid()
LICENSE 0 → 100755
The MIT License (MIT)
Copyright (c) 2014 Tay Yang Shun
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
2048-python
===========
Based on the popular game [2048](https://github.com/gabrielecirulli/2048) by Gabriele Cirulli, this is a Python version that uses Tkinter.
![screenshot](img/screenshot.png)
To start the game, run:
$ python3 puzzle.py
Contributors:
==
- [Tay Yang Shun](http://github.com/yangshun)
- [Emmanuel Goh](http://github.com/emman27)
\ No newline at end of file
agent.py 0 → 100644
from enum import IntEnum, unique
import numpy as np
@unique
class Action(IntEnum):
UP = 1
DOWN = 2
LEFT = 3
RIGHT = 4
class RandomAgent():
def __init__(self):
"""
Initialize your internal state
"""
pass
def act(self):
"""
Choose action depending on your internal state
"""
return Action(np.random.randint(4)+1)
def update(self, next_state, reward):
"""
Update your internal state
"""
pass
class qLearningAgent:
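    """
    TD(lambda) Q-learning with a linear function approximator: the state is the
    flattened board of log2-scaled tile values and Q(s, a) = phi(s) . W[:, a].
    Actions are chosen epsilon-greedily, and an eligibility trace spreads each
    TD error over recently visited features.
    """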
def __init__(self, mat, TD_lambda = 0.9, alpha = 0.1, gamma = 0.8, epsilon = 0.1):
self.set_state(mat)
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon # e-greedy
        self.TD_lambda = TD_lambda # TD(lambda) trace-decay parameter
self.W = np.zeros((len(self.state),len(Action))) #weight(Theta)
self.reset()
def reset(self):
self.trace = np.zeros((len(self.state),len(Action))) #eligibility trace
        self.first_step = True  # skip the update on the very first step
pass
    def _phi(self, state = None):
        # feature vector: the flattened board of log2 tile values, scaled into [0, 1]
        if state is None:
            state = self.state
        return state/11.0
def _size(self, mat):
return len(mat)*len(mat)
# def _grid(self):
# X = np.linspace(1, 4, 4)
# Y = np.linspace(1, 4, 4)
# X,Y = np.meshgrid(X ,Y)
# r,c = X.shape
# X = np.reshape(X,r*c)
# Y = np.reshape(Y,r*c)
# return X,Y
def act(self):
        i = np.random.rand()
if i > self.epsilon:
#e-greedy
#exploitation
self.forget = self.TD_lambda
action_index = np.argmax(np.dot(np.transpose(self.W), self._phi()))
else:
# exploration
self.forget = 0.0
action_index = np.random.randint(0, len(Action))
self._action_index = action_index
return Action(action_index+1)
def _update_trace(self):
self.trace *= self.forget*self.gamma
self.trace[:,self._action_index] += self._phi()
pass
def _target(self,next_state,reward):
#q learning target function
return reward + self.gamma * np.max(np.dot(np.transpose(self.W), self._phi(next_state)))
def update(self, next_state, reward):
next_state = np.reshape(next_state,self._size(next_state))
next_state = np.where(next_state!=0,np.log2(next_state),0)
# print(next_state)
        if self.first_step:
            # don't update on the very first step: there is no completed transition yet
            self.first_step = False
            return
self._update_trace()
self.W += self.alpha * (self._target(next_state,reward) \
- np.dot(np.transpose(self.W[:,self._action_index]), self._phi()))\
* self.trace
# #game stops, reset the agent
# self._reset()
pass
def set_state(self, state):
self.state = np.reshape(state,self._size(state))
# print(self.state)
self.state = np.where(self.state!=0,np.log2(self.state),0)
# print(self.state)
pass
class qLearningAgent2:
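    """
    N-tuple network variant: the board is described by four overlapping groups of
    cells (the two left-most columns and two 3x2 blocks, see self.tuple below).
    Each group indexes its own block of the weight table by the log2 values of its
    tiles, and Q(s, a) is the sum of the selected entries in column a of W. Action
    selection and the TD(lambda) update mirror qLearningAgent above.
    """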
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.5, gamma = 0.8, epsilon = 0.01):
self.state_per_tile = 10
self.alpha = alpha
self.gamma = gamma
self.epsilon = epsilon # e-greedy
        self.TD_lambda = TD_lambda # TD(lambda) trace-decay parameter
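        # Overlapping cell groups used as features: the first two tuples are the two
        # left-most columns, the last two are 3x2 blocks; each tuple contributes
        # state_per_tile**len(tuple) rows to the weight table W.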
self.tuple = [[(0,0),(1,0),(2,0),(3,0)],\
[(0,1),(1,1),(2,1),(3,1)],\
[(0,1),(1,1),(2,1),(0,2),(1,2),(2,2)],\
[(0,2),(1,2),(2,2),(0,3),(1,3),(2,3)]]
self.feature_size = sum([self.state_per_tile**len(k) for k in self.tuple])
self.W = np.zeros((self.feature_size,len(Action))) #weight(Theta)
self.set_state(mat)
print(self.feature_size)
self.reset()
def reset(self):
self.trace = np.zeros((self.feature_size,len(Action))) #eligibility trace
        self.first_step = True  # skip the update on the very first step
pass
    def _index(self, state):
        # map the board to one active feature index per tuple, offset into the shared table
        offset = 0
        list_index = []
        for t in self.tuple:
            index = self._calculate_index(state, t)
            # assert offset + index < self.feature_size, "bad calculation of feature index"
            list_index += [offset + index]
            offset += self.state_per_tile**len(t)
        return list_index
    def _phi(self, state = None):
        # Q-values of all actions: sum of the weights of the active tuple features
        if state is None:
            return np.sum(self.W[self.index,:], axis=0)
        else:
            # print(self.W[self._index(state),:])
            return np.sum(self.W[self._index(state),:], axis=0)
    def _phi_gradient(self):
        # gradient of Q with respect to W: a 0/1 indicator over the active feature indices
        res = np.zeros(self.feature_size)
        res[self.index] = 1
        return res
    def _calculate_index(self, state, tile_tuple):
        # encode the tiles of one tuple as a base-(state_per_tile) integer
        # (note: a tile of 2**state_per_tile or larger would overflow its digit)
        index = 0
        for r, c in tile_tuple:
            index *= self.state_per_tile
            if state[r][c] != 0:
                index += int(np.log2(state[r][c]))
        return index
def _size(self, mat):
return len(mat)*len(mat)
def act(self):
        i = np.random.rand()
if i > self.epsilon:
#e-greedy
#exploitation
self.forget = self.TD_lambda
action_index = np.argmax(self._phi())
# print(self._phi())
else:
# exploration
self.forget = 0.0
action_index = np.random.randint(0, len(Action))
self._action_index = action_index
return Action(action_index+1)
def _update_trace(self):
self.trace *= self.forget*self.gamma
self.trace[:,self._action_index] += self._phi_gradient()
# print(np.sum(self.trace,axis=1))
pass
def _target(self,next_state,reward):
#q learning target function
return reward + self.gamma * np.max(self._phi(next_state))
def update(self, next_state, reward):
# print(next_state)
        if self.first_step:
            # don't update on the very first step: there is no completed transition yet
            self.first_step = False
            return
self._update_trace()
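        # Semi-gradient TD(lambda) step: W <- W + alpha * delta * trace, where
        # delta = (reward + gamma * max_a Q(next_state, a)) - Q(state, chosen action).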
self.W += self.alpha * (self._target(next_state,reward) \
- self._phi()[self._action_index])\
* self.trace
# print(self._target(next_state,reward) \
# - self._phi()[self._action_index])
# #game stops, reset the agent
# self._reset()
pass
def set_state(self, state):
self.state = state
# print(self.state)
self.index = self._index(self.state)
# assert len(self.phi) ==4,"wrong calculation of phi"
# print(self.index)
pass
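# Minimal usage sketch (this mirrors how puzzle.py drives the agent; the helpers
# come from logic.py, and a full loop would also check game_state and reset):
#
#   from logic import new_game, add_two, up, down, left, right
#   moves = {Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
#   board = add_two(add_two(new_game(4)))
#   agent = qLearningAgent2(board)
#   agent.set_state(board)               # tell the agent the current board
#   action = agent.act()                 # epsilon-greedy choice of an Action
#   board, score = moves[action](board)  # apply the move; score is the merge total
#   agent.update(board, score)           # TD update toward score + gamma * max Q(board, .)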
img/screenshot.png (binary image, 103 KiB)
logic.py 0 → 100755
#
# CS1010FC --- Programming Methodology
#
# Mission N Solutions
#
# Note that written answers are commented out to allow us to run your
# code easily while grading your problem set.
from random import *
#######
#Task 1a#
#######
# [Marking Scheme]
# Points to note:
# Matrix elements must be equal but not identical
# 1 mark for creating the correct matrix
def new_game(n):
matrix = []
for i in range(n):
matrix.append([0] * n)
return matrix
###########
# Task 1b #
###########
# [Marking Scheme]
# Points to note:
# Must ensure that it is created on a zero entry
# 1 mark for creating the correct loop
def add_two(mat):
a=randint(0,len(mat)-1)
b=randint(0,len(mat)-1)
while(mat[a][b]!=0):
a=randint(0,len(mat)-1)
b=randint(0,len(mat)-1)
mat[a][b]=2
return mat
###########
# Task 1c #
###########
# [Marking Scheme]
# Points to note:
# Matrix elements must be equal but not identical
# 0 marks for completely wrong solutions
# 1 mark for getting only one condition correct
# 2 marks for getting two of the three conditions
# 3 marks for correct checking
def game_state(mat):
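    # returns 'win' if a 2048 tile exists, 'not over' if a move is still possible
    # (an empty cell or a mergeable adjacent pair), and 'lose' otherwise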
for i in range(len(mat)):
for j in range(len(mat[0])):
if mat[i][j]==2048:
return 'win'
for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below
for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution
if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:
return 'not over'
for i in range(len(mat)): #check for any zero entries
for j in range(len(mat[0])):
if mat[i][j]==0:
return 'not over'
for k in range(len(mat)-1): #to check the left/right entries on the last row
if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:
return 'not over'
for j in range(len(mat)-1): #check up/down entries on last column
if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:
return 'not over'
return 'lose'
###########
# Task 2a #
###########
# [Marking Scheme]
# Points to note:
# 0 marks for completely incorrect solutions
# 1 mark for solutions that show general understanding
# 2 marks for correct solutions that work for all sizes of matrices
def reverse(mat):
new=[]
for i in range(len(mat)):
new.append([])
for j in range(len(mat[0])):
new[i].append(mat[i][len(mat[0])-j-1])
return new
###########
# Task 2b #
###########
# [Marking Scheme]
# Points to note:
# 0 marks for completely incorrect solutions
# 1 mark for solutions that show general understanding
# 2 marks for correct solutions that work for all sizes of matrices
def transpose(mat):
new=[]
for i in range(len(mat[0])):
new.append([])
for j in range(len(mat)):
new[i].append(mat[j][i])
return new
##########
# Task 3 #
##########
# [Marking Scheme]
# Points to note:
# The way to do movement is compress -> merge -> compress again
# Basically if they can solve one side, and use transpose and reverse correctly they should
# be able to solve the entire thing just by flipping the matrix around
# No idea how to grade this one at the moment. I have it pegged to 8 (which gives you like,
# 2 per up/down/left/right?) But if you get one correct likely to get all correct so...
# Check the down one. Reverse/transpose if ordered wrongly will give you wrong result.
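# In the move functions below every direction is reduced to a left move:
#   left  : cover_up -> merge -> cover_up
#   right : reverse, do a left move, reverse back
#   up    : transpose, do a left move, transpose back
#   down  : reverse(transpose(mat)), do a left move, transpose(reverse(mat)) back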
def cover_up(mat):
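    # slide the non-zero tiles of each row to the left; done becomes 1 if any tile moved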
new=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
done=0
for i in range(4):
count=0
for j in range(4):
if mat[i][j]!=0:
new[i][count]=mat[i][j]
if j!=count:
done=1
count+=1
return (new,done)
def merge(mat):
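    # merge equal adjacent tiles from left to right; done accumulates the values of the
    # merged tiles and doubles as the move score (used as the reward in puzzle.py)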
done=0
for i in range(4):
for j in range(3):
if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:
mat[i][j]*=2
mat[i][j+1]=0
done += mat[i][j]
return (mat,done)
def up(game):
# print("up")
# return matrix after shifting up
game=transpose(game)
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=transpose(game)
return (game,done)
def down(game):
# print("down")
game=reverse(transpose(game))
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=transpose(reverse(game))
return (game,done)
def left(game):
# print("left")
# return matrix after shifting left
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
return (game,done)
def right(game):
# print("right")
# return matrix after shifting right
game=reverse(game)
game,done=cover_up(game)
temp=merge(game)
game=temp[0]
done=max(done,temp[1])
game=cover_up(game)[0]
game=reverse(game)
return (game,done)
puzzle.py 0 → 100755
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
from tkinter import *
from logic import *
from random import *
from agent import *
import numpy as np
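# TRAIN is the number of games played in headless training mode; the learned weights
# are saved to train_result_after_<TRAIN>.txt and the score history is plotted at the end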
TRAIN = 5
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10
BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
BACKGROUND_COLOR_DICT = { 2:"#eee4da", 4:"#ede0c8", 8:"#f2b179", 16:"#f59563", \
32:"#f67c5f", 64:"#f65e3b", 128:"#edcf72", 256:"#edcc61", \
512:"#edc850", 1024:"#edc53f", 2048:"#edc22e" }
CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \
32:"#f9f6f2", 64:"#f9f6f2", 128:"#f9f6f2", 256:"#f9f6f2", \
512:"#f9f6f2", 1024:"#f9f6f2", 2048:"#f9f6f2" }
FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
def __init__(self):
self.DISPLAY = False
if self.DISPLAY:
Frame.__init__(self)
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.grid_cells = []
if self.DISPLAY:
self.grid()
self.master.title('2048')
self.init_grid()
self.reset()
self.history = []
self.count = 0
self.agent = qLearningAgent2(self.matrix)
if self.DISPLAY:
self.key_down()
self.mainloop()
else:
            while self.count < TRAIN:
self.key_down()
def reset(self):
self.init_matrix()
if self.DISPLAY:
self.update_grid_cells()
def init_grid(self):
background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)
background.grid()
for i in range(GRID_LEN):
grid_row = []
for j in range(GRID_LEN):
cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)
cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)
# font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)
t = Label(master=cell, text="", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)
t.grid()
grid_row.append(t)
self.grid_cells.append(grid_row)
def gen(self):
return randint(0, GRID_LEN - 1)
def init_matrix(self):
self.matrix = new_game(4)
self.matrix=add_two(self.matrix)
self.matrix=add_two(self.matrix)
def update_grid_cells(self):
for i in range(GRID_LEN):
for j in range(GRID_LEN):
new_number = self.matrix[i][j]
if new_number == 0:
self.grid_cells[i][j].configure(text="", bg=BACKGROUND_COLOR_CELL_EMPTY)
else:
self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])
self.update_idletasks()
def key_down(self):
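        # One training step: ask the agent for a move, apply it, and compute the reward
        # (the merge score of the move, a flat -10 penalty otherwise, plus a 1024 bonus
        # on a win). The agent then updates its weights. When a game ends, the highest
        # tile is recorded in self.history and the board is reset; after TRAIN games the
        # weights are saved and the history is plotted.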
self.agent.set_state(self.matrix)
key = self.agent.act()
self.matrix,done = self.commands[key](self.matrix)
reward = 0
if done:
self.matrix = add_two(self.matrix)
if self.DISPLAY:
self.update_grid_cells()
if done!=1:
reward = done
# print(reward)
else:
reward = -10
if game_state(self.matrix)=='win':
reward = 1024
print("win")
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY)
if game_state(self.matrix)=='lose':
# reward = -10
# reward = np.log(np.max(self.matrix))
# self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY)
# self.grid_cells[1][2].configure(text="Lose!",bg=BACKGROUND_COLOR_CELL_EMPTY)
print(np.max(self.matrix))
self.agent.update(self.matrix, reward)
if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'):
# print(self.agent.W)
self.history += [np.max(self.matrix)]
self.agent.reset()
self.count += 1
self.reset()
if (self.count == TRAIN):
np.savetxt("train_result_after_"+str(TRAIN)+".txt", self.agent.W)
plt.plot(self.history)
plt.show()
# print(reward)
# self.matrix
if (self.DISPLAY):
            # Tell Tkinter to wait 100 ms before the next call to key_down
self.after(100, self.key_down)
def generate_next(self):
index = (self.gen(), self.gen())
while self.matrix[index[0]][index[1]] != 0:
index = (self.gen(), self.gen())
self.matrix[index[0]][index[1]] = 2
gamegrid = GameGrid()