Commit 4872db3e authored by Wen Yao Jin's avatar Wen Yao Jin
Browse files

go

parent ab57a721
No preview for this file type
2048-python
===========
Based on the popular game [2048](https://github.com/gabrielecirulli/2048) by Gabriele Cirulli, this is a Python version that uses Tkinter.
![screenshot](img/screenshot.png)
To start the game, run:

    $ python3 puzzle.py
Contributors:
==
- [Tay Yang Shun](http://github.com/yangshun)
- [Emmanuel Goh](http://github.com/emman27)
\ No newline at end of file
......@@ -11,10 +11,11 @@ class Action(IntEnum):
RIGHT = 4
class afterstateAgent:
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2):
def __init__(self, mat, TD_lambda = 0.0, alpha = 0.0025, epsilon = 0.01, verbose= True, symmetric=1, tuple = 2, mono = 0.0025):
self.state_per_tile = 12
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.alpha = alpha
self.mono = mono
# self.gamma = gamma
self.epsilon_origin = epsilon
self.epsilon = epsilon # e-greedy
......@@ -23,11 +24,13 @@ class afterstateAgent:
self.forget = self.TD_lambda
self.symmetric = symmetric
if tuple==0:
self.tuple = self._tuple()
self.tuple = self._tuple()
elif tuple == 1:
self.tuple = self._tuple_advance()
else:
elif tuple == 2:
self.tuple = self._tuple_advance_plus()
else:
self.tuple = self._tuple_restricted()
if verbose:
print(len(self.tuple))
self.W = self._generate_dict()
......@@ -60,6 +63,27 @@ class afterstateAgent:
print(list)
return list
def _tuple_restricted(self):
    """Build the "restricted" tuple layout over a 4x4 grid of cells.

    The layout is a list of tuples, each tuple being a list of
    ``(a, b)`` cell coordinates, in this order:

    * four straight lines with the first coordinate held fixed,
    * four straight lines with the second coordinate held fixed,
    * the 2x2 squares whose top-left corner ``(a, b)`` satisfies
      ``abs(a - b) != 1``.

    The finished layout is printed to stdout and then returned.
    """
    # Straight lines: hold one coordinate fixed and sweep the other.
    fixed_first = [[(a, b) for b in range(4)] for a in range(4)]
    fixed_second = [[(a, b) for a in range(4)] for b in range(4)]

    # 2x2 squares, skipping corners that sit one step off the diagonal.
    squares = [
        [(a, b), (a, b + 1), (a + 1, b), (a + 1, b + 1)]
        for a in range(3)
        for b in range(3)
        if abs(a - b) != 1
    ]

    layout = fixed_first + fixed_second + squares
    print(layout)
    return layout
def _tuple_advance(self):
return [[(0,0),(1,0),(2,0),(3,0)],\
......@@ -164,8 +188,17 @@ class afterstateAgent:
return -1
if done== 1:
done = 0 #1 means no reward
phi_array = np.array([w[i] for w,i in zip(self.W, self._index(afterstate))])
phi_array = []
for i,w,ind in zip(range(len(self.W)),self.W, self._index(afterstate)):
phi_array += [w[ind]]
if act:
for i,w,ind in zip(range(len(self.W)),self.W, self._index(afterstate)):
if self.mono > 0:
if i in [0,3,4,7]:
phi_array[i] += self._bonus(ind)
# in the act phase, explore when there is 0
if 0 in phi_array:
if self.verbose:
......@@ -184,10 +217,14 @@ class afterstateAgent:
self.set_state(afterstate)
self._update_trace()
target = self.alpha * self._target(next_state) # afterstate don't need reward
for w,tr,n in zip(self.W,self.trace,self.N):
for i,w,tr,n in zip(range(len(self.W)),self.W,self.trace,self.N):
for k in tr.keys():
n[k] += tr[k]
w[k] += target*tr[k]/np.sqrt(n[k])
t = target
# if self.mono > 0:
# if i in [0,3,4,7]:
# t += self.mono*self._bonus(k)
w[k] += t*tr[k]/np.sqrt(n[k])
if self.verbose:
print("reward: "+str(reward))
print("target: "+str(target))
......@@ -196,6 +233,11 @@ class afterstateAgent:
self.count+= 1
return
def _bonus(self, t):
    """Return a monotonicity bonus for the sequence ``t``.

    If the entries of ``t`` are entirely non-increasing or entirely
    non-decreasing, the bonus is ``2 ** sum(t)``; otherwise it is 0.
    """
    entries = list(t)
    steps = np.diff(entries)
    # Monotone means every consecutive step has the same sign (zero allowed).
    is_monotone = np.all(steps <= 0) or np.all(steps >= 0)
    # Multiply by the 0/1 flag rather than branching, so the result type
    # follows the entries' numeric type in both cases.
    return int(is_monotone) * (2 ** sum(entries))
def update(self, next_state, reward):
if self.first_step == True:
#don't update the first time
......
......@@ -32,20 +32,36 @@ FONT = ("Verdana", 40, "bold")
class GameGrid(Frame):
def __init__(self,args=None):
if args["continue"] != None:
continue_filename = args["continue"]
else:
continue_filename = None
args.pop("continue")
if args["display"] != None:
self.DISPLAY = int(args["display"])
else:
self.DISPLAY = 0
args.pop("display")
for k in list(args.keys()):
if args[k] == None:
args.pop(k)
else :
args[k] = float(args[k])
if "train" in args.keys():
self.train = args["train"]
args.pop("train")
else:
self.train = TRAIN
self.DISPLAY = False
if self.DISPLAY:
if self.DISPLAY > 0:
Frame.__init__(self)
self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right}
self.grid_cells = []
......@@ -53,11 +69,17 @@ class GameGrid(Frame):
self.grid()
self.master.title('2048')
self.init_grid()
self.reset()
self.history = []
self.count = 0
# self.agent = RandomAgent()
self.agent = afterstateAgent(self.matrix,**args)
if continue_filename != None:
f = open(continue_filename,'rb')
self.agent.W = pickle.load(f)
f.close()
if self.DISPLAY:
self.key_down()
self.mainloop()
......@@ -178,6 +200,9 @@ if __name__ == '__main__':
parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling")
parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration")
parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use")
parser.add_option("-c", "--continue", dest="continue", help ="the file to continue training")
parser.add_option("-d", "--display", dest="display", help ="display result")
parser.add_option("-m", "--mono", dest="mono", help ="bonus for monotonicity")
(options,args)= parser.parse_args()
print(vars(options))
start_time = time.time()
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment