From 8bc74b997cc61ee72eccafebed6c6ccba068a954 Mon Sep 17 00:00:00 2001 From: Jin Wenyao <wen-yao.jin@student.ecp.fr> Date: Thu, 23 Mar 2017 22:51:06 +0100 Subject: [PATCH] go --- __pycache__/agent_afterstate.cpython-35.pyc | Bin 8946 -> 9015 bytes agent_afterstate.py | 5 +- test.py | 88 ++++++++++++++++---- train_0.0025_0.0_True_result_after_2000.txt | 0 4 files changed, 76 insertions(+), 17 deletions(-) delete mode 100644 train_0.0025_0.0_True_result_after_2000.txt diff --git a/__pycache__/agent_afterstate.cpython-35.pyc b/__pycache__/agent_afterstate.cpython-35.pyc index 868c7462c3ecc9b239e39999006428952923ed71..c98f07408ce6dd5b46f6601976789ee53da6ad7a 100644 GIT binary patch delta 2335 zcmez5y4{UajF*>dvh<aR+>M;?80*;>QrH<%I2ckm8CuvGqL>*{xENBn8Cn<^qF5MG zco<T6!E9EB6h4L&elVMjAw_^8MG(wpXGjrZND&6JIT%tz7*a&RY)*z0F@_XzFq?}Z zMS>wkk|9Nkp@oqlikl%tnju97EX7mLkRr>FA_wO1GNi~eq$q&dd<-dy3@J)rHa}Ru zGDC_ALy9Ux3ll?>07HrzLy9_BR*)e@gCRu|%obuu(PBu^2D613Qgj$nbir&9kO{$> zdbju!(@Ii{ic1npQXSJ%^Gebue`Qi-VPjxmm@LUG%~(9yl-Z}Am7#-yA)c3^h6&8z z1Cd!^GM=BIh8@I>7htI2U`SzN$Yx?F>II2p2{JS@fEe*Y40*a>S!S^8YLH--Fp?}k zSVRONQp3hjbeW+<l%Yh7p@X5GAxns%nGu8-I~W+U#9<00!1~}^Nd}NUaYm3nDTWjl zh7?wiXF*yd7+fF*G&3?VmPj$QGccqvGNiD9l(d0enI!~M0`jXg*kl=o8j!8Z42&Ry zV?YLj43h@Cq=pToCSDdqX30&S$<oWHGFhE<6R#=*14A(r0|NsmgQooCXRPl0e5nP+ znK}7+@%crW>6v+xb=Y<&f>aeLF)%RPVoguX%}p#)VPIe=G6oUqAVPC8E4wnE5d#AQ z10yG+E+Z!sC*x)l_LYo`I+O2lXtHWBFfizB=HWce$g0J_z~Hd?4%b&kR&53b2Di!A zc$~Oc7#J9wK`ybMti-FKRl~r*lEuPM!@!Wm%8<g$AZ@`E&%%(xz!0p-;#b7Tz`zia zd4ZjQL6dEAB(D}b#5Ky3_i{-|m@qIf_<&5}0Xv3^krNEXCU4*^Wz?Lk#;43`3(`N? zkMEX%CdlnhAi@$vSWLFy*X0Dc;}&B|k@e&}ehF4x1_p+x$=&?t8J#Cb3n(zAPOcHK z)CXzQWGa#Y>GA>*f(#4{C;~#Dz+#Kf%u7kFNZI^EfSYmhLtznCNd^XntjQcgtstHQ zh?fg;EHnbxZ!zT+6d8cDvL+T4C05>IuF5PZ(gO>CjVkg+wrKK0AqmFx$-jj5I)nUt zizO#N-KYrcI5s55fqlf1oS&O?i?OK48s^?2kfV!04lVLzU|`7K{7YDwk+F8NoT#-d zIKtIHD!~S`B^IUUCRP;rf<!DPmx(&FhB7cPRBzrVYRhN?R_6$k0tMqO*36RB+~Qlz z1^ER<ps+6T14;RVJi(lvTB6ATaheB6)OxbHxH)6v=6rEMM&2-p-^4)@;gfqMR2gSX z-Yj7y0CGZ61V|KW|6d77w*Zi0R)|r_Ah{@z1zd;{q9_C;3i6{SQxRANIJ!Xoy~Q41 zl30|US~6{NjHDgo_sO#*Z5cTx-<0&^eaVo*2+En+Yz##&C+kZYSiA(KLzXNaP<dg_ z6wd?7h&A96ri2fqvY8Q728##7c{QvI;=!7nll!D%K(P#t^Cgp+rETgR7#JARLFp2N zGX>Nc7+(JW|Np-xe-StlrGOj^_AJ<+;9vvAJvh=KxlIS;Nw%WIyp;UhTPz?NlK4R; z7lGpelzL!63<?s6(V*BWVoaU9QCbw_CN5A=&jF=8mB~+}mDxBM`53FDC-X~7Pgau& zltbtP1s_s60l9GF=BY9Z8S6ory(kIfVo<qIlm=oUTntVP`XDhe5CQVkEtcZKq7o!; zFc+5;6^VkR?LcwKiHOTvY(=T%iA5<zsbKFy;;Kjjq%;8}!kAYyd8?cX<Ce*{<f0i_ zCY#B7)iX0>voI9xXQ*gkC}9Par_GEEF0nBTwQLL}Yz#HbpklC=8O&w{vspp$n#Img z!wRY@Knj={7)v;ilyZRNni&~tu_`T$VF=F?h8d#Ch-69)Ggug0gA_9`mT)qZa6!#2 z;hJo(AZE?#7vcsk$ci{Ye&;T-2YC(@yhV{97C1eCi&hJehdE1%67z}+@{3c83MNlb zkX6b7>0(EOSP`fgxy4uvN%Z-k8u5X!2;VKX(t?!4lGG!hs8*Q#Q9+cOhf$c3hmnU- zidlHFsA8->*d|b^QUuDOC}kL0<bz`T=H!Kn)?wx#CxXiKB2Z~w1S-diKm{)(yMu~M zNRkERQE=H(1Wte8)B;W@pyXEs_H6;ks8SGNG}%Z=Ls5=LgollXjfaDWjfabenV*A) rhlh<vl!r}7l!uu|fJbz4wUWFbI6ZM@mZTPy<mcxU7r9Pet>g{>=$Xym delta 2454 zcmdn)_Q{n~jF*=yo9kkP#YWC|jP<MxDQpZW><lR!3@z*oQOpb}oD3;k3@r={Q7jB8 z+zcr^U^Xj53NJ$nADGR?kiyT9A^>KyGo%PIqzHl891JPK3@IXDHYY=hC_{=En9aqI zBF>N^!H^=!(89<N#m$f+#gHNmmg1>rNReSkkp**j8B*jJQslvGK86$ph7?6Gn;)!S zi6KRqAw`9ug^3|bfFVVdAw>-=E69+d&XA%3W(zT-XfmW|f!V?gDcTGvI$*X4$b?`` z-CO*LX(g#e#U+U)sgCKXc_rzSzcQ&#mS>h?%$jV)>{HLg(80hE&kG{6z+^lhLk&BK z8_&;B!@-ck#E{LzP}Bkv$r4~_W&knb1sU>m!LrO?*+n41EFmOWez1rzLZpU`q39Gt zi3mf9C|F96p_!4PnX#UMv4epjOAMw^94ravN-%)*@iBt*Niw9cFr=`8ya&=E&fo$z z0b~wii6lci149}kIBHoL+Q6>N5`-xMc~J^%vNS^t$XW(Qkh#H(B~l__VJWaFHEbYG zybOrUlAXMqrI%58vMK8(UKIughGHfL1_n+BO}WWmSluUkvu#%di54j_FfiO=O;60t zO)OGoU|=XR0ugE;LSwQryE2af0|NsCBPXLSBPY}55cZXfjM|g`acHutGcYh{Z`R>F z&B&_Bz`$U?`487uMpi8b1_sy3-*}w3K-M{f?A4s?#H+!d!oU!$$?R9e$iTo5qRBFO z3a{4W2VCM}#taM$-XLXMV0*Y2IYDspcivJ)jmf!u%B(gF3=Bb&r}Etr&;U8g5ky#k z2=mGH{JLsRAonw-6j_0Uq(OuroDc%BbQl;IA|_wrKd%H*s>xI&1Jc0?PEkc*wP0nE z3=9m(lMf46>VvF-Dl75?nE+CRA^_5Gi!DAgFD11iX|uK<H>0E_Od*<)8I%2mT61y9 zW`jHq3MftnP4-(%c?CuKV8<mE6(v^QVy?<8DAENBfUPR>VqjoEaR|s|2n%nq#1~{_ zrcQPg-s=ns(pxM!`RPVQVE?dzy$$gXC>(CFB<JTQ-C`_)1RSbCMIH<c47r;fMU)vC zt0$+5TFZiyiYmyDV1wBbi_&uwD~fzTBIc9#iaN7~FfcGwZe|s;WwZgSa{x(!;^`J^ zW=U#p@h#?p{DLA-WES~?r2IhsVopyj(d2+Q%^f6aHMvgQoUwlMHgQ2l-cX2l#X%Bb zldnjqGESTPOTwxi<b<MdkSN4{5K9z9u!79wEJ-aciO)-|C@Jy>8NdoLJ_)2C5~P3& zQKA$DgG51|*JLUJs{kdAB5?e&$Co4)rKgrm0mTs;C~-1bF-kC2iBH}vsmJ(k@*_!G zM)t`HQl7jI7*ZHP`8=DAq3FTnGART72Mjd~3@lkZpwh>jDV~R+gcqcs8JxexgW<dy zRtE84O^(U;q+)QUphc5|rETi%85kHc1=JZBUjG08|Gy?5BqozV&IbnvI3&OU3raBH zlmf}g+8{5p6(#1S<mcXE0nw0b4l)p&+F;=i3LA*2pr|ZL0qJ8&$<IrT;w;FREF;4q zhb<L@;%F8qgp?*b$tbh%F;+=Wj+d66+$Iw!hcFQo!iaQQ1akJe&0l2}GS(Y{e2@qt zKm}1zDu{*f1~?JvfyBf>1jwhiSc(gaN|5}<TwGFA1de`NP;_%5qWczGQEGW&Q3}K_ zY>?<Ik^q?$5Aq#jUeRPxc@@Tulda^V8KoyrllQ7;WXR@VD7wi|F`c1=l_7;0RIa+j zmNC?_F_f?|)PVW5%%JGaVux}WK@zMWg|)0;2@ZxDR)!iTurxCRV+kji#mIo6j}xS@ znUMjZPZU{WVGKifo&ZA$$K+TA=|pkH5-x@kZm3xx1_MJD*gfLSj0{bT4B!%$#V^FI zh!YeLJVkaOpMt`=C<4Ut1QFnZ-5lh5&XS_UyyAlV;?yE=B;-%NsUWKYDv>nV5n))A z15(Ra49O{ZAR{a%ODRfe9Rx)>s90cN<Y(e!WMPzJWMkxK6lUaM<YnZ6(^AX=lcN-4 z^}&{d%C90&#>Z2LfgN*W@-s#2Ff)*CplYTFRKXO1s+J;9bpa_wK;=3lorAJDxX3C3 zr(AGq0w*L;;shtQqI{51B_P6ZGNZEi<US=qMOhvZ9yT5}9(Eo!9xfhcehwZU9yT6P m9yTEn9%dc^9`VTsl;j1$X^u0qB(<m{KR>6q$Yt^eC3gT%V$^d0 diff --git a/agent_afterstate.py b/agent_afterstate.py index 642b33c..0e98b40 100644 --- a/agent_afterstate.py +++ b/agent_afterstate.py @@ -16,6 +16,7 @@ class afterstateAgent: self.commands = { Action.UP: up, Action.DOWN: down, Action.LEFT: left, Action.RIGHT: right} self.alpha = alpha # self.gamma = gamma + self.epsilon_origin = epsilon self.epsilon = epsilon # e-greedy # self.TD_lambda = 1-epsilon # TD(lamdba) self.TD_lambda = TD_lambda @@ -77,6 +78,7 @@ class afterstateAgent: self.count = 0 self.first_step = True# used to avoid update the first time self.explore = 0 + self.epsilon -= self.epsilon_origin/2000 return def _reset_trace(self): @@ -168,6 +170,7 @@ class afterstateAgent: i = np.random.rand(); if i < self.epsilon: #explore self.explore += 1 + self.forget = 0.0 return sum(phi_array) + 10000 return sum(phi_array) + done @@ -200,11 +203,9 @@ class afterstateAgent: if self.symmetric>0: for i in range(4): s = transpose(s) - self.set_state(s) n = transpose(n) self.one_side_update(n,reward,s) s = reverse(s) - self.set_state(s) n = reverse(n) self.one_side_update(n,reward,s) #one loop is one rotation diff --git a/test.py b/test.py index e4dc0fa..ee393df 100644 --- a/test.py +++ b/test.py @@ -1,15 +1,21 @@ +import matplotlib +matplotlib.use("TkAgg") +import matplotlib.pyplot as plt + from tkinter import * from logic import * from random import * from agent import * from agent_afterstate import * - import numpy as np import pickle import time +import sys +from optparse import OptionParser +import os -TRAIN = 100000 +TRAIN = 2000 SIZE = 500 GRID_LEN = 4 GRID_PADDING = 10 @@ -25,8 +31,18 @@ CELL_COLOR_DICT = { 2:"#776e65", 4:"#776e65", 8:"#f9f6f2", 16:"#f9f6f2", \ FONT = ("Verdana", 40, "bold") class GameGrid(Frame): - def __init__(self): + def __init__(self,args=None): + for k in list(args.keys()): + if args[k] == None: + args.pop(k) + else : + args[k] = float(args[k]) + if "train" in args.keys(): + self.train = args["train"] + args.pop("train") + else: + self.train = TRAIN self.DISPLAY = True if self.DISPLAY: Frame.__init__(self) @@ -40,16 +56,16 @@ class GameGrid(Frame): self.reset() self.history = [] self.count = 0 - self.agent = afterstateAgent(self.matrix) - f = open("train_0.0025_0.0_result_after_2000.txt",'rb') + # self.agent = RandomAgent() + self.agent = afterstateAgent(self.matrix,**args) + f = open("train_0.0025_0.5_0.0_result_after_2000.txt",'rb') self.agent.W = pickle.load(f) - f.close() - print(self.agent.W[0]) + if self.DISPLAY: self.key_down() self.mainloop() else: - while self.count<=TRAIN: + while self.count<=self.train: self.key_down() def reset(self): @@ -93,6 +109,10 @@ class GameGrid(Frame): def key_down(self): + if self.count>=1: + self.agent.verbose = False + if self.agent.count >10000: + self.agent.verbose = True self.agent.set_state(self.matrix) key = self.agent.act() self.matrix,done = self.commands[key](self.matrix) @@ -102,27 +122,50 @@ class GameGrid(Frame): if self.DISPLAY: self.update_grid_cells() if done!=1: - reward = done + reward += done + # print(reward) # else: - # reward = -10 + # reward = -0.5 + + if game_state(self.matrix)=='win': print("win") + # self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY) + # self.grid_cells[1][2].configure(text="Win!",bg=BACKGROUND_COLOR_CELL_EMPTY) if game_state(self.matrix)=='lose': - print(np.max(self.matrix)) + if self.agent.explore>0: + print("explore: "+ str(self.agent.explore)) + # reward = -10 + # reward = np.log(np.max(self.matrix)) + # self.grid_cells[1][1].configure(text="You",bg=BACKGROUND_COLOR_CELL_EMPTY) + # self.grid_cells[1][2].configure(text="Lose!",bg=BACKGROUND_COLOR_CELL_EMPTY) + print(str(self.count) + " : " + str(np.max(self.matrix))) # self.agent.update(self.matrix, reward) if (game_state(self.matrix)=='win' ) or (game_state(self.matrix)=='lose'): # print(self.agent.W) + if (self.count == self.train): + f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+"_"+str(self.agent.symmetric)+"_result_after_"+str(self.count)+".txt",'wb') + pickle.dump(self.agent.W ,f) + f.close() + f = open("train_" +str(self.agent.alpha) +"_"+str(self.agent.TD_lambda)+"_"+str(self.agent.symmetric)+"_history_after_"+str(self.count)+".txt",'wb') + np.savetxt(f, self.history) + f.close() self.history += [np.max(self.matrix)] self.agent.reset() self.count += 1 self.reset() + # plt.plot(self.history) + # plt.show() + # print(reward) + + # self.matrix if (self.DISPLAY): # Tell Tkinter to wait DELTA_TIME seconds before next iteration - self.after(100, self.key_down) + self.after(50, self.key_down) def generate_next(self): index = (self.gen(), self.gen()) @@ -130,6 +173,21 @@ class GameGrid(Frame): index = (self.gen(), self.gen()) self.matrix[index[0]][index[1]] = 2 -start_time = time.time() -gamegrid = GameGrid() -print("--- %s seconds ---" % (time.time() - start_time)) +if __name__ == '__main__': + parser = OptionParser() + parser.add_option("-g", "--TD", dest="TD_lambda", help ="TD_lambda the forget coefficient") + parser.add_option("-a", "--alpha", dest="alpha", help ="alpha the learning rate") + parser.add_option("-t", "--train", dest="train", help ="training episodes") + parser.add_option("-s", "--symmetric", dest="symmetric", help ="symmetric sampling") + parser.add_option("-e", "--epsilon", dest="epsilon", help ="epsilon the exploration") + parser.add_option("-u", "--tuple", dest="tuple", help ="the tuple to use") + (options,args)= parser.parse_args() + print(vars(options)) + f = open("train_0.0025_0.5_0.0_history_after_2000.txt",'rb') + history = np.loadtxt(f) + f.close() + plt.plot(history) + plt.show() + start_time = time.time() + gamegrid = GameGrid(vars(options)) + print("--- %s seconds ---" % (time.time() - start_time)) diff --git a/train_0.0025_0.0_True_result_after_2000.txt b/train_0.0025_0.0_True_result_after_2000.txt deleted file mode 100644 index e69de29..0000000 -- GitLab