From bf10beba658d7dd655f6b43edf04ad3a729cf87e Mon Sep 17 00:00:00 2001
From: cloudera_vm <cloudera@quickstart.cloudera>
Date: Sat, 18 Mar 2017 10:29:57 -0700
Subject: [PATCH] Preprocessing test on pg100_test (5 lines with 1 empty)

---
 .../Preprocessing_1_test/.part-r-00000.crc    |  Bin 12 -> 12 bytes
 .../nb_output_records.txt                     |    1 +
 Assign2/Preprocessing_1_test/part-r-00000     |    8 +-
 Assign2/WordCount/.part-r-00000.crc           |  Bin 1948 -> 1944 bytes
 Assign2/WordCount/WordCount                   |   34 -
 Assign2/hadoop.log                            | 1821 +++++++++++++++++
 Assign2/pg100_test.txt                        |    3 +-
 Assign2/pg100_test.txt~                       |    2 +-
 .../src/Preprocessing/Preprocessing_1.java    |  196 +-
 Assign2/src/WordCount/WordCount.java          |   62 +-
 10 files changed, 1905 insertions(+), 222 deletions(-)
 create mode 100644 Assign2/Preprocessing_1_test/nb_output_records.txt

diff --git a/Assign2/Preprocessing_1_test/.part-r-00000.crc b/Assign2/Preprocessing_1_test/.part-r-00000.crc
index 80a2402158e63b6e8612ff830e75990dc9c4bce6..a82a4326b15ab48d1271a57d45ab704cfe992207 100644
GIT binary patch
literal 12
TcmYc;N@ieSU}9*iS@Qt^6I27)

literal 12
TcmYc;N@ieSU}E5(ziABs5vc=5

diff --git a/Assign2/Preprocessing_1_test/nb_output_records.txt b/Assign2/Preprocessing_1_test/nb_output_records.txt
new file mode 100644
index 0000000..bf0d87a
--- /dev/null
+++ b/Assign2/Preprocessing_1_test/nb_output_records.txt
@@ -0,0 +1 @@
+4
\ No newline at end of file
diff --git a/Assign2/Preprocessing_1_test/part-r-00000 b/Assign2/Preprocessing_1_test/part-r-00000
index d442095..96f5477 100644
--- a/Assign2/Preprocessing_1_test/part-r-00000
+++ b/Assign2/Preprocessing_1_test/part-r-00000
@@ -1,4 +1,4 @@
-0,anyone anywhere ebook cost use at no
-78,restrictions whatsoever copy almost away give may or no
-148,included license terms under re gutenberg project use
-217,online www org ebook gutenberg at or
+0,anyone anywhere ebook cost use
+78,restrictions whatsoever copy almost away give may
+149,included license terms under re gutenberg project use
+218,online www org ebook gutenberg
diff --git a/Assign2/WordCount/.part-r-00000.crc b/Assign2/WordCount/.part-r-00000.crc
index ad5e3c9b250270c739a2fbce7362301e8dbb1cf0..7d9f624a0cc001355f01cb4c180dd6cb549c6884 100644
GIT binary patch
literal 1944
zcmYc;N@ieSU}De+YAD&D=ax~%@X%@Vs`oR@>bACe?D|)?>g!fU6%C_xN>|My45qAq
zF`>6y`eb%wuJ#hSj8#@Ao=?AgWX6%lHwCMeXWB*U*@!7>cid?*n*Lh&{QbW5-yR=2
zYahO2@&7cBJSEqQ%noa06^wqoS9T3tBV1iMN%N1e=<>pZwZVF9N!smd_dhcq@XXg(
zu;q6CL>r~MMvLZdD*hkqd&99^Otn?d=R1#6fz-E~Oc56kU15Cg;;t+-dEKSxuRcOt
z{@VZb!%LLqSeO4|T~=WDXwUL1W$(Na9_j8DuQ|I___#z!+Jb3D!A8~2p3RawPn_H|
z)vQ!$?i?4lUmac1kwSCbGXnfhFS+;pTK5~pJtyXv#`f5RY`$Rh^kztD-Tzt7mhxEL
zIvOp^zP54E*Q1S28Me(#wSs42b5pce<XqS?N5oj@g$S2H_cF1)KUduU@G9@<mpw24
zx%fxE@s2)~KEE};;fTQgKglX=YcmYRZ|*5AlgQ>fr4Ui%oqO`_kJ}czPJQP-+8-6G
zcW=%Ueg7Y-k9Tm+_|o})%9rB{F0WWq&hX;rYYF}Ig66AI-e)DhUpnwCO#aZ#x!d<G
z;1T?>^XQ{lnFns@esleHX0FuKhQmiVKa{-Rw)N9vr*Es&c;Yzs8dV>4U8Q;MT8rf!
zo;yk1s()1z#LVYh-2Z@yZ`!`EKeRSJx_GPZ-;7Ba%GGw(^Kb2`{2i~x%Gmmw`Ci?v
zIrk=*IqnMd+m~l?$?mWFvyDc{h7mI!)-&=woWG03xThn?^t#PlEft@m28P=n9CPs1
z;BwGf!)ZPFU*4jtdR?J99_J5itg`Tq5Sv$j=Irq-lWA9)ni8t++<*6Q{Ze1qTf8v~
zY;^C|-%?KbwOvdv;%n-Ujg8XreHv#j?|2=&lHK4*<?4H$I#1UzW|W`G-tfU+cJ;iM
z1~v_0M?-n;{kpRzX2EXdW%IXlS++&wcpS}+%4k?LXY=#!>lrKWFnecO_!aMYu<GwI
z|MzjzjPw2XYcBV2TlB%|;tWyGqMnnNo+PE1KdpJ08Oi6KYgoCveZ{XMFI^A)WM|8r
z|GDh3V8OwIE{lGN|9iLa&-3IMsm(Jwx1HEwH2KfMjEN@>PJI~|U*;yeg0J_8*3HKY
zxEjoyk}qV(&3mfG%<8>x#%9Kby}88=7hj%uYgI6DhDy{iruM@^SG{H1dL{(Ke7>I)
zx#CICl#1QUO!h~=jyij%;$#{3<oVmVjlD{|wwUXc$v><KUN|S9xqD@C_uZRE{{9r2
zwz6T;+uN2ajZE!Co_&8N<9a~xqlD?9rFRV`hwZShE9<rEaJ<`l|7V64%l{QdHVZ$j
zZ<Wj36m3uyv(0g1P4>HuD>U?f@agb)^0=RgP_((-<+|gc^5g$n#u>BYk7x5)pAXkn
ziZ{6<^gT?`scWCK$kc83=boRxPvFc-na}lcyHaoIyvqI^>$dXg>Sq^)w@g{Y6<V@0
zZAMG^#qXb+xHVoD&aXcbZ!-Vxqj}Hu3>4mpw#sZ<!CDdOqS&{6$JfW)C)>ByKgmtj
zW72;8{if`S?C|YdeCiTZwlh10>~4|FY2Li|*0k?;p6BYdyibse<M3Pbuh&53cVQDp
z(2s?4pUzj<DN$IuZNbCJiVLbcwi~JW<Q>e`+*`$C_kp?D-Xqzd&$ixT&ph5UGj#R|
zb+FH7%~}#-B<GOYy1TN@w(V{3@$+K+xqOa`8ZVzHXstcL?fd=ZVh4rjW?_rX7hbU`
zE*CUe_~4W8nzp@;_kW+vbO<oM__=VQ$_<Tq93Ac*%8P%MbTDRJ;LTnA?`x}MtL}7N
z3Fg-V;s-uXyyv^l)~C~B=kMgF+gDmC?6YD||B<ov$gddY3+?eN^D>@_an5~jaejNc
z+FYh-KRv^)dgpewK9JAy$lo*l^@7`O?C-<Y-Zz@w>9pmbCm;V$qs*y|itH+Zsu{H_
zceXzjSbTAoe#_Unm5yguUVFIJ;L4s8?LRV_o^x!ly;R;Nt-Srj52KxzT~}(hY53iY
zDw>gaN3hm(hJt5X=aeFa-@fv`flFPxKQ_GMUiRxjs9u><)vAoFZE5;jF8fZIpuw|4
zEqmUEGjA9CIvUJs5$KcXx+`X(qyB`8etUkoo{diOdQ_itb?b&XYu+TpA6T+;!+y*7
z9KV-Y&N~iNEs|F){l(QVb+ejf?aYpVyX`k8DNM_+U-P6P&Hs1fb5D2vQs!B#d{xaQ
zzZ=h<4`!Q_(_rB9O^JWol+bx<hJ|5fB~=M^CKjKQPH1ZNuh0pcy7uL?lNP(~#l8A^
z^Z!PH&KXC(EWG-LCG1npp`ENpEk6Ei&su&nTs!yWn-eQPO+9ElZ|n75zl1F}Kb+**
zX18gjxcj@;r=^v?ubIbpZW3p^;X+sTtk@+vZdXEQ&KB@}^dc<d`N{apWj!l6W=8!`
zop`DH<A3d7x450AMzcz9&xtw~e(Pbe;`R?Kz5Fj8JnqV>)8xDGVDMCK&TIGmCLLYs
z9`@Pq(o)rv>38lgIv5kXs)#$xZo66Wo}fJDn5#QJnsM^5Iko8Q`FrclW;vsZOI=T&
zuUhZ9+tx35KF?HHZ;b~BWg9G3xm8?May)k?h}E<1<<GYvJKpjcKj;1By;syr>$m=9
f;ai23YJsaBGgfauptzcq^@6iqWw^2Ysx_Mc`*qQu

literal 1948
zcmYc;N@ieSU}Cs^oO2<I<+daHHoY_0_qv6}{G`8Y@La9ijPKQa#quWZSs)d=;=Wto
z8CfCA(|mtI@74a$=@EUVaq8a<5C2meQ{OcyF<4YcNT`W_zM1$iE~8JJb(7<+^4R~G
z*FQb_c~5YI`(!qm^I!j66Ke0dDZm)GTuYy4kCa5m;h4oy0bl?53f;JQ;a>emt&I1X
z`TpHM4sLtTudrTR@zM4xIbVgAXQ`MRs1KfhO6uk6qkYczrRJ|F&fU}}x%<S~3!KG4
zQ!Tz{dN6<8v2-QZ)7NgVW|+OrT`Sz~>MFIg&ro*VdG{65x8Kr|JMC-tc&YQb4?Q-S
zItL_n1@xcaP`kAJqWjaoGHceJPcV2p;go5_v=v9h!>#@Z-wUZTj{nP^Hbq-l<K5}W
zn?>{*xcBC-@BCmHvZUNu%hiypXu5Ck<o#vY%NkFq{uX^ExXDR$zPA0sRL-!9dnt8~
zBi23ROxYxDw%hZ-x23VZ?_!#dt@qvZ|K{p@wr<R}r?P*Ddwxl?@B4P-qgsjVzNfYG
zJELWSc(1-YwMP8vlrR&yiJz}sb=<}gsJXy!s~_9*hELb)9+jup^naD#u;25Oq^5)X
zMvkyanj2qEJ2d}=<c+57*W}+xhOoUmyXl|!p|!KcSl7L3jL_Dplm6ExVsZI%XmP)@
z$rP7I@?YLW%+Of1xZ(!u#hixdzx`i!KTcvdF4}ne>p}BlRYlHXv8JzH`zIJ2-u6I$
z#=^ZatY^N79hp=6eB)c6x78uiNk^3qHLfl%@>3V_*mNo8+L~L_?T>EdZaZywYr-|<
zd|!r8%j?G$)z-C5VQc8mXAoCe^B`CB#+zE-)g==qv^g*)rSIet@9~_lpms;zWXqM`
z>QZhl=P)t;d*{m6-SzEiD^DuD^w~e5|H!U&@w-a;_||TiXqR9rCwC?`Xs_d|4|fFT
zGwpKSprtR(x-0dVzx(R~`GY$HHs%+`20mYGx~OZdUASM(hR$OwTK9_XxF)_kT6rN(
zA>%>#?45h!cOFeX$+U$jA@G{Q))ijoRxFiiULY-U+3m43vy*bYx8Cn9&oqwSI+4a$
zJ8{CvO(MUe4Z8|TCo;XTUaR)vUQf_%7vAm)Bg^#0$YXb}PE(rPylG|LFS*$dpBh};
z<-}Xre4}u-5*Nd9&krT_LErEF2&}&Ge|6z6<%3gF7}*7~?^eBkxaDuc?28f8gXEZw
zzFo9ZbNyncT-)<D#Y=D4luEMy)&Fn%?p(z8_gr5jB60-}vpzd}B>Twa8|h9>I)7}Z
zI_)#^H+!_Q!13B?%ZvK1SJLY_^7-ngrp4x2>#Se(vWPSC^uH&s1oLaRRl48J-th1G
zTdpe{W*^p>t`=Q>Q(Vpb(C(u1yT5-+uQIv6>HacNh9cFb-HN65e`c&-n3Rxkdm9_W
zr$5Pg+3g}#^5VQY#jjR%Md%u8$@euVEj23iD7AfRdZ0LD(ZxDP&#>l=_P2R-<eMV4
ztTdgVyEx^o%&#S-C81A`d<d;_<0|I4p}*ARPnk&A>&y=ke3xdHbXD<2=;(ZL+Zz&o
zS?Z&&&|3jdQ{^_^;${ZTn%$Bb^IQ+4Ote<WRM>a#vDk`qCn@s)|56?c-7@u#efK1{
z$t7=+xMmQ%w)?|zg;r0^C+fZCvjtLEc^KkF+viLwb3Yr>c2zzu!tlzJS)V30F@N}E
zR?T+6b$al$lYRa_90E=)a}8;UGMw?gN!^{dP`9l0=F`V~%|G^}Kjev8wftPVt4Y?R
z4-9iNX03DmDgW#wk0|FqleSWyV~U%r9H%&`i1O^3FEyu9>#Vq@RMfG{I=9ZJ*BidI
z-RU0QSS+sd#E^%}Ql{1Qg78eiOor;`1v)9ka>r|91#^F89KPLgcf#G(ZPRvrzU*qT
zO<mT<_wpC@6*{l?wCq^5_N1=v9W~L2ztd&wbrm;!?2HWY7G+A244qZVtRp>FA;n@t
z<ox-u(o1hH(W(2C&cFK4>XO}W1>;uiXZf_uWSz{uYhO;EiLv;;cG0Tahc?R2iuu;N
z_oBl5>Vp~9^Y1_AYm<`Gke{d0Fn#hZCs+2xn-0q#+VV-={gZp0sQo=nuLbiy_jvht
z*?)Y-r(Du7jfv&o!OIh-DXsb&mG`Q0cEZ&ntCR}XwCBDT_HX!fKcMsIC$;dBT;pOV
zr_+YJ;==TAx>cL1Br7`loqDcvYSE^taj%YkO<7~4VKUD>X8tSh4Gt&IGvBF{C{yy8
z_$=4MuI}7so5LrH-)PqaZBh@ewVxNRASbc^&`+6y^sXCg&ivVLWxpjZ$?14myH(WT
z<QSES2X6c7Z@29&dMD)1pa0{I^><&!6RA;lVhwdw`@`?7cd@Zsr?WP6DbxM)>p7FY
zwA3$Vzueu!F@d>rNwn85C$^*8=lSq!+nrh%6ffJ~d!Ti(t<=jW505rlRUPrn>#5vg
z)uQM9Fa4=@(-yT4bHi&&9$oZfnj{l(M*aAXlxg2;el!S*+?})Js&5t7wx=F~Uxb&u
zmtJv$rSoL-IXjjk_uX^WPg$jTxMXE<bord`yG{w$_($^_G`x0Rpt#8U^uBKH&-sdb
z?&SS%y}EW$Vxf}33e9QB&E1<r(q9*!)0L@oXSwzC$(_jsN*b{o^*iGVvmf=H@jLmW
ebHAzHNrCoN<x@*<%Rg>d>s?*>zc6g)|ET~%jJyT_

diff --git a/Assign2/WordCount/WordCount b/Assign2/WordCount/WordCount
index f7ce383..7cbead3 100644
--- a/Assign2/WordCount/WordCount
+++ b/Assign2/WordCount/WordCount
@@ -910,7 +910,6 @@ amply,3
 ampthill,1
 amurath,2
 amyntas,1
-an,1896
 anatomiz,2
 anatomize,3
 anatomy,4
@@ -1386,7 +1385,6 @@ astronomers,1
 astronomical,1
 astronomy,1
 asunder,15
-at,2536
 atalanta,2
 ate,3
 ates,2
@@ -1514,7 +1512,6 @@ avails,2
 avarice,2
 avaricious,1
 avaunt,15
-ave,3
 aveng,3
 avenge,1
 avenged,2
@@ -1566,7 +1563,6 @@ aye,15
 ayez,1
 azur,2
 azure,1
-b,16
 ba,2
 baa,1
 babbl,1
@@ -5310,7 +5306,6 @@ cypriot,1
 cyprus,28
 cyrus,1
 cytherea,3
-d,8961
 dabbled,1
 dace,1
 dad,3
@@ -6807,7 +6802,6 @@ dye,5
 dyed,3
 dyer,1
 dying,48
-e,142
 each,240
 eager,9
 eagerly,3
@@ -7870,7 +7864,6 @@ eyestrings,1
 eying,1
 eyne,9
 eyrie,1
-f,11
 fa,6
 fabian,74
 fable,4
@@ -8511,7 +8504,6 @@ flux,2
 fluxive,1
 fly,245
 flying,17
-fo,4
 foal,1
 foals,1
 foam,4
@@ -9800,8 +9792,6 @@ gypsy,2
 gyve,1
 gyved,1
 gyves,5
-h,2
-ha,230
 haberdasher,5
 habiliment,1
 habiliments,4
@@ -9984,7 +9974,6 @@ hastily,5
 hasting,2
 hastings,149
 hasty,21
-hat,36
 hatch,18
 hatches,7
 hatchet,1
@@ -10325,7 +10314,6 @@ hitting,2
 hive,6
 hives,1
 hizzing,1
-ho,209
 hoa,5
 hoar,7
 hoard,4
@@ -10703,12 +10691,10 @@ ignorant,48
 ii,171
 iii,145
 iiii,1
-il,18
 ilbow,1
 ild,1
 ilion,6
 ilium,5
-ill,279
 illegitimate,2
 illinois,222
 illiterate,1
@@ -10727,7 +10713,6 @@ illustrious,5
 illyria,13
 illyrian,1
 ils,2
-im,1
 image,46
 imagery,1
 images,11
@@ -11812,7 +11797,6 @@ knowledge,78
 known,188
 knows,213
 kramer,1
-l,23
 la,78
 laban,2
 label,2
@@ -12376,7 +12360,6 @@ living,121
 livings,1
 lizard,2
 lizards,2
-ll,2409
 llous,2
 lnd,1
 lo,74
@@ -12640,7 +12623,6 @@ lym,1
 lymoges,2
 lynn,1
 lysander,103
-m,30
 ma,7
 mab,3
 macbeth,291
@@ -13833,7 +13815,6 @@ myself,567
 myst,1
 mysteries,4
 mystery,17
-n,159
 nag,2
 nage,1
 nags,1
@@ -14066,7 +14047,6 @@ nit,2
 nly,1
 nnight,2
 nnights,1
-no,3814
 noah,2
 nob,2
 nobility,37
@@ -14231,7 +14211,6 @@ ny,2
 nym,63
 nymph,9
 nymphs,12
-o,3053
 oak,27
 oaken,2
 oaks,5
@@ -14473,7 +14452,6 @@ opprobriously,1
 oppugnancy,1
 opulency,1
 opulent,2
-or,3199
 oracle,27
 oracles,3
 orange,5
@@ -14563,7 +14541,6 @@ oui,6
 ounce,6
 ounces,1
 ouphes,2
-our,3066
 ours,88
 ourself,24
 ourselves,115
@@ -16778,7 +16755,6 @@ quoted,5
 quotes,1
 quoth,66
 quotidian,2
-r,92
 rabbit,4
 rabble,13
 rabblement,2
@@ -17960,7 +17936,6 @@ ruttish,1
 ry,60
 rye,3
 rything,1
-s,7734
 sa,6
 saba,1
 sabbath,2
@@ -20577,7 +20552,6 @@ syracusians,1
 syria,6
 syrups,2
 system,1
-t,1213
 ta,96
 taber,1
 table,60
@@ -20865,7 +20839,6 @@ tetter,3
 tevil,1
 tewksbury,8
 text,11
-th,1177
 thaes,1
 thames,7
 than,1885
@@ -21734,7 +21707,6 @@ tyrant,60
 tyrants,10
 tyrian,1
 tyrrel,21
-u,6
 ubique,1
 udders,1
 udge,1
@@ -22597,7 +22569,6 @@ utterly,8
 uttermost,7
 utters,5
 uy,1
-v,99
 va,1
 vacancy,4
 vacant,6
@@ -22713,7 +22684,6 @@ vaunts,2
 vauvado,1
 vaux,9
 vaward,5
-ve,1
 veal,2
 vede,1
 vehemence,1
@@ -23047,7 +23017,6 @@ vulnerable,1
 vulture,4
 vultures,2
 vurther,1
-w,2
 wad,1
 waddled,1
 wade,3
@@ -23492,7 +23461,6 @@ whoso,4
 whosoe,2
 whosoever,2
 why,1476
-wi,12
 wick,1
 wicked,64
 wickednes,1
@@ -23605,7 +23573,6 @@ wishing,9
 wishtly,1
 wisp,1
 wist,1
-wit,269
 witb,2
 witch,94
 witchcraft,18
@@ -23863,7 +23830,6 @@ xii,2
 xiii,2
 xiv,1
 xv,1
-y,51
 yard,12
 yards,5
 yare,10
diff --git a/Assign2/hadoop.log b/Assign2/hadoop.log
index 4b0c489..a5c2ca7 100644
--- a/Assign2/hadoop.log
+++ b/Assign2/hadoop.log
@@ -134,3 +134,1824 @@ Caused by: java.lang.NullPointerException
 		Bytes Read=5589889
 	File Output Format Counters 
 		Bytes Written=0
+2017-03-18 08:18:14,295 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 08:18:18,093 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 08:18:18,115 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 08:18:20,321 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 08:18:20,450 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 08:18:21,079 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 08:18:22,754 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1486099625_0001
+2017-03-18 08:18:25,049 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 08:18:25,051 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1486099625_0001
+2017-03-18 08:18:25,063 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 08:18:25,153 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 08:18:25,163 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 08:18:25,683 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 08:18:25,685 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1486099625_0001_m_000000_0
+2017-03-18 08:18:26,040 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 08:18:26,076 INFO org.apache.hadoop.mapreduce.Job: Job job_local1486099625_0001 running in uber mode : false
+2017-03-18 08:18:26,078 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 08:18:26,170 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 08:18:26,184 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100.txt:0+5589889
+2017-03-18 08:18:27,663 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 08:18:27,678 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 08:18:27,678 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 08:18:27,679 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 08:18:27,679 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 08:18:27,721 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 08:18:27,778 INFO org.apache.hadoop.mapreduce.lib.input.LineRecordReader: Found UTF-8 BOM and skipped it
+2017-03-18 08:18:32,174 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:33,097 INFO org.apache.hadoop.mapreduce.Job:  map 1% reduce 0%
+2017-03-18 08:18:35,177 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:36,105 INFO org.apache.hadoop.mapreduce.Job:  map 3% reduce 0%
+2017-03-18 08:18:38,179 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:39,109 INFO org.apache.hadoop.mapreduce.Job:  map 5% reduce 0%
+2017-03-18 08:18:41,180 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:42,111 INFO org.apache.hadoop.mapreduce.Job:  map 12% reduce 0%
+2017-03-18 08:18:44,182 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:45,116 INFO org.apache.hadoop.mapreduce.Job:  map 23% reduce 0%
+2017-03-18 08:18:47,183 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:48,126 INFO org.apache.hadoop.mapreduce.Job:  map 35% reduce 0%
+2017-03-18 08:18:50,184 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:51,133 INFO org.apache.hadoop.mapreduce.Job:  map 46% reduce 0%
+2017-03-18 08:18:53,186 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:54,135 INFO org.apache.hadoop.mapreduce.Job:  map 57% reduce 0%
+2017-03-18 08:18:55,937 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
+2017-03-18 08:18:55,945 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 08:18:55,948 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 08:18:55,950 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 5961092; bufvoid = 104857600
+2017-03-18 08:18:55,951 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 23837324(95349296); length = 2377073/6553600
+2017-03-18 08:18:56,187 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
+2017-03-18 08:18:57,139 INFO org.apache.hadoop.mapreduce.Job:  map 67% reduce 0%
+2017-03-18 08:18:59,189 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
+2017-03-18 08:19:02,190 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
+2017-03-18 08:19:05,191 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
+2017-03-18 08:19:05,383 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 08:19:05,429 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1486099625_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 08:19:05,442 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 08:19:05,457 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1486099625_0001_m_000000_0' done.
+2017-03-18 08:19:05,457 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1486099625_0001_m_000000_0
+2017-03-18 08:19:05,458 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 08:19:05,477 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 08:19:05,478 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1486099625_0001_r_000000_0
+2017-03-18 08:19:05,540 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 08:19:05,560 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 08:19:05,564 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@3b7b4f72
+2017-03-18 08:19:05,743 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 08:19:05,780 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1486099625_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 08:19:06,090 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1486099625_0001_m_000000_0 decomp: 7149632 len: 7149636 to MEMORY
+2017-03-18 08:19:06,153 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 08:19:06,262 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 7149632 bytes from map-output for attempt_local1486099625_0001_m_000000_0
+2017-03-18 08:19:06,280 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 7149632, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->7149632
+2017-03-18 08:19:06,291 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 08:19:06,292 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 08:19:06,299 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 08:19:06,338 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 08:19:06,354 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7149628 bytes
+2017-03-18 08:19:10,080 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 7149632 bytes to disk to satisfy reduce memory limit
+2017-03-18 08:19:10,091 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 7149636 bytes from disk
+2017-03-18 08:19:10,121 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 08:19:10,121 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 08:19:10,131 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7149628 bytes
+2017-03-18 08:19:10,131 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 08:19:10,215 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 08:19:11,558 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 08:19:12,161 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 67%
+2017-03-18 08:19:14,559 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 08:19:15,163 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 86%
+2017-03-18 08:19:15,939 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1486099625_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 08:19:15,952 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 08:19:15,963 INFO org.apache.hadoop.mapred.Task: Task attempt_local1486099625_0001_r_000000_0 is allowed to commit now
+2017-03-18 08:19:15,970 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1486099625_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/WordCount/_temporary/0/task_local1486099625_0001_r_000000
+2017-03-18 08:19:15,978 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 08:19:15,984 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1486099625_0001_r_000000_0' done.
+2017-03-18 08:19:15,987 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1486099625_0001_r_000000_0
+2017-03-18 08:19:15,989 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 08:19:16,089 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1486099625_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 08:19:16,164 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 08:19:16,164 INFO org.apache.hadoop.mapreduce.Job: Job job_local1486099625_0001 failed with state FAILED due to: NA
+2017-03-18 08:19:16,290 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
+	File System Counters
+		FILE: Number of bytes read=25479428
+		FILE: Number of bytes written=22253006
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=124787
+		Map output records=594269
+		Map output bytes=5961092
+		Map output materialized bytes=7149636
+		Input split bytes=116
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=23927
+		Reduce shuffle bytes=7149636
+		Reduce input records=594269
+		Reduce output records=23927
+		Spilled Records=1188538
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=463
+		Total committed heap usage (bytes)=331227136
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=5589889
+	File Output Format Counters 
+		Bytes Written=249540
+2017-03-18 09:01:02,099 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 09:01:08,193 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 09:01:08,213 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 09:01:10,958 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 09:01:11,100 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 09:01:11,689 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 09:01:14,259 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local858246623_0001
+2017-03-18 09:01:18,212 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 09:01:18,245 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local858246623_0001
+2017-03-18 09:01:18,254 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 09:01:18,356 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:01:18,372 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 09:01:19,033 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 09:01:19,035 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local858246623_0001_m_000000_0
+2017-03-18 09:01:19,258 INFO org.apache.hadoop.mapreduce.Job: Job job_local858246623_0001 running in uber mode : false
+2017-03-18 09:01:19,270 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 09:01:19,420 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:01:19,720 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:01:20,053 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 09:01:23,039 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 09:01:23,732 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 09:01:24,554 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 09:01:24,558 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 09:01:24,558 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 09:01:24,572 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 09:01:24,572 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 09:01:24,836 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 09:01:24,897 INFO org.apache.hadoop.mapred.Task: Task:attempt_local858246623_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 09:01:25,349 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 09:01:25,349 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local858246623_0001_m_000000_0' done.
+2017-03-18 09:01:25,363 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local858246623_0001_m_000000_0
+2017-03-18 09:01:25,377 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 09:01:25,439 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 09:01:25,461 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local858246623_0001_r_000000_0
+2017-03-18 09:01:25,551 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:01:25,553 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:01:25,643 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@3d4a4a50
+2017-03-18 09:01:25,875 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 09:01:25,919 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local858246623_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 09:01:26,318 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 09:01:26,370 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local858246623_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 09:01:26,440 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local858246623_0001_m_000000_0
+2017-03-18 09:01:26,471 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 09:01:26,505 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 09:01:26,506 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:01:26,508 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 09:01:26,592 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:01:26,593 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:01:26,609 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 09:01:26,610 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 09:01:26,635 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 09:01:26,635 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:01:26,636 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:01:26,637 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:01:26,850 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 09:01:26,944 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 09:01:26,966 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local858246623_0001
+java.lang.Exception: java.lang.NullPointerException
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556)
+Caused by: java.lang.NullPointerException
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:170)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1)
+	at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324)
+	at java.util.TimSort.sort(TimSort.java:189)
+	at java.util.TimSort.sort(TimSort.java:173)
+	at java.util.Arrays.sort(Arrays.java:659)
+	at java.util.Collections.sort(Collections.java:217)
+	at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:166)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:214)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1)
+	at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
+	at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
+	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346)
+	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:262)
+	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
+	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
+	at java.lang.Thread.run(Thread.java:745)
+2017-03-18 09:01:27,319 INFO org.apache.hadoop.mapreduce.Job: Job job_local858246623_0001 failed with state FAILED due to: NA
+2017-03-18 09:01:27,573 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
+	File System Counters
+		FILE: Number of bytes read=441
+		FILE: Number of bytes written=276371
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=0
+		Reduce shuffle bytes=454
+		Reduce input records=0
+		Reduce output records=0
+		Spilled Records=27
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=346
+		Total committed heap usage (bytes)=165613568
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=0
+2017-03-18 09:31:14,110 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 09:31:18,747 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 09:31:18,757 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 09:31:23,780 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 09:31:23,837 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 09:31:24,319 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 09:31:26,140 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local805279743_0001
+2017-03-18 09:31:29,121 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 09:31:29,146 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local805279743_0001
+2017-03-18 09:31:29,180 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 09:31:29,413 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:31:29,437 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 09:31:30,024 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 09:31:30,028 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local805279743_0001_m_000000_0
+2017-03-18 09:31:30,184 INFO org.apache.hadoop.mapreduce.Job: Job job_local805279743_0001 running in uber mode : false
+2017-03-18 09:31:30,188 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 09:31:30,374 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:31:30,520 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:31:30,659 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 09:31:32,579 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 09:31:32,581 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 09:31:32,678 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 09:31:32,914 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 09:31:32,914 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 09:31:32,918 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 09:31:32,919 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 09:31:32,920 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 09:31:33,203 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 09:31:33,488 INFO org.apache.hadoop.mapred.Task: Task:attempt_local805279743_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 09:31:33,718 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 09:31:33,721 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local805279743_0001_m_000000_0' done.
+2017-03-18 09:31:33,721 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local805279743_0001_m_000000_0
+2017-03-18 09:31:33,724 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 09:31:33,742 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 09:31:33,742 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local805279743_0001_r_000000_0
+2017-03-18 09:31:33,861 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:31:33,862 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:31:33,907 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@6d550439
+2017-03-18 09:31:34,115 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 09:31:34,150 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local805279743_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 09:31:34,321 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 09:31:34,642 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local805279743_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 09:31:34,691 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local805279743_0001_m_000000_0
+2017-03-18 09:31:34,718 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 09:31:34,734 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 09:31:34,738 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:31:34,739 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 09:31:34,811 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:31:34,815 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:31:34,838 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 09:31:34,839 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 09:31:34,851 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 09:31:34,854 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:31:34,855 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:31:34,861 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:31:35,210 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 09:31:35,300 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 09:31:35,325 INFO org.apache.hadoop.mapreduce.Job: Job job_local805279743_0001 failed with state FAILED due to: NA
+2017-03-18 09:31:35,359 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local805279743_0001
+java.lang.Exception: java.lang.NullPointerException
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556)
+Caused by: java.lang.NullPointerException
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:170)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1)
+	at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324)
+	at java.util.TimSort.sort(TimSort.java:189)
+	at java.util.TimSort.sort(TimSort.java:173)
+	at java.util.Arrays.sort(Arrays.java:659)
+	at java.util.Collections.sort(Collections.java:217)
+	at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:165)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:213)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1)
+	at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
+	at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
+	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346)
+	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:262)
+	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
+	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
+	at java.lang.Thread.run(Thread.java:745)
+2017-03-18 09:31:35,574 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
+	File System Counters
+		FILE: Number of bytes read=441
+		FILE: Number of bytes written=276371
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=0
+		Reduce shuffle bytes=454
+		Reduce input records=0
+		Reduce output records=0
+		Spilled Records=27
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=325
+		Total committed heap usage (bytes)=165613568
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=0
+2017-03-18 09:34:34,236 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 09:34:37,311 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 09:34:37,333 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 09:34:39,465 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 09:34:39,510 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 09:34:40,107 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 09:34:41,786 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1657624619_0001
+2017-03-18 09:34:43,586 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 09:34:43,588 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1657624619_0001
+2017-03-18 09:34:43,604 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 09:34:43,680 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:34:43,691 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 09:34:44,104 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 09:34:44,105 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1657624619_0001_m_000000_0
+2017-03-18 09:34:44,380 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:34:44,514 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:34:44,590 INFO org.apache.hadoop.mapreduce.Job: Job job_local1657624619_0001 running in uber mode : false
+2017-03-18 09:34:44,593 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 09:34:44,605 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 09:34:45,967 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 09:34:46,143 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 09:34:46,150 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 09:34:46,152 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 09:34:46,155 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 09:34:46,156 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 09:34:46,625 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 09:34:46,666 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1657624619_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 09:34:46,757 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 09:34:46,769 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1657624619_0001_m_000000_0' done.
+2017-03-18 09:34:46,771 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1657624619_0001_m_000000_0
+2017-03-18 09:34:46,774 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 09:34:46,793 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 09:34:46,794 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1657624619_0001_r_000000_0
+2017-03-18 09:34:46,861 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:34:46,873 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:34:46,903 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@17b42596
+2017-03-18 09:34:47,055 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 09:34:47,112 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1657624619_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 09:34:47,365 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1657624619_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 09:34:47,394 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1657624619_0001_m_000000_0
+2017-03-18 09:34:47,406 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 09:34:47,430 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 09:34:47,433 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:34:47,439 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 09:34:47,496 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:34:47,514 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:34:47,523 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 09:34:47,525 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 09:34:47,536 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 09:34:47,539 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:34:47,540 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:34:47,543 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:34:47,602 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 09:34:48,903 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 09:34:48,996 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1657624619_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 09:34:49,029 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:34:49,030 INFO org.apache.hadoop.mapred.Task: Task attempt_local1657624619_0001_r_000000_0 is allowed to commit now
+2017-03-18 09:34:49,032 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1657624619_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1657624619_0001_r_000000
+2017-03-18 09:34:49,051 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 09:34:49,066 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1657624619_0001_r_000000_0' done.
+2017-03-18 09:34:49,067 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1657624619_0001_r_000000_0
+2017-03-18 09:34:49,067 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 09:34:49,153 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1657624619_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 09:34:49,610 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 09:34:49,611 INFO org.apache.hadoop.mapreduce.Job: Job job_local1657624619_0001 failed with state FAILED due to: NA
+2017-03-18 09:34:49,781 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=556327
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=402
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=191
+2017-03-18 09:41:04,072 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 09:41:07,127 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 09:41:07,155 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 09:41:09,446 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 09:41:09,559 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 09:41:10,078 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 09:41:11,652 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1710867457_0001
+2017-03-18 09:41:13,534 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 09:41:13,536 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1710867457_0001
+2017-03-18 09:41:13,552 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 09:41:13,625 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:41:13,642 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 09:41:14,050 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 09:41:14,052 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1710867457_0001_m_000000_0
+2017-03-18 09:41:14,328 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:41:14,459 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:41:14,552 INFO org.apache.hadoop.mapreduce.Job: Job job_local1710867457_0001 running in uber mode : false
+2017-03-18 09:41:14,561 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 09:41:14,575 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 09:41:17,724 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 09:41:17,724 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 09:41:17,856 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 09:41:18,043 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 09:41:18,043 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 09:41:18,046 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 09:41:18,049 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 09:41:18,104 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 09:41:18,279 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 09:41:18,363 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1710867457_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 09:41:18,490 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 09:41:18,496 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1710867457_0001_m_000000_0' done.
+2017-03-18 09:41:18,497 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1710867457_0001_m_000000_0
+2017-03-18 09:41:18,499 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 09:41:18,514 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 09:41:18,514 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1710867457_0001_r_000000_0
+2017-03-18 09:41:18,597 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 09:41:18,627 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 09:41:18,628 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 09:41:18,803 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@59b8ca86
+2017-03-18 09:41:19,007 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 09:41:19,044 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1710867457_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 09:41:19,394 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1710867457_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 09:41:19,474 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1710867457_0001_m_000000_0
+2017-03-18 09:41:19,503 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 09:41:19,524 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 09:41:19,530 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:41:19,531 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 09:41:19,621 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:41:19,629 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:41:19,668 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 09:41:19,669 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 09:41:19,680 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 09:41:19,681 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 09:41:19,683 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 09:41:19,688 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:41:20,851 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 09:41:20,932 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1710867457_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 09:41:20,969 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 09:41:20,970 INFO org.apache.hadoop.mapred.Task: Task attempt_local1710867457_0001_r_000000_0 is allowed to commit now
+2017-03-18 09:41:20,972 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1710867457_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1710867457_0001_r_000000
+2017-03-18 09:41:20,986 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1710867457_0001_r_000000_0' done.
+2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1710867457_0001_r_000000_0
+2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 09:41:21,089 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1710867457_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 09:41:21,607 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 09:41:21,608 INFO org.apache.hadoop.mapreduce.Job: Job job_local1710867457_0001 failed with state FAILED due to: NA
+2017-03-18 09:41:21,819 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=556327
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=412
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=191
+2017-03-18 10:02:31,717 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:02:36,195 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:02:36,217 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:02:39,313 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:02:39,437 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:02:39,946 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:02:41,475 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1299112731_0001
+2017-03-18 10:02:43,571 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:02:43,578 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1299112731_0001
+2017-03-18 10:02:43,622 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:02:43,714 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:02:43,725 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:02:44,329 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:02:44,331 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1299112731_0001_m_000000_0
+2017-03-18 10:02:44,616 INFO org.apache.hadoop.mapreduce.Job: Job job_local1299112731_0001 running in uber mode : false
+2017-03-18 10:02:44,639 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:02:44,697 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:02:44,894 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:02:45,012 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:02:48,005 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:02:48,007 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:02:48,081 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:02:48,152 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:02:48,176 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:02:48,295 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:02:48,319 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1299112731_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:02:48,446 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1299112731_0001_m_000000_0' done.
+2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1299112731_0001_m_000000_0
+2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:02:48,472 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:02:48,473 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1299112731_0001_r_000000_0
+2017-03-18 10:02:48,592 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:02:48,594 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:02:48,633 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@621e4b65
+2017-03-18 10:02:48,656 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:02:49,394 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:02:49,427 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1299112731_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:02:50,207 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1299112731_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:02:50,243 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1299112731_0001_m_000000_0
+2017-03-18 10:02:50,259 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:02:50,283 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:02:50,285 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:02:50,285 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:02:50,412 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:02:50,413 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:02:50,428 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:02:50,430 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:02:50,448 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:02:50,454 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:02:50,456 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:02:50,459 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:02:51,864 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:02:51,948 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1299112731_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:02:51,965 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:02:51,965 INFO org.apache.hadoop.mapred.Task: Task attempt_local1299112731_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:02:51,980 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1299112731_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1299112731_0001_r_000000
+2017-03-18 10:02:51,998 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1299112731_0001_r_000000_0' done.
+2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1299112731_0001_r_000000_0
+2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:02:52,093 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1299112731_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:02:52,734 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:02:52,734 INFO org.apache.hadoop.mapreduce.Job: Job job_local1299112731_0001 failed with state FAILED due to: NA
+2017-03-18 10:02:52,903 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=556241
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=402
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=105
+2017-03-18 10:09:55,517 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:09:58,604 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:09:58,648 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:10:00,578 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:10:00,646 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:10:01,161 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:10:02,797 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local525023421_0001
+2017-03-18 10:10:04,560 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:10:04,562 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local525023421_0001
+2017-03-18 10:10:04,593 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:10:04,641 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:10:04,655 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:10:05,048 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:10:05,050 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local525023421_0001_m_000000_0
+2017-03-18 10:10:05,306 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:10:05,423 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:10:05,503 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:10:05,595 INFO org.apache.hadoop.mapreduce.Job: Job job_local525023421_0001 running in uber mode : false
+2017-03-18 10:10:05,628 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:10:06,691 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:10:06,691 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:10:06,780 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:10:06,915 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:10:06,929 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:10:06,939 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:10:06,942 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:10:06,945 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:10:07,074 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:10:07,102 INFO org.apache.hadoop.mapred.Task: Task:attempt_local525023421_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:10:07,178 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:10:07,201 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local525023421_0001_m_000000_0' done.
+2017-03-18 10:10:07,201 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local525023421_0001_m_000000_0
+2017-03-18 10:10:07,205 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:10:07,221 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:10:07,222 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local525023421_0001_r_000000_0
+2017-03-18 10:10:07,279 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:10:07,280 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:10:07,323 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1d8c3d92
+2017-03-18 10:10:07,485 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:10:07,525 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local525023421_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:10:07,648 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:10:09,046 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local525023421_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:10:09,124 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local525023421_0001_m_000000_0
+2017-03-18 10:10:09,139 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:10:09,164 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:10:09,166 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:10:09,167 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:10:09,224 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:10:09,227 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:10:09,244 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:10:09,245 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:10:09,258 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:10:09,261 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:10:09,262 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:10:09,271 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:10:10,314 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:10:10,381 INFO org.apache.hadoop.mapred.Task: Task:attempt_local525023421_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:10:10,409 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:10:10,410 INFO org.apache.hadoop.mapred.Task: Task attempt_local525023421_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:10:10,411 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local525023421_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local525023421_0001_r_000000
+2017-03-18 10:10:10,426 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:10:10,443 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local525023421_0001_r_000000_0' done.
+2017-03-18 10:10:10,443 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local525023421_0001_r_000000_0
+2017-03-18 10:10:10,444 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:10:10,525 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local525023421_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:10:10,655 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:10:10,655 INFO org.apache.hadoop.mapreduce.Job: Job job_local525023421_0001 failed with state FAILED due to: NA
+2017-03-18 10:10:10,821 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=553387
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=376
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=191
+2017-03-18 10:13:43,339 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:13:46,756 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:13:46,793 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:13:49,193 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:13:49,248 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:13:49,894 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:13:52,018 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local156256502_0001
+2017-03-18 10:13:54,111 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:13:54,113 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local156256502_0001
+2017-03-18 10:13:54,135 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:13:54,200 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:13:54,217 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:13:54,625 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:13:54,627 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local156256502_0001_m_000000_0
+2017-03-18 10:13:54,933 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:13:55,069 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:13:55,130 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:13:55,149 INFO org.apache.hadoop.mapreduce.Job: Job job_local156256502_0001 running in uber mode : false
+2017-03-18 10:13:55,177 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:13:56,487 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:13:56,706 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:13:56,775 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:13:56,784 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:13:56,785 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:13:56,787 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:13:56,787 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:13:56,852 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:13:56,865 INFO org.apache.hadoop.mapred.Task: Task:attempt_local156256502_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:13:56,971 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:13:56,977 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local156256502_0001_m_000000_0' done.
+2017-03-18 10:13:56,978 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local156256502_0001_m_000000_0
+2017-03-18 10:13:56,979 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:13:56,996 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:13:56,997 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local156256502_0001_r_000000_0
+2017-03-18 10:13:57,081 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:13:57,083 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:13:57,095 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e
+2017-03-18 10:13:57,183 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:13:57,313 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:13:57,352 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local156256502_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:13:58,019 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local156256502_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:13:58,075 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local156256502_0001_m_000000_0
+2017-03-18 10:13:58,104 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:13:58,117 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:13:58,118 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:13:58,127 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:13:58,181 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:13:58,181 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:13:58,199 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:13:58,200 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:13:58,217 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:13:58,218 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:13:58,220 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:13:58,226 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:13:59,506 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:13:59,579 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:13:59,603 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local156256502_0001
+java.lang.Exception: java.lang.NullPointerException
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556)
+Caused by: java.lang.NullPointerException
+	at java.lang.Integer.compareTo(Integer.java:1003)
+	at java.lang.Integer.compareTo(Integer.java:52)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:153)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1)
+	at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324)
+	at java.util.TimSort.sort(TimSort.java:189)
+	at java.util.TimSort.sort(TimSort.java:173)
+	at java.util.Arrays.sort(Arrays.java:659)
+	at java.util.Collections.sort(Collections.java:217)
+	at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:148)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:185)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1)
+	at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
+	at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
+	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346)
+	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:262)
+	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
+	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
+	at java.lang.Thread.run(Thread.java:745)
+2017-03-18 10:14:00,192 INFO org.apache.hadoop.mapreduce.Job: Job job_local156256502_0001 failed with state FAILED due to: NA
+2017-03-18 10:14:00,357 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
+	File System Counters
+		FILE: Number of bytes read=441
+		FILE: Number of bytes written=276371
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=0
+		Reduce shuffle bytes=454
+		Reduce input records=0
+		Reduce output records=0
+		Spilled Records=27
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=318
+		Total committed heap usage (bytes)=165613568
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=0
+2017-03-18 10:15:20,636 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:15:23,641 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:15:23,686 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:15:25,708 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:15:25,787 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:15:26,311 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:15:27,986 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local773198118_0001
+2017-03-18 10:15:29,767 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:15:29,769 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local773198118_0001
+2017-03-18 10:15:29,780 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:15:29,852 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:15:29,853 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:15:30,269 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:15:30,271 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local773198118_0001_m_000000_0
+2017-03-18 10:15:30,523 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:15:30,627 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:15:30,714 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:15:30,771 INFO org.apache.hadoop.mapreduce.Job: Job job_local773198118_0001 running in uber mode : false
+2017-03-18 10:15:30,774 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:15:32,116 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:15:32,158 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:15:32,254 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:15:32,268 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:15:32,270 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:15:32,271 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 419; bufvoid = 104857600
+2017-03-18 10:15:32,277 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214284(104857136); length = 113/6553600
+2017-03-18 10:15:32,382 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:15:32,418 INFO org.apache.hadoop.mapred.Task: Task:attempt_local773198118_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:15:32,504 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:15:32,513 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local773198118_0001_m_000000_0' done.
+2017-03-18 10:15:32,513 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local773198118_0001_m_000000_0
+2017-03-18 10:15:32,514 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:15:32,529 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:15:32,530 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local773198118_0001_r_000000_0
+2017-03-18 10:15:32,664 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:15:32,666 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:15:32,756 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@71e2b09b
+2017-03-18 10:15:32,787 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:15:33,029 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:15:33,082 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local773198118_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:15:33,366 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local773198118_0001_m_000000_0 decomp: 479 len: 483 to MEMORY
+2017-03-18 10:15:33,384 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 479 bytes from map-output for attempt_local773198118_0001_m_000000_0
+2017-03-18 10:15:33,401 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 479, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->479
+2017-03-18 10:15:33,415 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:15:33,417 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:15:33,418 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:15:33,476 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:15:33,487 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 469 bytes
+2017-03-18 10:15:33,496 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 479 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:15:33,498 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 483 bytes from disk
+2017-03-18 10:15:33,513 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:15:33,515 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:15:33,517 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 469 bytes
+2017-03-18 10:15:33,519 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:15:34,620 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:15:34,685 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:15:34,694 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local773198118_0001
+java.lang.Exception: java.lang.NullPointerException
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556)
+Caused by: java.lang.NullPointerException
+	at java.lang.Integer.compareTo(Integer.java:1003)
+	at java.lang.Integer.compareTo(Integer.java:52)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:145)
+	at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1)
+	at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324)
+	at java.util.TimSort.sort(TimSort.java:189)
+	at java.util.TimSort.sort(TimSort.java:173)
+	at java.util.Arrays.sort(Arrays.java:659)
+	at java.util.Collections.sort(Collections.java:217)
+	at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:140)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:177)
+	at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1)
+	at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
+	at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
+	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
+	at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346)
+	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
+	at java.util.concurrent.FutureTask.run(FutureTask.java:262)
+	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
+	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
+	at java.lang.Thread.run(Thread.java:745)
+2017-03-18 10:15:34,796 INFO org.apache.hadoop.mapreduce.Job: Job job_local773198118_0001 failed with state FAILED due to: NA
+2017-03-18 10:15:34,977 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
+	File System Counters
+		FILE: Number of bytes read=441
+		FILE: Number of bytes written=276400
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=29
+		Map output bytes=419
+		Map output materialized bytes=483
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=0
+		Reduce shuffle bytes=483
+		Reduce input records=0
+		Reduce output records=0
+		Spilled Records=29
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=299
+		Total committed heap usage (bytes)=165613568
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=0
+2017-03-18 10:16:35,727 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:16:38,709 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:16:38,719 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:16:40,831 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:16:40,909 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:16:41,416 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:16:43,118 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1537778759_0001
+2017-03-18 10:16:44,879 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:16:44,881 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1537778759_0001
+2017-03-18 10:16:44,904 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:16:44,953 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:16:44,960 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:16:45,357 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:16:45,359 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1537778759_0001_m_000000_0
+2017-03-18 10:16:45,587 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:16:45,695 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:16:45,760 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:16:45,909 INFO org.apache.hadoop.mapreduce.Job: Job job_local1537778759_0001 running in uber mode : false
+2017-03-18 10:16:46,263 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:16:46,932 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:16:46,979 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:16:47,062 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:16:47,084 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:16:47,186 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:16:47,209 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1537778759_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:16:47,312 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:16:47,317 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1537778759_0001_m_000000_0' done.
+2017-03-18 10:16:47,318 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1537778759_0001_m_000000_0
+2017-03-18 10:16:47,318 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:16:47,339 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:16:47,343 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1537778759_0001_r_000000_0
+2017-03-18 10:16:47,408 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:16:47,409 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:16:47,441 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1d8c3d92
+2017-03-18 10:16:47,582 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:16:47,624 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1537778759_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:16:47,865 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1537778759_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:16:47,890 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1537778759_0001_m_000000_0
+2017-03-18 10:16:47,898 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:16:47,920 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:16:47,922 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:16:47,923 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:16:47,981 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:16:47,992 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:16:48,001 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:16:48,005 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:16:48,033 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:16:48,039 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:16:48,056 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:16:48,057 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:16:48,268 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:16:49,175 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:16:49,250 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1537778759_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:16:49,281 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:16:49,281 INFO org.apache.hadoop.mapred.Task: Task attempt_local1537778759_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:16:49,283 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1537778759_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1537778759_0001_r_000000
+2017-03-18 10:16:49,317 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1537778759_0001_r_000000_0' done.
+2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1537778759_0001_r_000000_0
+2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:16:49,412 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1537778759_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:16:50,272 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:16:50,272 INFO org.apache.hadoop.mapreduce.Job: Job job_local1537778759_0001 failed with state FAILED due to: NA
+2017-03-18 10:16:50,425 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=556327
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=401
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=191
+2017-03-18 10:17:33,330 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:17:36,373 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:17:36,409 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:17:38,467 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:17:38,568 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:17:39,104 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:17:40,665 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local270399040_0001
+2017-03-18 10:17:42,497 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:17:42,499 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local270399040_0001
+2017-03-18 10:17:42,524 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:17:42,593 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:17:42,607 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:17:43,008 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:17:43,010 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local270399040_0001_m_000000_0
+2017-03-18 10:17:43,262 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:17:43,388 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:17:43,474 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:17:43,524 INFO org.apache.hadoop.mapreduce.Job: Job job_local270399040_0001 running in uber mode : false
+2017-03-18 10:17:43,527 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:17:44,458 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:17:44,523 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:17:44,609 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:17:44,615 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:17:44,677 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:17:44,701 INFO org.apache.hadoop.mapred.Task: Task:attempt_local270399040_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:17:44,778 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:17:44,797 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local270399040_0001_m_000000_0' done.
+2017-03-18 10:17:44,798 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local270399040_0001_m_000000_0
+2017-03-18 10:17:44,799 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:17:44,813 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:17:44,814 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local270399040_0001_r_000000_0
+2017-03-18 10:17:44,875 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:17:44,877 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:17:44,907 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e
+2017-03-18 10:17:45,037 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:17:45,062 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local270399040_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:17:45,290 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local270399040_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:17:45,313 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local270399040_0001_m_000000_0
+2017-03-18 10:17:45,333 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:17:45,346 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:17:45,347 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:17:45,353 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:17:45,415 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:17:45,416 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:17:45,438 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:17:45,444 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:17:45,447 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:17:45,450 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:17:45,452 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:17:45,455 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:17:45,552 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:17:46,562 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:17:46,602 INFO org.apache.hadoop.mapred.Task: Task:attempt_local270399040_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:17:46,646 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:17:46,647 INFO org.apache.hadoop.mapred.Task: Task attempt_local270399040_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:17:46,648 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local270399040_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local270399040_0001_r_000000
+2017-03-18 10:17:46,662 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:17:46,663 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local270399040_0001_r_000000_0' done.
+2017-03-18 10:17:46,663 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local270399040_0001_r_000000_0
+2017-03-18 10:17:46,675 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:17:46,741 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local270399040_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:17:47,556 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:17:47,557 INFO org.apache.hadoop.mapreduce.Job: Job job_local270399040_0001 failed with state FAILED due to: NA
+2017-03-18 10:17:47,666 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=553387
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=352
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=191
+2017-03-18 10:22:19,077 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:22:22,159 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:22:22,175 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:22:23,976 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:22:24,049 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:22:24,419 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:22:25,911 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local315696638_0001
+2017-03-18 10:22:27,672 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:22:27,674 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local315696638_0001
+2017-03-18 10:22:27,699 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:22:27,747 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:22:27,760 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:22:28,186 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:22:28,188 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local315696638_0001_m_000000_0
+2017-03-18 10:22:28,454 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:22:28,572 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:22:28,660 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:22:28,702 INFO org.apache.hadoop.mapreduce.Job: Job job_local315696638_0001 running in uber mode : false
+2017-03-18 10:22:28,706 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:22:29,934 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:22:29,937 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:22:29,937 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:22:29,938 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:22:29,939 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:22:30,041 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:22:30,153 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:22:30,153 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:22:30,156 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:22:30,157 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:22:30,158 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:22:30,251 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:22:30,285 INFO org.apache.hadoop.mapred.Task: Task:attempt_local315696638_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:22:30,389 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:22:30,392 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local315696638_0001_m_000000_0' done.
+2017-03-18 10:22:30,395 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local315696638_0001_m_000000_0
+2017-03-18 10:22:30,397 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:22:30,423 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:22:30,423 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local315696638_0001_r_000000_0
+2017-03-18 10:22:30,515 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:22:30,516 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:22:30,560 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@61ea9ce5
+2017-03-18 10:22:30,728 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:22:30,778 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:22:30,810 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local315696638_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:22:31,123 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local315696638_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:22:31,197 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local315696638_0001_m_000000_0
+2017-03-18 10:22:31,205 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:22:31,222 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:22:31,224 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:22:31,225 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:22:31,302 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:22:31,302 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:22:31,308 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:22:31,310 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:22:31,321 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:22:31,324 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:22:31,325 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:22:31,332 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:22:32,704 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:22:32,785 INFO org.apache.hadoop.mapred.Task: Task:attempt_local315696638_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:22:32,822 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:22:32,823 INFO org.apache.hadoop.mapred.Task: Task attempt_local315696638_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:22:32,824 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local315696638_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local315696638_0001_r_000000
+2017-03-18 10:22:32,839 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:22:32,839 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local315696638_0001_r_000000_0' done.
+2017-03-18 10:22:32,840 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local315696638_0001_r_000000_0
+2017-03-18 10:22:32,847 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:22:32,941 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local315696638_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:22:33,739 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:22:33,739 INFO org.apache.hadoop.mapreduce.Job: Job job_local315696638_0001 failed with state FAILED due to: NA
+2017-03-18 10:22:33,890 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=553366
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=370
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=170
+2017-03-18 10:23:34,780 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:23:37,781 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:23:37,804 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:23:39,906 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:23:39,934 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:23:40,482 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:23:42,200 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1059822994_0001
+2017-03-18 10:23:43,991 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:23:43,993 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1059822994_0001
+2017-03-18 10:23:44,009 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:23:44,076 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:23:44,083 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:23:44,493 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:23:44,494 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1059822994_0001_m_000000_0
+2017-03-18 10:23:44,761 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:23:44,863 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:23:44,943 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264
+2017-03-18 10:23:44,997 INFO org.apache.hadoop.mapreduce.Job: Job job_local1059822994_0001 running in uber mode : false
+2017-03-18 10:23:45,000 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:23:46,552 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:23:46,686 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:23:46,691 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:23:46,791 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:23:46,829 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1059822994_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:23:46,891 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:23:46,910 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1059822994_0001_m_000000_0' done.
+2017-03-18 10:23:46,916 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1059822994_0001_m_000000_0
+2017-03-18 10:23:46,918 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:23:46,937 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:23:46,938 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1059822994_0001_r_000000_0
+2017-03-18 10:23:46,985 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:23:46,987 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:23:47,006 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:23:47,019 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@621e4b65
+2017-03-18 10:23:47,182 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:23:47,211 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1059822994_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:23:47,457 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1059822994_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:23:47,473 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1059822994_0001_m_000000_0
+2017-03-18 10:23:47,485 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:23:47,507 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:23:47,508 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:23:47,509 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:23:47,550 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:23:47,553 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:23:47,571 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:23:47,573 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:23:47,576 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:23:47,580 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:23:47,583 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:23:47,592 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:23:48,680 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:23:48,764 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1059822994_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:23:48,784 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:23:48,785 INFO org.apache.hadoop.mapred.Task: Task attempt_local1059822994_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:23:48,786 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1059822994_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1059822994_0001_r_000000
+2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1059822994_0001_r_000000_0' done.
+2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1059822994_0001_r_000000_0
+2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:23:48,892 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1059822994_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:23:49,011 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:23:49,012 INFO org.apache.hadoop.mapreduce.Job: Job job_local1059822994_0001 failed with state FAILED due to: NA
+2017-03-18 10:23:49,202 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1822
+		FILE: Number of bytes written=556331
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=4
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=395
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=264
+	File Output Format Counters 
+		Bytes Written=195
+2017-03-18 10:25:10,795 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:25:13,885 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:25:13,914 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:25:15,934 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:25:16,030 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:25:16,475 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:25:18,086 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1796876123_0001
+2017-03-18 10:25:19,864 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:25:19,865 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1796876123_0001
+2017-03-18 10:25:19,882 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:25:19,944 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:25:19,957 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:25:20,353 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:25:20,355 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1796876123_0001_m_000000_0
+2017-03-18 10:25:20,601 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:25:20,721 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:25:20,840 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+265
+2017-03-18 10:25:20,900 INFO org.apache.hadoop.mapreduce.Job: Job job_local1796876123_0001 running in uber mode : false
+2017-03-18 10:25:20,904 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:25:21,990 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:25:22,008 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:25:22,010 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:25:22,010 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:25:22,011 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:25:22,041 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:25:22,092 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:25:22,114 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:25:22,182 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:25:22,206 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1796876123_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:25:22,299 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:25:22,310 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1796876123_0001_m_000000_0' done.
+2017-03-18 10:25:22,311 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1796876123_0001_m_000000_0
+2017-03-18 10:25:22,313 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:25:22,331 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:25:22,332 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1796876123_0001_r_000000_0
+2017-03-18 10:25:22,403 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:25:22,404 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:25:22,436 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e
+2017-03-18 10:25:22,557 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:25:22,587 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1796876123_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:25:22,859 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1796876123_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:25:22,878 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1796876123_0001_m_000000_0
+2017-03-18 10:25:22,896 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:25:22,909 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:25:22,919 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:25:22,920 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:25:22,928 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:25:22,989 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:25:22,992 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:25:23,015 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:25:23,029 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:25:23,096 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:25:23,097 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:25:23,098 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:25:23,105 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:25:24,357 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:25:24,432 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1796876123_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:25:24,467 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:25:24,472 INFO org.apache.hadoop.mapred.Task: Task attempt_local1796876123_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:25:24,474 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1796876123_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1796876123_0001_r_000000
+2017-03-18 10:25:24,488 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1796876123_0001_r_000000_0' done.
+2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1796876123_0001_r_000000_0
+2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:25:24,585 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1796876123_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:25:24,913 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:25:24,914 INFO org.apache.hadoop.mapreduce.Job: Job job_local1796876123_0001 failed with state FAILED due to: NA
+2017-03-18 10:25:25,061 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1824
+		FILE: Number of bytes written=556331
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=5
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=389
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=265
+	File Output Format Counters 
+		Bytes Written=195
+2017-03-18 10:28:16,986 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2017-03-18 10:28:20,013 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2017-03-18 10:28:20,040 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
+2017-03-18 10:28:21,855 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
+2017-03-18 10:28:21,901 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
+2017-03-18 10:28:22,330 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
+2017-03-18 10:28:23,799 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local165609805_0001
+2017-03-18 10:28:25,593 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
+2017-03-18 10:28:25,594 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local165609805_0001
+2017-03-18 10:28:25,599 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
+2017-03-18 10:28:25,657 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:28:25,668 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+2017-03-18 10:28:26,042 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
+2017-03-18 10:28:26,044 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local165609805_0001_m_000000_0
+2017-03-18 10:28:26,287 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:28:26,392 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:28:26,470 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+265
+2017-03-18 10:28:26,599 INFO org.apache.hadoop.mapreduce.Job: Job job_local165609805_0001 running in uber mode : false
+2017-03-18 10:28:26,608 INFO org.apache.hadoop.mapreduce.Job:  map 0% reduce 0%
+2017-03-18 10:28:27,393 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2017-03-18 10:28:27,395 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2017-03-18 10:28:27,396 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2017-03-18 10:28:27,396 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2017-03-18 10:28:27,397 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2017-03-18 10:28:27,422 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2017-03-18 10:28:27,485 INFO org.apache.hadoop.mapred.LocalJobRunner: 
+2017-03-18 10:28:27,500 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
+2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
+2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600
+2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600
+2017-03-18 10:28:27,565 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
+2017-03-18 10:28:27,589 INFO org.apache.hadoop.mapred.Task: Task:attempt_local165609805_0001_m_000000_0 is done. And is in the process of committing
+2017-03-18 10:28:27,669 INFO org.apache.hadoop.mapred.LocalJobRunner: map
+2017-03-18 10:28:27,679 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local165609805_0001_m_000000_0' done.
+2017-03-18 10:28:27,681 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local165609805_0001_m_000000_0
+2017-03-18 10:28:27,683 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
+2017-03-18 10:28:27,696 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
+2017-03-18 10:28:27,696 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local165609805_0001_r_000000_0
+2017-03-18 10:28:27,747 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
+2017-03-18 10:28:27,749 INFO org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
+2017-03-18 10:28:27,777 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@7bd00baf
+2017-03-18 10:28:27,906 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
+2017-03-18 10:28:27,958 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local165609805_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
+2017-03-18 10:28:28,177 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local165609805_0001_m_000000_0 decomp: 450 len: 454 to MEMORY
+2017-03-18 10:28:28,204 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local165609805_0001_m_000000_0
+2017-03-18 10:28:28,218 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450
+2017-03-18 10:28:28,248 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
+2017-03-18 10:28:28,250 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:28:28,259 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
+2017-03-18 10:28:28,293 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:28:28,294 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:28:28,322 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit
+2017-03-18 10:28:28,326 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk
+2017-03-18 10:28:28,329 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
+2017-03-18 10:28:28,333 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
+2017-03-18 10:28:28,343 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes
+2017-03-18 10:28:28,344 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:28:28,621 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 0%
+2017-03-18 10:28:29,441 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
+2017-03-18 10:28:29,487 INFO org.apache.hadoop.mapred.Task: Task:attempt_local165609805_0001_r_000000_0 is done. And is in the process of committing
+2017-03-18 10:28:29,534 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
+2017-03-18 10:28:29,534 INFO org.apache.hadoop.mapred.Task: Task attempt_local165609805_0001_r_000000_0 is allowed to commit now
+2017-03-18 10:28:29,536 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local165609805_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local165609805_0001_r_000000
+2017-03-18 10:28:29,554 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
+2017-03-18 10:28:29,567 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local165609805_0001_r_000000_0' done.
+2017-03-18 10:28:29,567 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local165609805_0001_r_000000_0
+2017-03-18 10:28:29,568 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
+2017-03-18 10:28:29,624 INFO org.apache.hadoop.mapreduce.Job:  map 100% reduce 100%
+2017-03-18 10:28:29,642 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local165609805_0001
+java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
+	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
+Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
+	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
+	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
+	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
+	... 1 more
+2017-03-18 10:28:30,631 INFO org.apache.hadoop.mapreduce.Job: Job job_local165609805_0001 failed with state FAILED due to: NA
+2017-03-18 10:28:30,746 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
+	File System Counters
+		FILE: Number of bytes read=1824
+		FILE: Number of bytes written=553387
+		FILE: Number of read operations=0
+		FILE: Number of large read operations=0
+		FILE: Number of write operations=0
+	Map-Reduce Framework
+		Map input records=5
+		Map output records=27
+		Map output bytes=394
+		Map output materialized bytes=454
+		Input split bytes=121
+		Combine input records=0
+		Combine output records=0
+		Reduce input groups=4
+		Reduce shuffle bytes=454
+		Reduce input records=27
+		Reduce output records=4
+		Spilled Records=54
+		Shuffled Maps =1
+		Failed Shuffles=0
+		Merged Map outputs=1
+		GC time elapsed (ms)=362
+		Total committed heap usage (bytes)=331227136
+	Preprocessing.Preprocessing_1$COUNTS
+		COUNT_LINES=4
+	Shuffle Errors
+		BAD_ID=0
+		CONNECTION=0
+		IO_ERROR=0
+		WRONG_LENGTH=0
+		WRONG_MAP=0
+		WRONG_REDUCE=0
+	File Input Format Counters 
+		Bytes Read=265
+	File Output Format Counters 
+		Bytes Written=191
diff --git a/Assign2/pg100_test.txt b/Assign2/pg100_test.txt
index 6cb295c..a407ab2 100644
--- a/Assign2/pg100_test.txt
+++ b/Assign2/pg100_test.txt
@@ -1,4 +1,5 @@
 This eBook is for the use of anyone anywhere at no cost and with anyone cost
-almost no restrictions whatsoever.  You may copy it, give it away or
+almost no restrictions whatsoever.  You may copy it, give it away or
+
 re-use it under the terms of the Project Gutenberg License included
 with this eBook or online at www.gutenberg.org
diff --git a/Assign2/pg100_test.txt~ b/Assign2/pg100_test.txt~
index b212032..6cb295c 100644
--- a/Assign2/pg100_test.txt~
+++ b/Assign2/pg100_test.txt~
@@ -1,4 +1,4 @@
-This eBook is for the use of anyone anywhere at no cost and with
+This eBook is for the use of anyone anywhere at no cost and with anyone cost
 almost no restrictions whatsoever.  You may copy it, give it away or
 re-use it under the terms of the Project Gutenberg License included
 with this eBook or online at www.gutenberg.org
diff --git a/Assign2/src/Preprocessing/Preprocessing_1.java b/Assign2/src/Preprocessing/Preprocessing_1.java
index 3ba0873..8018614 100644
--- a/Assign2/src/Preprocessing/Preprocessing_1.java
+++ b/Assign2/src/Preprocessing/Preprocessing_1.java
@@ -1,10 +1,24 @@
 package Preprocessing;
 
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map.Entry;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
@@ -19,19 +33,8 @@ import org.apache.hadoop.util.ToolRunner;
 
 
 
-import java.io.*;
-import java.util.*;
-
-
 public class Preprocessing_1 extends Configured implements Tool {
 
-   
-   public static enum COUNTER {
-		  COUNT_LINES
-		  };
-		  
-		  
-
    public static void main(String[] args) throws Exception {
 	   
       System.out.println(Arrays.toString(args));
@@ -41,6 +44,8 @@ public class Preprocessing_1 extends Configured implements Tool {
       
       System.exit(res);
    }
+   
+   public static enum COUNTS {COUNT_LINES};
 
    @Override
    public int run(String[] args) throws Exception {
@@ -72,14 +77,14 @@ public class Preprocessing_1 extends Configured implements Tool {
 
       job.waitForCompletion(true);
 
-      // Write counter to file
-      long counter = job.getCounters().findCounter(COUNTER.COUNT_LINES).getValue();
-      Path outFile = new Path(new Path(args[1]),"NB_LINES_AFTER_Preprocessing.txt");
-      BufferedWriter writer = new BufferedWriter(
-    		  					new OutputStreamWriter(
-    		  							fs.create(outFile, true)));
-      writer.write(String.valueOf(counter));
-      writer.close();
+
+      long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue();
+      Path countFile = new Path(new Path(args[1]),"nb_output_records.txt");
+      File file = new File(countFile.toString());
+      FileWriter fileWriter = new FileWriter(file);
+      fileWriter.write(String.valueOf(counter));
+      fileWriter.flush();
+      fileWriter.close();
       
       return 0;
    }
@@ -91,167 +96,92 @@ public class Preprocessing_1 extends Configured implements Tool {
    public static class Map extends Mapper<LongWritable, Text, LongWritable, Text> {
       
       private Text word = new Text();
-      private HashSet<String> stopwords = new HashSet<String>();
+      String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
+	  String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file)));
       
-      public Map() throws NumberFormatException, IOException{
-    	  // Default constructor to load one time the stop words file
-          /* Read file of stopwords*/
-          BufferedReader Reader = new BufferedReader(
-                  new FileReader(
-                          new File(
-                                  "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords")));
-
-          /* Add each line (word) in the variable stopwords*/
-          String pattern;
-          while ((pattern = Reader.readLine()) != null) {
-              stopwords.add(pattern.toLowerCase());
-              }
-          
-          Reader.close();
-    	
+      public Map() throws  IOException{
+    	 System.out.println(stopwords);
       }
       @Override
       public void map(LongWritable key, Text value, Context context)
               throws IOException, InterruptedException {
 
-        
-
-        
-        	
         for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) {
 
-          /* if word not in stop words list then we set word with the value then write it into context */
-          
-
-          if (!stopwords.contains(token.toLowerCase())) {
-        	    // if token only contains a blank character we do not write it 
-        	  
-              
-        	  	
+         if (!stopwords.contains(token.toLowerCase())) {
                 word.set(token.toLowerCase());
                 context.write(key, word);
         	  	}
-                
-        
            }
-         
       }
    }
 
-   
-
-   
-   
    public static class Reduce extends Reducer<LongWritable, Text, LongWritable, Text> {
-     
-	   /* Initialise one time a hashmap to store each word of the vocabulary and its global 
-        * frequency in pg100.txt from the wordcountpg100.txt */   
-	 private static HashMap<String,Integer> map_word_count = new HashMap<String,Integer>();
+       
+	 private static HashMap<String,Integer> word_freq = new HashMap<String,Integer>();
 	 
-	 public Reduce() throws NumberFormatException, IOException{
-	 	 
-	      /*Default constructor to store (word,frequency) pair 
-	       * in the created hashmap from the file wordcountpg100.txt */
+	 public Reduce() throws IOException{
 
-		   BufferedReader Reader_count = new BufferedReader(
-		             new FileReader(
-		                      new File(
-		                              "/home/cloudera/workspace/bpa/Assign2/WordCount/WordCount"
-		                      		)));
-		      
-		      String line;
+		 String wordcount_file = "/home/cloudera/workspace/bpa/Assign2/WordCount/WordCount";
+		 String wordcount = new String(Files.readAllBytes(
+				 Paths.get(wordcount_file)));
 
-		      while ((line = Reader_count.readLine()) != null)
-		      {
-		          String[] parts = line.split(",", 2);
-		          if (parts.length >= 2)
-		          {
-		             
-		              map_word_count.put(parts[0].toString(),new Integer (parts[1]));
-		          
-		          } else {
-		              System.out.println("ignoring line: " + line);
-		          }
+		      for (String line : wordcount.split("\n")){
+		          String[] word_count = line.split(",");
+		          word_freq.put(word_count[0],new Integer(word_count[1]));
+		         
 		      }
-		      Reader_count.close();
-		   
 	   } 
 
-
+	/*SOURCE : http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
+	 */
     public static <K, V extends Comparable<? super V>> LinkedHashSet<String> 
-    sortByValue( HashMap<K, V> map ){
-        List<java.util.Map.Entry<K, V>> list = new LinkedList<>( map.entrySet() );
+    sortHM( HashMap<K, V> map ){
+        List<Entry<K, V>> list =
+        		new LinkedList<>( map.entrySet() );
         
-        // sort the list of pairs 
-
-        Collections.sort( list, new Comparator<java.util.Map.Entry<K, V>>()
+        Collections.sort( list, new Comparator<Entry<K, V>>()
         {
             
-            public int compare( java.util.Map.Entry<K, V> o1, java.util.Map.Entry<K, V> o2 )
+            public int compare(Entry<K, V> o1, Entry<K, V> o2 )
             {
                 return (o1.getValue()).compareTo(o2.getValue());
             }
         } );
         
-        // Create LinkedHashset to store the word in ascending order
-
         LinkedHashSet<String> result = new LinkedHashSet<String>();
 
-        for (java.util.Map.Entry<K, V> entry : list)
+        for (Entry<K, V> entry : list)
         {
             result.add(entry.getKey().toString());
         }
         
         return result;
     }
-	   
   	 @Override
       public void reduce(LongWritable key, Iterable<Text> values, Context context)
               throws IOException, InterruptedException {
-    	  
-    	  
-
-    	  
- 
-          /*Create a reduced hashmap where each key is a word for the same 
-           * mapper key and the value is the global frequency with the static hashmap 
-           * word_word_count containing the global frequency of word in pg100.txt*/
-
-         HashMap<String, Integer> map_word_count_key = new HashMap<String, Integer>();
+    	 
+         HashMap<String, Integer> line_word_count = new HashMap<String, Integer>();
          
-         for (Text val : values)
+         for (Text token : values)
          {
-        	 /*store the global frequency of each word for words corresponding to a same key*/
-          map_word_count_key.put(val.toString(),map_word_count.get(val.toString()));
+        	 line_word_count.put(token.toString(),
+        			 word_freq.get(token.toString()));
 
          }
 
-         
-         // Sort Hashmap and return a LinkedHashset (to keep the order) with word in ascending order 
-         // Using the sortByValue method 
-	      
-         LinkedHashSet<String> setvalue = new LinkedHashSet<String>();
-         
-         setvalue = sortByValue(map_word_count_key);
-         
-         /* Concatenate the words in ascending order of frequency */
-         
-         StringBuilder reducedvalue = new StringBuilder();
-         for (String val : setvalue) {
-
-            if (reducedvalue.length() !=0){
-              reducedvalue.append(' ');
-            }
-
-            reducedvalue.append(val);
+         StringBuilder concat_words = new StringBuilder();
+         String prefix = "";
+         for (String token : sortHM(line_word_count)) {
+        	concat_words.append(prefix);
+            prefix = " ";
+            concat_words.append(token);
          }
-         
-
-         // write for each line the words in the ascending order if not empty
-         if(!reducedvalue.toString().isEmpty()){
-             // Increment counter
-         context.getCounter(COUNTER.COUNT_LINES).increment(1);
-         context.write(key, new Text(reducedvalue.toString()));
+ 
+        if(!concat_words.toString().isEmpty()){
+         context.getCounter(COUNTS.COUNT_LINES).increment(1);
+         context.write(key, new Text(concat_words.toString()));
          }
          
        }
diff --git a/Assign2/src/WordCount/WordCount.java b/Assign2/src/WordCount/WordCount.java
index 88784c2..ba848bb 100644
--- a/Assign2/src/WordCount/WordCount.java
+++ b/Assign2/src/WordCount/WordCount.java
@@ -1,10 +1,16 @@
 package WordCount;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -16,20 +22,6 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
-import Preprocessing.Preprocessing_1;
-import Preprocessing.Preprocessing_1.COUNTER;
-import Preprocessing.Preprocessing_1.Map;
-import Preprocessing.Preprocessing_1.Reduce;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.Arrays;
-import java.util.HashSet;
-
 public class WordCount extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
       System.out.println(Arrays.toString(args));
@@ -42,7 +34,7 @@ public class WordCount extends Configured implements Tool {
    public int run(String[] args) throws Exception {
 	   System.out.println(Arrays.toString(args));
 	      Job job = new Job(getConf(), "WordCount");
-	      job.setJarByClass(Preprocessing_1.class);
+	      job.setJarByClass(WordCount.class);
 	      job.setOutputKeyClass(Text.class);
 	      job.setOutputValueClass(IntWritable.class);
 
@@ -68,8 +60,6 @@ public class WordCount extends Configured implements Tool {
 			}
 
 	      job.waitForCompletion(true);
-
-
 	      
 	      return 0;
 	   }
@@ -77,45 +67,20 @@ public class WordCount extends Configured implements Tool {
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
       private final static IntWritable ONE = new IntWritable(1);
       private Text word = new Text();
+      private String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
 
       @Override
       public void map(LongWritable key, Text value, Context context)
               throws IOException, InterruptedException {
-
-
-
-        /* Initialize a hashset variable, set of strings without duplicates*/
-        HashSet<String> stopwords = new HashSet<String>();
-
-        /* Read file of stopwords*/
-        BufferedReader Reader = new BufferedReader(
-                new FileReader(
-                        new File(
-                                "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords")));
-
-        /* Add each line (word) in the variable stopwords*/
-        String pattern;
-        while ((pattern = Reader.readLine()) != null) {
-            stopwords.add(pattern.toLowerCase());
-            }
-
+    	
+    	  String stopwords = new String(Files.readAllBytes(
+  	  			Paths.get(stopwords_file)));
+    	  
         for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) {
-
-          /* if word not in stop words list then we set word with the value then write it into context */
-          
-
           if (!stopwords.contains(token.toLowerCase())) {
                 word.set(token.toLowerCase());
                 context.write(word, ONE);
                 }
-        
-           
-
-           
-                
-        	 	
-        
-
          }
       }
    }
@@ -128,7 +93,6 @@ public class WordCount extends Configured implements Tool {
          for (IntWritable val : values) {
             sum += val.get();
          }
-         
          context.write(key, new IntWritable(sum));
          
       }
-- 
GitLab