From bf10beba658d7dd655f6b43edf04ad3a729cf87e Mon Sep 17 00:00:00 2001 From: cloudera_vm <cloudera@quickstart.cloudera> Date: Sat, 18 Mar 2017 10:29:57 -0700 Subject: [PATCH] Preprocessing test on pg100_test (5 lines with 1 empty) --- .../Preprocessing_1_test/.part-r-00000.crc | Bin 12 -> 12 bytes .../nb_output_records.txt | 1 + Assign2/Preprocessing_1_test/part-r-00000 | 8 +- Assign2/WordCount/.part-r-00000.crc | Bin 1948 -> 1944 bytes Assign2/WordCount/WordCount | 34 - Assign2/hadoop.log | 1821 +++++++++++++++++ Assign2/pg100_test.txt | 3 +- Assign2/pg100_test.txt~ | 2 +- .../src/Preprocessing/Preprocessing_1.java | 196 +- Assign2/src/WordCount/WordCount.java | 62 +- 10 files changed, 1905 insertions(+), 222 deletions(-) create mode 100644 Assign2/Preprocessing_1_test/nb_output_records.txt diff --git a/Assign2/Preprocessing_1_test/.part-r-00000.crc b/Assign2/Preprocessing_1_test/.part-r-00000.crc index 80a2402158e63b6e8612ff830e75990dc9c4bce6..a82a4326b15ab48d1271a57d45ab704cfe992207 100644 GIT binary patch literal 12 TcmYc;N@ieSU}9*iS@Qt^6I27) literal 12 TcmYc;N@ieSU}E5(ziABs5vc=5 diff --git a/Assign2/Preprocessing_1_test/nb_output_records.txt b/Assign2/Preprocessing_1_test/nb_output_records.txt new file mode 100644 index 0000000..bf0d87a --- /dev/null +++ b/Assign2/Preprocessing_1_test/nb_output_records.txt @@ -0,0 +1 @@ +4 \ No newline at end of file diff --git a/Assign2/Preprocessing_1_test/part-r-00000 b/Assign2/Preprocessing_1_test/part-r-00000 index d442095..96f5477 100644 --- a/Assign2/Preprocessing_1_test/part-r-00000 +++ b/Assign2/Preprocessing_1_test/part-r-00000 @@ -1,4 +1,4 @@ -0,anyone anywhere ebook cost use at no -78,restrictions whatsoever copy almost away give may or no -148,included license terms under re gutenberg project use -217,online www org ebook gutenberg at or +0,anyone anywhere ebook cost use +78,restrictions whatsoever copy almost away give may +149,included license terms under re gutenberg project use +218,online www org ebook gutenberg diff --git a/Assign2/WordCount/.part-r-00000.crc b/Assign2/WordCount/.part-r-00000.crc index ad5e3c9b250270c739a2fbce7362301e8dbb1cf0..7d9f624a0cc001355f01cb4c180dd6cb549c6884 100644 GIT binary patch literal 1944 zcmYc;N@ieSU}De+YAD&D=ax~%@X%@Vs`oR@>bACe?D|)?>g!fU6%C_xN>|My45qAq zF`>6y`eb%wuJ#hSj8#@Ao=?AgWX6%lHwCMeXWB*U*@!7>cid?*n*Lh&{QbW5-yR=2 zYahO2@&7cBJSEqQ%noa06^wqoS9T3tBV1iMN%N1e=<>pZwZVF9N!smd_dhcq@XXg( zu;q6CL>r~MMvLZdD*hkqd&99^Otn?d=R1#6fz-E~Oc56kU15Cg;;t+-dEKSxuRcOt z{@VZb!%LLqSeO4|T~=WDXwUL1W$(Na9_j8DuQ|I___#z!+Jb3D!A8~2p3RawPn_H| z)vQ!$?i?4lUmac1kwSCbGXnfhFS+;pTK5~pJtyXv#`f5RY`$Rh^kztD-Tzt7mhxEL zIvOp^zP54E*Q1S28Me(#wSs42b5pce<XqS?N5oj@g$S2H_cF1)KUduU@G9@<mpw24 zx%fxE@s2)~KEE};;fTQgKglX=YcmYRZ|*5AlgQ>fr4Ui%oqO`_kJ}czPJQP-+8-6G zcW=%Ueg7Y-k9Tm+_|o})%9rB{F0WWq&hX;rYYF}Ig66AI-e)DhUpnwCO#aZ#x!d<G z;1T?>^XQ{lnFns@esleHX0FuKhQmiVKa{-Rw)N9vr*Es&c;Yzs8dV>4U8Q;MT8rf! zo;yk1s()1z#LVYh-2Z@yZ`!`EKeRSJx_GPZ-;7Ba%GGw(^Kb2`{2i~x%Gmmw`Ci?v zIrk=*IqnMd+m~l?$?mWFvyDc{h7mI!)-&=woWG03xThn?^t#PlEft@m28P=n9CPs1 z;BwGf!)ZPFU*4jtdR?J99_J5itg`Tq5Sv$j=Irq-lWA9)ni8t++<*6Q{Ze1qTf8v~ zY;^C|-%?KbwOvdv;%n-Ujg8XreHv#j?|2=&lHK4*<?4H$I#1UzW|W`G-tfU+cJ;iM z1~v_0M?-n;{kpRzX2EXdW%IXlS++&wcpS}+%4k?LXY=#!>lrKWFnecO_!aMYu<GwI z|MzjzjPw2XYcBV2TlB%|;tWyGqMnnNo+PE1KdpJ08Oi6KYgoCveZ{XMFI^A)WM|8r z|GDh3V8OwIE{lGN|9iLa&-3IMsm(Jwx1HEwH2KfMjEN@>PJI~|U*;yeg0J_8*3HKY zxEjoyk}qV(&3mfG%<8>x#%9Kby}88=7hj%uYgI6DhDy{iruM@^SG{H1dL{(Ke7>I) zx#CICl#1QUO!h~=jyij%;$#{3<oVmVjlD{|wwUXc$v><KUN|S9xqD@C_uZRE{{9r2 zwz6T;+uN2ajZE!Co_&8N<9a~xqlD?9rFRV`hwZShE9<rEaJ<`l|7V64%l{QdHVZ$j zZ<Wj36m3uyv(0g1P4>HuD>U?f@agb)^0=RgP_((-<+|gc^5g$n#u>BYk7x5)pAXkn ziZ{6<^gT?`scWCK$kc83=boRxPvFc-na}lcyHaoIyvqI^>$dXg>Sq^)w@g{Y6<V@0 zZAMG^#qXb+xHVoD&aXcbZ!-Vxqj}Hu3>4mpw#sZ<!CDdOqS&{6$JfW)C)>ByKgmtj zW72;8{if`S?C|YdeCiTZwlh10>~4|FY2Li|*0k?;p6BYdyibse<M3Pbuh&53cVQDp z(2s?4pUzj<DN$IuZNbCJiVLbcwi~JW<Q>e`+*`$C_kp?D-Xqzd&$ixT&ph5UGj#R| zb+FH7%~}#-B<GOYy1TN@w(V{3@$+K+xqOa`8ZVzHXstcL?fd=ZVh4rjW?_rX7hbU` zE*CUe_~4W8nzp@;_kW+vbO<oM__=VQ$_<Tq93Ac*%8P%MbTDRJ;LTnA?`x}MtL}7N z3Fg-V;s-uXyyv^l)~C~B=kMgF+gDmC?6YD||B<ov$gddY3+?eN^D>@_an5~jaejNc z+FYh-KRv^)dgpewK9JAy$lo*l^@7`O?C-<Y-Zz@w>9pmbCm;V$qs*y|itH+Zsu{H_ zceXzjSbTAoe#_Unm5yguUVFIJ;L4s8?LRV_o^x!ly;R;Nt-Srj52KxzT~}(hY53iY zDw>gaN3hm(hJt5X=aeFa-@fv`flFPxKQ_GMUiRxjs9u><)vAoFZE5;jF8fZIpuw|4 zEqmUEGjA9CIvUJs5$KcXx+`X(qyB`8etUkoo{diOdQ_itb?b&XYu+TpA6T+;!+y*7 z9KV-Y&N~iNEs|F){l(QVb+ejf?aYpVyX`k8DNM_+U-P6P&Hs1fb5D2vQs!B#d{xaQ zzZ=h<4`!Q_(_rB9O^JWol+bx<hJ|5fB~=M^CKjKQPH1ZNuh0pcy7uL?lNP(~#l8A^ z^Z!PH&KXC(EWG-LCG1npp`ENpEk6Ei&su&nTs!yWn-eQPO+9ElZ|n75zl1F}Kb+** zX18gjxcj@;r=^v?ubIbpZW3p^;X+sTtk@+vZdXEQ&KB@}^dc<d`N{apWj!l6W=8!` zop`DH<A3d7x450AMzcz9&xtw~e(Pbe;`R?Kz5Fj8JnqV>)8xDGVDMCK&TIGmCLLYs z9`@Pq(o)rv>38lgIv5kXs)#$xZo66Wo}fJDn5#QJnsM^5Iko8Q`FrclW;vsZOI=T& zuUhZ9+tx35KF?HHZ;b~BWg9G3xm8?May)k?h}E<1<<GYvJKpjcKj;1By;syr>$m=9 f;ai23YJsaBGgfauptzcq^@6iqWw^2Ysx_Mc`*qQu literal 1948 zcmYc;N@ieSU}Cs^oO2<I<+daHHoY_0_qv6}{G`8Y@La9ijPKQa#quWZSs)d=;=Wto z8CfCA(|mtI@74a$=@EUVaq8a<5C2meQ{OcyF<4YcNT`W_zM1$iE~8JJb(7<+^4R~G z*FQb_c~5YI`(!qm^I!j66Ke0dDZm)GTuYy4kCa5m;h4oy0bl?53f;JQ;a>emt&I1X z`TpHM4sLtTudrTR@zM4xIbVgAXQ`MRs1KfhO6uk6qkYczrRJ|F&fU}}x%<S~3!KG4 zQ!Tz{dN6<8v2-QZ)7NgVW|+OrT`Sz~>MFIg&ro*VdG{65x8Kr|JMC-tc&YQb4?Q-S zItL_n1@xcaP`kAJqWjaoGHceJPcV2p;go5_v=v9h!>#@Z-wUZTj{nP^Hbq-l<K5}W zn?>{*xcBC-@BCmHvZUNu%hiypXu5Ck<o#vY%NkFq{uX^ExXDR$zPA0sRL-!9dnt8~ zBi23ROxYxDw%hZ-x23VZ?_!#dt@qvZ|K{p@wr<R}r?P*Ddwxl?@B4P-qgsjVzNfYG zJELWSc(1-YwMP8vlrR&yiJz}sb=<}gsJXy!s~_9*hELb)9+jup^naD#u;25Oq^5)X zMvkyanj2qEJ2d}=<c+57*W}+xhOoUmyXl|!p|!KcSl7L3jL_Dplm6ExVsZI%XmP)@ z$rP7I@?YLW%+Of1xZ(!u#hixdzx`i!KTcvdF4}ne>p}BlRYlHXv8JzH`zIJ2-u6I$ z#=^ZatY^N79hp=6eB)c6x78uiNk^3qHLfl%@>3V_*mNo8+L~L_?T>EdZaZywYr-|< zd|!r8%j?G$)z-C5VQc8mXAoCe^B`CB#+zE-)g==qv^g*)rSIet@9~_lpms;zWXqM` z>QZhl=P)t;d*{m6-SzEiD^DuD^w~e5|H!U&@w-a;_||TiXqR9rCwC?`Xs_d|4|fFT zGwpKSprtR(x-0dVzx(R~`GY$HHs%+`20mYGx~OZdUASM(hR$OwTK9_XxF)_kT6rN( zA>%>#?45h!cOFeX$+U$jA@G{Q))ijoRxFiiULY-U+3m43vy*bYx8Cn9&oqwSI+4a$ zJ8{CvO(MUe4Z8|TCo;XTUaR)vUQf_%7vAm)Bg^#0$YXb}PE(rPylG|LFS*$dpBh}; z<-}Xre4}u-5*Nd9&krT_LErEF2&}&Ge|6z6<%3gF7}*7~?^eBkxaDuc?28f8gXEZw zzFo9ZbNyncT-)<D#Y=D4luEMy)&Fn%?p(z8_gr5jB60-}vpzd}B>Twa8|h9>I)7}Z zI_)#^H+!_Q!13B?%ZvK1SJLY_^7-ngrp4x2>#Se(vWPSC^uH&s1oLaRRl48J-th1G zTdpe{W*^p>t`=Q>Q(Vpb(C(u1yT5-+uQIv6>HacNh9cFb-HN65e`c&-n3Rxkdm9_W zr$5Pg+3g}#^5VQY#jjR%Md%u8$@euVEj23iD7AfRdZ0LD(ZxDP&#>l=_P2R-<eMV4 ztTdgVyEx^o%&#S-C81A`d<d;_<0|I4p}*ARPnk&A>&y=ke3xdHbXD<2=;(ZL+Zz&o zS?Z&&&|3jdQ{^_^;${ZTn%$Bb^IQ+4Ote<WRM>a#vDk`qCn@s)|56?c-7@u#efK1{ z$t7=+xMmQ%w)?|zg;r0^C+fZCvjtLEc^KkF+viLwb3Yr>c2zzu!tlzJS)V30F@N}E zR?T+6b$al$lYRa_90E=)a}8;UGMw?gN!^{dP`9l0=F`V~%|G^}Kjev8wftPVt4Y?R z4-9iNX03DmDgW#wk0|FqleSWyV~U%r9H%&`i1O^3FEyu9>#Vq@RMfG{I=9ZJ*BidI z-RU0QSS+sd#E^%}Ql{1Qg78eiOor;`1v)9ka>r|91#^F89KPLgcf#G(ZPRvrzU*qT zO<mT<_wpC@6*{l?wCq^5_N1=v9W~L2ztd&wbrm;!?2HWY7G+A244qZVtRp>FA;n@t z<ox-u(o1hH(W(2C&cFK4>XO}W1>;uiXZf_uWSz{uYhO;EiLv;;cG0Tahc?R2iuu;N z_oBl5>Vp~9^Y1_AYm<`Gke{d0Fn#hZCs+2xn-0q#+VV-={gZp0sQo=nuLbiy_jvht z*?)Y-r(Du7jfv&o!OIh-DXsb&mG`Q0cEZ&ntCR}XwCBDT_HX!fKcMsIC$;dBT;pOV zr_+YJ;==TAx>cL1Br7`loqDcvYSE^taj%YkO<7~4VKUD>X8tSh4Gt&IGvBF{C{yy8 z_$=4MuI}7so5LrH-)PqaZBh@ewVxNRASbc^&`+6y^sXCg&ivVLWxpjZ$?14myH(WT z<QSES2X6c7Z@29&dMD)1pa0{I^><&!6RA;lVhwdw`@`?7cd@Zsr?WP6DbxM)>p7FY zwA3$Vzueu!F@d>rNwn85C$^*8=lSq!+nrh%6ffJ~d!Ti(t<=jW505rlRUPrn>#5vg z)uQM9Fa4=@(-yT4bHi&&9$oZfnj{l(M*aAXlxg2;el!S*+?})Js&5t7wx=F~Uxb&u zmtJv$rSoL-IXjjk_uX^WPg$jTxMXE<bord`yG{w$_($^_G`x0Rpt#8U^uBKH&-sdb z?&SS%y}EW$Vxf}33e9QB&E1<r(q9*!)0L@oXSwzC$(_jsN*b{o^*iGVvmf=H@jLmW ebHAzHNrCoN<x@*<%Rg>d>s?*>zc6g)|ET~%jJyT_ diff --git a/Assign2/WordCount/WordCount b/Assign2/WordCount/WordCount index f7ce383..7cbead3 100644 --- a/Assign2/WordCount/WordCount +++ b/Assign2/WordCount/WordCount @@ -910,7 +910,6 @@ amply,3 ampthill,1 amurath,2 amyntas,1 -an,1896 anatomiz,2 anatomize,3 anatomy,4 @@ -1386,7 +1385,6 @@ astronomers,1 astronomical,1 astronomy,1 asunder,15 -at,2536 atalanta,2 ate,3 ates,2 @@ -1514,7 +1512,6 @@ avails,2 avarice,2 avaricious,1 avaunt,15 -ave,3 aveng,3 avenge,1 avenged,2 @@ -1566,7 +1563,6 @@ aye,15 ayez,1 azur,2 azure,1 -b,16 ba,2 baa,1 babbl,1 @@ -5310,7 +5306,6 @@ cypriot,1 cyprus,28 cyrus,1 cytherea,3 -d,8961 dabbled,1 dace,1 dad,3 @@ -6807,7 +6802,6 @@ dye,5 dyed,3 dyer,1 dying,48 -e,142 each,240 eager,9 eagerly,3 @@ -7870,7 +7864,6 @@ eyestrings,1 eying,1 eyne,9 eyrie,1 -f,11 fa,6 fabian,74 fable,4 @@ -8511,7 +8504,6 @@ flux,2 fluxive,1 fly,245 flying,17 -fo,4 foal,1 foals,1 foam,4 @@ -9800,8 +9792,6 @@ gypsy,2 gyve,1 gyved,1 gyves,5 -h,2 -ha,230 haberdasher,5 habiliment,1 habiliments,4 @@ -9984,7 +9974,6 @@ hastily,5 hasting,2 hastings,149 hasty,21 -hat,36 hatch,18 hatches,7 hatchet,1 @@ -10325,7 +10314,6 @@ hitting,2 hive,6 hives,1 hizzing,1 -ho,209 hoa,5 hoar,7 hoard,4 @@ -10703,12 +10691,10 @@ ignorant,48 ii,171 iii,145 iiii,1 -il,18 ilbow,1 ild,1 ilion,6 ilium,5 -ill,279 illegitimate,2 illinois,222 illiterate,1 @@ -10727,7 +10713,6 @@ illustrious,5 illyria,13 illyrian,1 ils,2 -im,1 image,46 imagery,1 images,11 @@ -11812,7 +11797,6 @@ knowledge,78 known,188 knows,213 kramer,1 -l,23 la,78 laban,2 label,2 @@ -12376,7 +12360,6 @@ living,121 livings,1 lizard,2 lizards,2 -ll,2409 llous,2 lnd,1 lo,74 @@ -12640,7 +12623,6 @@ lym,1 lymoges,2 lynn,1 lysander,103 -m,30 ma,7 mab,3 macbeth,291 @@ -13833,7 +13815,6 @@ myself,567 myst,1 mysteries,4 mystery,17 -n,159 nag,2 nage,1 nags,1 @@ -14066,7 +14047,6 @@ nit,2 nly,1 nnight,2 nnights,1 -no,3814 noah,2 nob,2 nobility,37 @@ -14231,7 +14211,6 @@ ny,2 nym,63 nymph,9 nymphs,12 -o,3053 oak,27 oaken,2 oaks,5 @@ -14473,7 +14452,6 @@ opprobriously,1 oppugnancy,1 opulency,1 opulent,2 -or,3199 oracle,27 oracles,3 orange,5 @@ -14563,7 +14541,6 @@ oui,6 ounce,6 ounces,1 ouphes,2 -our,3066 ours,88 ourself,24 ourselves,115 @@ -16778,7 +16755,6 @@ quoted,5 quotes,1 quoth,66 quotidian,2 -r,92 rabbit,4 rabble,13 rabblement,2 @@ -17960,7 +17936,6 @@ ruttish,1 ry,60 rye,3 rything,1 -s,7734 sa,6 saba,1 sabbath,2 @@ -20577,7 +20552,6 @@ syracusians,1 syria,6 syrups,2 system,1 -t,1213 ta,96 taber,1 table,60 @@ -20865,7 +20839,6 @@ tetter,3 tevil,1 tewksbury,8 text,11 -th,1177 thaes,1 thames,7 than,1885 @@ -21734,7 +21707,6 @@ tyrant,60 tyrants,10 tyrian,1 tyrrel,21 -u,6 ubique,1 udders,1 udge,1 @@ -22597,7 +22569,6 @@ utterly,8 uttermost,7 utters,5 uy,1 -v,99 va,1 vacancy,4 vacant,6 @@ -22713,7 +22684,6 @@ vaunts,2 vauvado,1 vaux,9 vaward,5 -ve,1 veal,2 vede,1 vehemence,1 @@ -23047,7 +23017,6 @@ vulnerable,1 vulture,4 vultures,2 vurther,1 -w,2 wad,1 waddled,1 wade,3 @@ -23492,7 +23461,6 @@ whoso,4 whosoe,2 whosoever,2 why,1476 -wi,12 wick,1 wicked,64 wickednes,1 @@ -23605,7 +23573,6 @@ wishing,9 wishtly,1 wisp,1 wist,1 -wit,269 witb,2 witch,94 witchcraft,18 @@ -23863,7 +23830,6 @@ xii,2 xiii,2 xiv,1 xv,1 -y,51 yard,12 yards,5 yare,10 diff --git a/Assign2/hadoop.log b/Assign2/hadoop.log index 4b0c489..a5c2ca7 100644 --- a/Assign2/hadoop.log +++ b/Assign2/hadoop.log @@ -134,3 +134,1824 @@ Caused by: java.lang.NullPointerException Bytes Read=5589889 File Output Format Counters Bytes Written=0 +2017-03-18 08:18:14,295 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 08:18:18,093 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 08:18:18,115 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 08:18:20,321 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 08:18:20,450 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 08:18:21,079 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 08:18:22,754 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1486099625_0001 +2017-03-18 08:18:25,049 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 08:18:25,051 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1486099625_0001 +2017-03-18 08:18:25,063 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 08:18:25,153 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 08:18:25,163 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 08:18:25,683 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 08:18:25,685 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1486099625_0001_m_000000_0 +2017-03-18 08:18:26,040 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 08:18:26,076 INFO org.apache.hadoop.mapreduce.Job: Job job_local1486099625_0001 running in uber mode : false +2017-03-18 08:18:26,078 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 08:18:26,170 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 08:18:26,184 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100.txt:0+5589889 +2017-03-18 08:18:27,663 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 08:18:27,678 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 08:18:27,678 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 08:18:27,679 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 08:18:27,679 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 08:18:27,721 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 08:18:27,778 INFO org.apache.hadoop.mapreduce.lib.input.LineRecordReader: Found UTF-8 BOM and skipped it +2017-03-18 08:18:32,174 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:33,097 INFO org.apache.hadoop.mapreduce.Job: map 1% reduce 0% +2017-03-18 08:18:35,177 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:36,105 INFO org.apache.hadoop.mapreduce.Job: map 3% reduce 0% +2017-03-18 08:18:38,179 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:39,109 INFO org.apache.hadoop.mapreduce.Job: map 5% reduce 0% +2017-03-18 08:18:41,180 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:42,111 INFO org.apache.hadoop.mapreduce.Job: map 12% reduce 0% +2017-03-18 08:18:44,182 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:45,116 INFO org.apache.hadoop.mapreduce.Job: map 23% reduce 0% +2017-03-18 08:18:47,183 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:48,126 INFO org.apache.hadoop.mapreduce.Job: map 35% reduce 0% +2017-03-18 08:18:50,184 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:51,133 INFO org.apache.hadoop.mapreduce.Job: map 46% reduce 0% +2017-03-18 08:18:53,186 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:54,135 INFO org.apache.hadoop.mapreduce.Job: map 57% reduce 0% +2017-03-18 08:18:55,937 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map +2017-03-18 08:18:55,945 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 08:18:55,948 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 08:18:55,950 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 5961092; bufvoid = 104857600 +2017-03-18 08:18:55,951 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 23837324(95349296); length = 2377073/6553600 +2017-03-18 08:18:56,187 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort +2017-03-18 08:18:57,139 INFO org.apache.hadoop.mapreduce.Job: map 67% reduce 0% +2017-03-18 08:18:59,189 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort +2017-03-18 08:19:02,190 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort +2017-03-18 08:19:05,191 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort +2017-03-18 08:19:05,383 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 08:19:05,429 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1486099625_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 08:19:05,442 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 08:19:05,457 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1486099625_0001_m_000000_0' done. +2017-03-18 08:19:05,457 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1486099625_0001_m_000000_0 +2017-03-18 08:19:05,458 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 08:19:05,477 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 08:19:05,478 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1486099625_0001_r_000000_0 +2017-03-18 08:19:05,540 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 08:19:05,560 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 08:19:05,564 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@3b7b4f72 +2017-03-18 08:19:05,743 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 08:19:05,780 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1486099625_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 08:19:06,090 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1486099625_0001_m_000000_0 decomp: 7149632 len: 7149636 to MEMORY +2017-03-18 08:19:06,153 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 08:19:06,262 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 7149632 bytes from map-output for attempt_local1486099625_0001_m_000000_0 +2017-03-18 08:19:06,280 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 7149632, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->7149632 +2017-03-18 08:19:06,291 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 08:19:06,292 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 08:19:06,299 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 08:19:06,338 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 08:19:06,354 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7149628 bytes +2017-03-18 08:19:10,080 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 7149632 bytes to disk to satisfy reduce memory limit +2017-03-18 08:19:10,091 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 7149636 bytes from disk +2017-03-18 08:19:10,121 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 08:19:10,121 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 08:19:10,131 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7149628 bytes +2017-03-18 08:19:10,131 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 08:19:10,215 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 08:19:11,558 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 08:19:12,161 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 67% +2017-03-18 08:19:14,559 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 08:19:15,163 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 86% +2017-03-18 08:19:15,939 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1486099625_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 08:19:15,952 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 08:19:15,963 INFO org.apache.hadoop.mapred.Task: Task attempt_local1486099625_0001_r_000000_0 is allowed to commit now +2017-03-18 08:19:15,970 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1486099625_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/WordCount/_temporary/0/task_local1486099625_0001_r_000000 +2017-03-18 08:19:15,978 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 08:19:15,984 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1486099625_0001_r_000000_0' done. +2017-03-18 08:19:15,987 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1486099625_0001_r_000000_0 +2017-03-18 08:19:15,989 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 08:19:16,089 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1486099625_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 08:19:16,164 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 08:19:16,164 INFO org.apache.hadoop.mapreduce.Job: Job job_local1486099625_0001 failed with state FAILED due to: NA +2017-03-18 08:19:16,290 INFO org.apache.hadoop.mapreduce.Job: Counters: 30 + File System Counters + FILE: Number of bytes read=25479428 + FILE: Number of bytes written=22253006 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=124787 + Map output records=594269 + Map output bytes=5961092 + Map output materialized bytes=7149636 + Input split bytes=116 + Combine input records=0 + Combine output records=0 + Reduce input groups=23927 + Reduce shuffle bytes=7149636 + Reduce input records=594269 + Reduce output records=23927 + Spilled Records=1188538 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=463 + Total committed heap usage (bytes)=331227136 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=5589889 + File Output Format Counters + Bytes Written=249540 +2017-03-18 09:01:02,099 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 09:01:08,193 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 09:01:08,213 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 09:01:10,958 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 09:01:11,100 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 09:01:11,689 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 09:01:14,259 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local858246623_0001 +2017-03-18 09:01:18,212 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 09:01:18,245 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local858246623_0001 +2017-03-18 09:01:18,254 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 09:01:18,356 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:01:18,372 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 09:01:19,033 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 09:01:19,035 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local858246623_0001_m_000000_0 +2017-03-18 09:01:19,258 INFO org.apache.hadoop.mapreduce.Job: Job job_local858246623_0001 running in uber mode : false +2017-03-18 09:01:19,270 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 09:01:19,420 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:01:19,720 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:01:20,053 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 09:01:23,039 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 09:01:23,040 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 09:01:23,732 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 09:01:24,554 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 09:01:24,558 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 09:01:24,558 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 09:01:24,572 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 09:01:24,572 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 09:01:24,836 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 09:01:24,897 INFO org.apache.hadoop.mapred.Task: Task:attempt_local858246623_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 09:01:25,349 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 09:01:25,349 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local858246623_0001_m_000000_0' done. +2017-03-18 09:01:25,363 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local858246623_0001_m_000000_0 +2017-03-18 09:01:25,377 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 09:01:25,439 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 09:01:25,461 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local858246623_0001_r_000000_0 +2017-03-18 09:01:25,551 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:01:25,553 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:01:25,643 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@3d4a4a50 +2017-03-18 09:01:25,875 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 09:01:25,919 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local858246623_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 09:01:26,318 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 09:01:26,370 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local858246623_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 09:01:26,440 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local858246623_0001_m_000000_0 +2017-03-18 09:01:26,471 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 09:01:26,505 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 09:01:26,506 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:01:26,508 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 09:01:26,592 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:01:26,593 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:01:26,609 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 09:01:26,610 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 09:01:26,635 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 09:01:26,635 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:01:26,636 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:01:26,637 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:01:26,850 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 09:01:26,944 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 09:01:26,966 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local858246623_0001 +java.lang.Exception: java.lang.NullPointerException + at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556) +Caused by: java.lang.NullPointerException + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:170) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1) + at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324) + at java.util.TimSort.sort(TimSort.java:189) + at java.util.TimSort.sort(TimSort.java:173) + at java.util.Arrays.sort(Arrays.java:659) + at java.util.Collections.sort(Collections.java:217) + at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:166) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:214) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1) + at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) + at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) + at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) + at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) + at java.util.concurrent.FutureTask.run(FutureTask.java:262) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) + at java.lang.Thread.run(Thread.java:745) +2017-03-18 09:01:27,319 INFO org.apache.hadoop.mapreduce.Job: Job job_local858246623_0001 failed with state FAILED due to: NA +2017-03-18 09:01:27,573 INFO org.apache.hadoop.mapreduce.Job: Counters: 30 + File System Counters + FILE: Number of bytes read=441 + FILE: Number of bytes written=276371 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=0 + Reduce shuffle bytes=454 + Reduce input records=0 + Reduce output records=0 + Spilled Records=27 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=346 + Total committed heap usage (bytes)=165613568 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=0 +2017-03-18 09:31:14,110 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 09:31:18,747 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 09:31:18,757 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 09:31:23,780 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 09:31:23,837 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 09:31:24,319 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 09:31:26,140 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local805279743_0001 +2017-03-18 09:31:29,121 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 09:31:29,146 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local805279743_0001 +2017-03-18 09:31:29,180 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 09:31:29,413 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:31:29,437 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 09:31:30,024 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 09:31:30,028 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local805279743_0001_m_000000_0 +2017-03-18 09:31:30,184 INFO org.apache.hadoop.mapreduce.Job: Job job_local805279743_0001 running in uber mode : false +2017-03-18 09:31:30,188 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 09:31:30,374 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:31:30,520 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:31:30,659 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 09:31:32,579 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 09:31:32,581 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 09:31:32,582 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 09:31:32,678 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 09:31:32,914 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 09:31:32,914 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 09:31:32,918 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 09:31:32,919 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 09:31:32,920 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 09:31:33,203 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 09:31:33,488 INFO org.apache.hadoop.mapred.Task: Task:attempt_local805279743_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 09:31:33,718 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 09:31:33,721 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local805279743_0001_m_000000_0' done. +2017-03-18 09:31:33,721 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local805279743_0001_m_000000_0 +2017-03-18 09:31:33,724 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 09:31:33,742 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 09:31:33,742 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local805279743_0001_r_000000_0 +2017-03-18 09:31:33,861 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:31:33,862 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:31:33,907 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@6d550439 +2017-03-18 09:31:34,115 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 09:31:34,150 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local805279743_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 09:31:34,321 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 09:31:34,642 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local805279743_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 09:31:34,691 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local805279743_0001_m_000000_0 +2017-03-18 09:31:34,718 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 09:31:34,734 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 09:31:34,738 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:31:34,739 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 09:31:34,811 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:31:34,815 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:31:34,838 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 09:31:34,839 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 09:31:34,851 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 09:31:34,854 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:31:34,855 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:31:34,861 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:31:35,210 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 09:31:35,300 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 09:31:35,325 INFO org.apache.hadoop.mapreduce.Job: Job job_local805279743_0001 failed with state FAILED due to: NA +2017-03-18 09:31:35,359 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local805279743_0001 +java.lang.Exception: java.lang.NullPointerException + at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556) +Caused by: java.lang.NullPointerException + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:170) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1) + at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324) + at java.util.TimSort.sort(TimSort.java:189) + at java.util.TimSort.sort(TimSort.java:173) + at java.util.Arrays.sort(Arrays.java:659) + at java.util.Collections.sort(Collections.java:217) + at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:165) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:213) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1) + at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) + at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) + at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) + at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) + at java.util.concurrent.FutureTask.run(FutureTask.java:262) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) + at java.lang.Thread.run(Thread.java:745) +2017-03-18 09:31:35,574 INFO org.apache.hadoop.mapreduce.Job: Counters: 30 + File System Counters + FILE: Number of bytes read=441 + FILE: Number of bytes written=276371 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=0 + Reduce shuffle bytes=454 + Reduce input records=0 + Reduce output records=0 + Spilled Records=27 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=325 + Total committed heap usage (bytes)=165613568 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=0 +2017-03-18 09:34:34,236 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 09:34:37,311 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 09:34:37,333 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 09:34:39,465 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 09:34:39,510 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 09:34:40,107 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 09:34:41,786 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1657624619_0001 +2017-03-18 09:34:43,586 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 09:34:43,588 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1657624619_0001 +2017-03-18 09:34:43,604 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 09:34:43,680 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:34:43,691 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 09:34:44,104 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 09:34:44,105 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1657624619_0001_m_000000_0 +2017-03-18 09:34:44,380 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:34:44,514 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:34:44,590 INFO org.apache.hadoop.mapreduce.Job: Job job_local1657624619_0001 running in uber mode : false +2017-03-18 09:34:44,593 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 09:34:44,605 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 09:34:45,882 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 09:34:45,967 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 09:34:46,143 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 09:34:46,150 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 09:34:46,152 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 09:34:46,155 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 09:34:46,156 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 09:34:46,625 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 09:34:46,666 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1657624619_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 09:34:46,757 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 09:34:46,769 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1657624619_0001_m_000000_0' done. +2017-03-18 09:34:46,771 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1657624619_0001_m_000000_0 +2017-03-18 09:34:46,774 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 09:34:46,793 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 09:34:46,794 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1657624619_0001_r_000000_0 +2017-03-18 09:34:46,861 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:34:46,873 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:34:46,903 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@17b42596 +2017-03-18 09:34:47,055 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 09:34:47,112 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1657624619_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 09:34:47,365 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1657624619_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 09:34:47,394 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1657624619_0001_m_000000_0 +2017-03-18 09:34:47,406 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 09:34:47,430 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 09:34:47,433 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:34:47,439 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 09:34:47,496 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:34:47,514 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:34:47,523 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 09:34:47,525 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 09:34:47,536 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 09:34:47,539 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:34:47,540 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:34:47,543 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:34:47,602 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 09:34:48,903 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 09:34:48,996 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1657624619_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 09:34:49,029 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:34:49,030 INFO org.apache.hadoop.mapred.Task: Task attempt_local1657624619_0001_r_000000_0 is allowed to commit now +2017-03-18 09:34:49,032 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1657624619_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1657624619_0001_r_000000 +2017-03-18 09:34:49,051 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 09:34:49,066 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1657624619_0001_r_000000_0' done. +2017-03-18 09:34:49,067 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1657624619_0001_r_000000_0 +2017-03-18 09:34:49,067 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 09:34:49,153 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1657624619_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 09:34:49,610 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 09:34:49,611 INFO org.apache.hadoop.mapreduce.Job: Job job_local1657624619_0001 failed with state FAILED due to: NA +2017-03-18 09:34:49,781 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=556327 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=402 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=191 +2017-03-18 09:41:04,072 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 09:41:07,127 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 09:41:07,155 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 09:41:09,446 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 09:41:09,559 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 09:41:10,078 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 09:41:11,652 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1710867457_0001 +2017-03-18 09:41:13,534 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 09:41:13,536 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1710867457_0001 +2017-03-18 09:41:13,552 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 09:41:13,625 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:41:13,642 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 09:41:14,050 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 09:41:14,052 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1710867457_0001_m_000000_0 +2017-03-18 09:41:14,328 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:41:14,459 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:41:14,552 INFO org.apache.hadoop.mapreduce.Job: Job job_local1710867457_0001 running in uber mode : false +2017-03-18 09:41:14,561 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 09:41:14,575 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 09:41:17,724 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 09:41:17,724 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 09:41:17,725 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 09:41:17,856 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 09:41:18,043 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 09:41:18,043 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 09:41:18,046 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 09:41:18,049 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 09:41:18,104 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 09:41:18,279 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 09:41:18,363 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1710867457_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 09:41:18,490 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 09:41:18,496 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1710867457_0001_m_000000_0' done. +2017-03-18 09:41:18,497 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1710867457_0001_m_000000_0 +2017-03-18 09:41:18,499 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 09:41:18,514 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 09:41:18,514 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1710867457_0001_r_000000_0 +2017-03-18 09:41:18,597 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 09:41:18,627 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 09:41:18,628 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 09:41:18,803 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@59b8ca86 +2017-03-18 09:41:19,007 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 09:41:19,044 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1710867457_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 09:41:19,394 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1710867457_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 09:41:19,474 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1710867457_0001_m_000000_0 +2017-03-18 09:41:19,503 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 09:41:19,524 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 09:41:19,530 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:41:19,531 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 09:41:19,621 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:41:19,629 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:41:19,668 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 09:41:19,669 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 09:41:19,680 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 09:41:19,681 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 09:41:19,683 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 09:41:19,688 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:41:20,851 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 09:41:20,932 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1710867457_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 09:41:20,969 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 09:41:20,970 INFO org.apache.hadoop.mapred.Task: Task attempt_local1710867457_0001_r_000000_0 is allowed to commit now +2017-03-18 09:41:20,972 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1710867457_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1710867457_0001_r_000000 +2017-03-18 09:41:20,986 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1710867457_0001_r_000000_0' done. +2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1710867457_0001_r_000000_0 +2017-03-18 09:41:21,001 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 09:41:21,089 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1710867457_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 09:41:21,607 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 09:41:21,608 INFO org.apache.hadoop.mapreduce.Job: Job job_local1710867457_0001 failed with state FAILED due to: NA +2017-03-18 09:41:21,819 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=556327 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=412 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=191 +2017-03-18 10:02:31,717 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:02:36,195 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:02:36,217 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:02:39,313 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:02:39,437 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:02:39,946 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:02:41,475 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1299112731_0001 +2017-03-18 10:02:43,571 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:02:43,578 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1299112731_0001 +2017-03-18 10:02:43,622 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:02:43,714 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:02:43,725 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:02:44,329 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:02:44,331 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1299112731_0001_m_000000_0 +2017-03-18 10:02:44,616 INFO org.apache.hadoop.mapreduce.Job: Job job_local1299112731_0001 running in uber mode : false +2017-03-18 10:02:44,639 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:02:44,697 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:02:44,894 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:02:45,012 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:02:48,005 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:02:48,006 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:02:48,007 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:02:48,081 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:02:48,152 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:02:48,175 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:02:48,176 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:02:48,295 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:02:48,319 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1299112731_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:02:48,446 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1299112731_0001_m_000000_0' done. +2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1299112731_0001_m_000000_0 +2017-03-18 10:02:48,447 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:02:48,472 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:02:48,473 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1299112731_0001_r_000000_0 +2017-03-18 10:02:48,592 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:02:48,594 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:02:48,633 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@621e4b65 +2017-03-18 10:02:48,656 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:02:49,394 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:02:49,427 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1299112731_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:02:50,207 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1299112731_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:02:50,243 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1299112731_0001_m_000000_0 +2017-03-18 10:02:50,259 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:02:50,283 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:02:50,285 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:02:50,285 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:02:50,412 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:02:50,413 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:02:50,428 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:02:50,430 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:02:50,448 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:02:50,454 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:02:50,456 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:02:50,459 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:02:51,864 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:02:51,948 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1299112731_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:02:51,965 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:02:51,965 INFO org.apache.hadoop.mapred.Task: Task attempt_local1299112731_0001_r_000000_0 is allowed to commit now +2017-03-18 10:02:51,980 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1299112731_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1299112731_0001_r_000000 +2017-03-18 10:02:51,998 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1299112731_0001_r_000000_0' done. +2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1299112731_0001_r_000000_0 +2017-03-18 10:02:51,999 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:02:52,093 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1299112731_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:02:52,734 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:02:52,734 INFO org.apache.hadoop.mapreduce.Job: Job job_local1299112731_0001 failed with state FAILED due to: NA +2017-03-18 10:02:52,903 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=556241 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=402 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=105 +2017-03-18 10:09:55,517 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:09:58,604 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:09:58,648 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:10:00,578 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:10:00,646 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:10:01,161 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:10:02,797 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local525023421_0001 +2017-03-18 10:10:04,560 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:10:04,562 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local525023421_0001 +2017-03-18 10:10:04,593 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:10:04,641 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:10:04,655 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:10:05,048 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:10:05,050 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local525023421_0001_m_000000_0 +2017-03-18 10:10:05,306 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:10:05,423 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:10:05,503 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:10:05,595 INFO org.apache.hadoop.mapreduce.Job: Job job_local525023421_0001 running in uber mode : false +2017-03-18 10:10:05,628 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:10:06,690 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:10:06,691 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:10:06,691 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:10:06,780 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:10:06,915 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:10:06,929 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:10:06,939 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:10:06,942 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:10:06,945 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:10:07,074 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:10:07,102 INFO org.apache.hadoop.mapred.Task: Task:attempt_local525023421_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:10:07,178 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:10:07,201 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local525023421_0001_m_000000_0' done. +2017-03-18 10:10:07,201 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local525023421_0001_m_000000_0 +2017-03-18 10:10:07,205 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:10:07,221 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:10:07,222 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local525023421_0001_r_000000_0 +2017-03-18 10:10:07,279 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:10:07,280 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:10:07,323 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1d8c3d92 +2017-03-18 10:10:07,485 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:10:07,525 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local525023421_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:10:07,648 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:10:09,046 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local525023421_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:10:09,124 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local525023421_0001_m_000000_0 +2017-03-18 10:10:09,139 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:10:09,164 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:10:09,166 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:10:09,167 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:10:09,224 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:10:09,227 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:10:09,244 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:10:09,245 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:10:09,258 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:10:09,261 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:10:09,262 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:10:09,271 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:10:10,314 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:10:10,381 INFO org.apache.hadoop.mapred.Task: Task:attempt_local525023421_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:10:10,409 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:10:10,410 INFO org.apache.hadoop.mapred.Task: Task attempt_local525023421_0001_r_000000_0 is allowed to commit now +2017-03-18 10:10:10,411 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local525023421_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local525023421_0001_r_000000 +2017-03-18 10:10:10,426 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:10:10,443 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local525023421_0001_r_000000_0' done. +2017-03-18 10:10:10,443 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local525023421_0001_r_000000_0 +2017-03-18 10:10:10,444 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:10:10,525 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local525023421_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:10:10,655 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:10:10,655 INFO org.apache.hadoop.mapreduce.Job: Job job_local525023421_0001 failed with state FAILED due to: NA +2017-03-18 10:10:10,821 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=553387 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=376 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=191 +2017-03-18 10:13:43,339 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:13:46,756 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:13:46,793 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:13:49,193 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:13:49,248 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:13:49,894 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:13:52,018 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local156256502_0001 +2017-03-18 10:13:54,111 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:13:54,113 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local156256502_0001 +2017-03-18 10:13:54,135 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:13:54,200 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:13:54,217 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:13:54,625 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:13:54,627 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local156256502_0001_m_000000_0 +2017-03-18 10:13:54,933 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:13:55,069 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:13:55,130 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:13:55,149 INFO org.apache.hadoop.mapreduce.Job: Job job_local156256502_0001 running in uber mode : false +2017-03-18 10:13:55,177 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:13:56,487 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:13:56,501 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:13:56,706 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:13:56,775 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:13:56,784 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:13:56,785 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:13:56,787 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:13:56,787 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:13:56,852 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:13:56,865 INFO org.apache.hadoop.mapred.Task: Task:attempt_local156256502_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:13:56,971 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:13:56,977 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local156256502_0001_m_000000_0' done. +2017-03-18 10:13:56,978 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local156256502_0001_m_000000_0 +2017-03-18 10:13:56,979 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:13:56,996 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:13:56,997 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local156256502_0001_r_000000_0 +2017-03-18 10:13:57,081 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:13:57,083 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:13:57,095 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e +2017-03-18 10:13:57,183 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:13:57,313 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:13:57,352 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local156256502_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:13:58,019 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local156256502_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:13:58,075 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local156256502_0001_m_000000_0 +2017-03-18 10:13:58,104 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:13:58,117 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:13:58,118 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:13:58,127 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:13:58,181 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:13:58,181 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:13:58,199 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:13:58,200 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:13:58,217 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:13:58,218 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:13:58,220 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:13:58,226 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:13:59,506 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:13:59,579 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:13:59,603 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local156256502_0001 +java.lang.Exception: java.lang.NullPointerException + at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556) +Caused by: java.lang.NullPointerException + at java.lang.Integer.compareTo(Integer.java:1003) + at java.lang.Integer.compareTo(Integer.java:52) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:153) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1) + at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324) + at java.util.TimSort.sort(TimSort.java:189) + at java.util.TimSort.sort(TimSort.java:173) + at java.util.Arrays.sort(Arrays.java:659) + at java.util.Collections.sort(Collections.java:217) + at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:148) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:185) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1) + at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) + at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) + at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) + at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) + at java.util.concurrent.FutureTask.run(FutureTask.java:262) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) + at java.lang.Thread.run(Thread.java:745) +2017-03-18 10:14:00,192 INFO org.apache.hadoop.mapreduce.Job: Job job_local156256502_0001 failed with state FAILED due to: NA +2017-03-18 10:14:00,357 INFO org.apache.hadoop.mapreduce.Job: Counters: 30 + File System Counters + FILE: Number of bytes read=441 + FILE: Number of bytes written=276371 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=0 + Reduce shuffle bytes=454 + Reduce input records=0 + Reduce output records=0 + Spilled Records=27 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=318 + Total committed heap usage (bytes)=165613568 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=0 +2017-03-18 10:15:20,636 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:15:23,641 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:15:23,686 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:15:25,708 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:15:25,787 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:15:26,311 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:15:27,986 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local773198118_0001 +2017-03-18 10:15:29,767 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:15:29,769 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local773198118_0001 +2017-03-18 10:15:29,780 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:15:29,852 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:15:29,853 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:15:30,269 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:15:30,271 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local773198118_0001_m_000000_0 +2017-03-18 10:15:30,523 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:15:30,627 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:15:30,714 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:15:30,771 INFO org.apache.hadoop.mapreduce.Job: Job job_local773198118_0001 running in uber mode : false +2017-03-18 10:15:30,774 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:15:32,116 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:15:32,117 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:15:32,158 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:15:32,254 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:15:32,268 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:15:32,270 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:15:32,271 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 419; bufvoid = 104857600 +2017-03-18 10:15:32,277 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214284(104857136); length = 113/6553600 +2017-03-18 10:15:32,382 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:15:32,418 INFO org.apache.hadoop.mapred.Task: Task:attempt_local773198118_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:15:32,504 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:15:32,513 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local773198118_0001_m_000000_0' done. +2017-03-18 10:15:32,513 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local773198118_0001_m_000000_0 +2017-03-18 10:15:32,514 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:15:32,529 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:15:32,530 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local773198118_0001_r_000000_0 +2017-03-18 10:15:32,664 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:15:32,666 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:15:32,756 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@71e2b09b +2017-03-18 10:15:32,787 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:15:33,029 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:15:33,082 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local773198118_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:15:33,366 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local773198118_0001_m_000000_0 decomp: 479 len: 483 to MEMORY +2017-03-18 10:15:33,384 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 479 bytes from map-output for attempt_local773198118_0001_m_000000_0 +2017-03-18 10:15:33,401 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 479, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->479 +2017-03-18 10:15:33,415 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:15:33,417 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:15:33,418 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:15:33,476 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:15:33,487 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 469 bytes +2017-03-18 10:15:33,496 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 479 bytes to disk to satisfy reduce memory limit +2017-03-18 10:15:33,498 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 483 bytes from disk +2017-03-18 10:15:33,513 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:15:33,515 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:15:33,517 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 469 bytes +2017-03-18 10:15:33,519 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:15:34,620 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:15:34,685 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:15:34,694 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local773198118_0001 +java.lang.Exception: java.lang.NullPointerException + at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556) +Caused by: java.lang.NullPointerException + at java.lang.Integer.compareTo(Integer.java:1003) + at java.lang.Integer.compareTo(Integer.java:52) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:145) + at Preprocessing.Preprocessing_1$Reduce$1.compare(Preprocessing_1.java:1) + at java.util.TimSort.countRunAndMakeAscending(TimSort.java:324) + at java.util.TimSort.sort(TimSort.java:189) + at java.util.TimSort.sort(TimSort.java:173) + at java.util.Arrays.sort(Arrays.java:659) + at java.util.Collections.sort(Collections.java:217) + at Preprocessing.Preprocessing_1$Reduce.sortByValue(Preprocessing_1.java:140) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:177) + at Preprocessing.Preprocessing_1$Reduce.reduce(Preprocessing_1.java:1) + at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) + at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) + at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) + at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) + at java.util.concurrent.FutureTask.run(FutureTask.java:262) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) + at java.lang.Thread.run(Thread.java:745) +2017-03-18 10:15:34,796 INFO org.apache.hadoop.mapreduce.Job: Job job_local773198118_0001 failed with state FAILED due to: NA +2017-03-18 10:15:34,977 INFO org.apache.hadoop.mapreduce.Job: Counters: 30 + File System Counters + FILE: Number of bytes read=441 + FILE: Number of bytes written=276400 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=29 + Map output bytes=419 + Map output materialized bytes=483 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=0 + Reduce shuffle bytes=483 + Reduce input records=0 + Reduce output records=0 + Spilled Records=29 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=299 + Total committed heap usage (bytes)=165613568 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=0 +2017-03-18 10:16:35,727 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:16:38,709 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:16:38,719 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:16:40,831 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:16:40,909 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:16:41,416 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:16:43,118 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1537778759_0001 +2017-03-18 10:16:44,879 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:16:44,881 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1537778759_0001 +2017-03-18 10:16:44,904 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:16:44,953 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:16:44,960 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:16:45,357 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:16:45,359 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1537778759_0001_m_000000_0 +2017-03-18 10:16:45,587 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:16:45,695 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:16:45,760 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:16:45,909 INFO org.apache.hadoop.mapreduce.Job: Job job_local1537778759_0001 running in uber mode : false +2017-03-18 10:16:46,263 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:16:46,932 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:16:46,933 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:16:46,979 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:16:47,062 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:16:47,084 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:16:47,085 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:16:47,186 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:16:47,209 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1537778759_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:16:47,312 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:16:47,317 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1537778759_0001_m_000000_0' done. +2017-03-18 10:16:47,318 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1537778759_0001_m_000000_0 +2017-03-18 10:16:47,318 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:16:47,339 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:16:47,343 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1537778759_0001_r_000000_0 +2017-03-18 10:16:47,408 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:16:47,409 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:16:47,441 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1d8c3d92 +2017-03-18 10:16:47,582 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:16:47,624 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1537778759_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:16:47,865 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1537778759_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:16:47,890 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1537778759_0001_m_000000_0 +2017-03-18 10:16:47,898 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:16:47,920 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:16:47,922 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:16:47,923 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:16:47,981 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:16:47,992 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:16:48,001 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:16:48,005 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:16:48,033 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:16:48,039 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:16:48,056 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:16:48,057 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:16:48,268 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:16:49,175 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:16:49,250 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1537778759_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:16:49,281 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:16:49,281 INFO org.apache.hadoop.mapred.Task: Task attempt_local1537778759_0001_r_000000_0 is allowed to commit now +2017-03-18 10:16:49,283 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1537778759_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1537778759_0001_r_000000 +2017-03-18 10:16:49,317 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1537778759_0001_r_000000_0' done. +2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1537778759_0001_r_000000_0 +2017-03-18 10:16:49,326 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:16:49,412 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1537778759_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:16:50,272 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:16:50,272 INFO org.apache.hadoop.mapreduce.Job: Job job_local1537778759_0001 failed with state FAILED due to: NA +2017-03-18 10:16:50,425 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=556327 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=401 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=191 +2017-03-18 10:17:33,330 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:17:36,373 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:17:36,409 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:17:38,467 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:17:38,568 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:17:39,104 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:17:40,665 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local270399040_0001 +2017-03-18 10:17:42,497 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:17:42,499 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local270399040_0001 +2017-03-18 10:17:42,524 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:17:42,593 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:17:42,607 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:17:43,008 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:17:43,010 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local270399040_0001_m_000000_0 +2017-03-18 10:17:43,262 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:17:43,388 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:17:43,474 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:17:43,524 INFO org.apache.hadoop.mapreduce.Job: Job job_local270399040_0001 running in uber mode : false +2017-03-18 10:17:43,527 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:17:44,458 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:17:44,459 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:17:44,523 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:17:44,609 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:17:44,615 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:17:44,616 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:17:44,677 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:17:44,701 INFO org.apache.hadoop.mapred.Task: Task:attempt_local270399040_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:17:44,778 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:17:44,797 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local270399040_0001_m_000000_0' done. +2017-03-18 10:17:44,798 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local270399040_0001_m_000000_0 +2017-03-18 10:17:44,799 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:17:44,813 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:17:44,814 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local270399040_0001_r_000000_0 +2017-03-18 10:17:44,875 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:17:44,877 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:17:44,907 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e +2017-03-18 10:17:45,037 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:17:45,062 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local270399040_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:17:45,290 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local270399040_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:17:45,313 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local270399040_0001_m_000000_0 +2017-03-18 10:17:45,333 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:17:45,346 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:17:45,347 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:17:45,353 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:17:45,415 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:17:45,416 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:17:45,438 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:17:45,444 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:17:45,447 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:17:45,450 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:17:45,452 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:17:45,455 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:17:45,552 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:17:46,562 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:17:46,602 INFO org.apache.hadoop.mapred.Task: Task:attempt_local270399040_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:17:46,646 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:17:46,647 INFO org.apache.hadoop.mapred.Task: Task attempt_local270399040_0001_r_000000_0 is allowed to commit now +2017-03-18 10:17:46,648 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local270399040_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local270399040_0001_r_000000 +2017-03-18 10:17:46,662 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:17:46,663 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local270399040_0001_r_000000_0' done. +2017-03-18 10:17:46,663 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local270399040_0001_r_000000_0 +2017-03-18 10:17:46,675 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:17:46,741 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local270399040_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:17:47,556 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:17:47,557 INFO org.apache.hadoop.mapreduce.Job: Job job_local270399040_0001 failed with state FAILED due to: NA +2017-03-18 10:17:47,666 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=553387 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=352 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=191 +2017-03-18 10:22:19,077 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:22:22,159 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:22:22,175 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:22:23,976 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:22:24,049 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:22:24,419 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:22:25,911 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local315696638_0001 +2017-03-18 10:22:27,672 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:22:27,674 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local315696638_0001 +2017-03-18 10:22:27,699 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:22:27,747 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:22:27,760 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:22:28,186 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:22:28,188 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local315696638_0001_m_000000_0 +2017-03-18 10:22:28,454 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:22:28,572 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:22:28,660 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:22:28,702 INFO org.apache.hadoop.mapreduce.Job: Job job_local315696638_0001 running in uber mode : false +2017-03-18 10:22:28,706 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:22:29,934 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:22:29,937 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:22:29,937 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:22:29,938 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:22:29,939 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:22:30,041 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:22:30,153 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:22:30,153 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:22:30,156 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:22:30,157 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:22:30,158 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:22:30,251 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:22:30,285 INFO org.apache.hadoop.mapred.Task: Task:attempt_local315696638_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:22:30,389 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:22:30,392 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local315696638_0001_m_000000_0' done. +2017-03-18 10:22:30,395 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local315696638_0001_m_000000_0 +2017-03-18 10:22:30,397 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:22:30,423 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:22:30,423 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local315696638_0001_r_000000_0 +2017-03-18 10:22:30,515 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:22:30,516 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:22:30,560 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@61ea9ce5 +2017-03-18 10:22:30,728 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:22:30,778 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:22:30,810 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local315696638_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:22:31,123 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local315696638_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:22:31,197 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local315696638_0001_m_000000_0 +2017-03-18 10:22:31,205 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:22:31,222 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:22:31,224 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:22:31,225 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:22:31,302 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:22:31,302 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:22:31,308 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:22:31,310 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:22:31,321 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:22:31,324 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:22:31,325 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:22:31,332 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:22:32,704 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:22:32,785 INFO org.apache.hadoop.mapred.Task: Task:attempt_local315696638_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:22:32,822 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:22:32,823 INFO org.apache.hadoop.mapred.Task: Task attempt_local315696638_0001_r_000000_0 is allowed to commit now +2017-03-18 10:22:32,824 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local315696638_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local315696638_0001_r_000000 +2017-03-18 10:22:32,839 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:22:32,839 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local315696638_0001_r_000000_0' done. +2017-03-18 10:22:32,840 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local315696638_0001_r_000000_0 +2017-03-18 10:22:32,847 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:22:32,941 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local315696638_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:22:33,739 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:22:33,739 INFO org.apache.hadoop.mapreduce.Job: Job job_local315696638_0001 failed with state FAILED due to: NA +2017-03-18 10:22:33,890 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=553366 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=370 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=170 +2017-03-18 10:23:34,780 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:23:37,781 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:23:37,804 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:23:39,906 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:23:39,934 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:23:40,482 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:23:42,200 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1059822994_0001 +2017-03-18 10:23:43,991 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:23:43,993 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1059822994_0001 +2017-03-18 10:23:44,009 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:23:44,076 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:23:44,083 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:23:44,493 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:23:44,494 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1059822994_0001_m_000000_0 +2017-03-18 10:23:44,761 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:23:44,863 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:23:44,943 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+264 +2017-03-18 10:23:44,997 INFO org.apache.hadoop.mapreduce.Job: Job job_local1059822994_0001 running in uber mode : false +2017-03-18 10:23:45,000 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:23:46,315 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:23:46,552 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:23:46,686 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:23:46,691 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:23:46,692 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:23:46,791 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:23:46,829 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1059822994_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:23:46,891 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:23:46,910 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1059822994_0001_m_000000_0' done. +2017-03-18 10:23:46,916 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1059822994_0001_m_000000_0 +2017-03-18 10:23:46,918 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:23:46,937 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:23:46,938 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1059822994_0001_r_000000_0 +2017-03-18 10:23:46,985 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:23:46,987 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:23:47,006 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:23:47,019 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@621e4b65 +2017-03-18 10:23:47,182 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:23:47,211 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1059822994_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:23:47,457 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1059822994_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:23:47,473 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1059822994_0001_m_000000_0 +2017-03-18 10:23:47,485 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:23:47,507 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:23:47,508 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:23:47,509 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:23:47,550 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:23:47,553 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:23:47,571 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:23:47,573 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:23:47,576 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:23:47,580 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:23:47,583 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:23:47,592 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:23:48,680 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:23:48,764 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1059822994_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:23:48,784 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:23:48,785 INFO org.apache.hadoop.mapred.Task: Task attempt_local1059822994_0001_r_000000_0 is allowed to commit now +2017-03-18 10:23:48,786 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1059822994_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1059822994_0001_r_000000 +2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1059822994_0001_r_000000_0' done. +2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1059822994_0001_r_000000_0 +2017-03-18 10:23:48,810 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:23:48,892 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1059822994_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:23:49,011 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:23:49,012 INFO org.apache.hadoop.mapreduce.Job: Job job_local1059822994_0001 failed with state FAILED due to: NA +2017-03-18 10:23:49,202 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1822 + FILE: Number of bytes written=556331 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=4 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=395 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=264 + File Output Format Counters + Bytes Written=195 +2017-03-18 10:25:10,795 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:25:13,885 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:25:13,914 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:25:15,934 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:25:16,030 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:25:16,475 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:25:18,086 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1796876123_0001 +2017-03-18 10:25:19,864 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:25:19,865 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1796876123_0001 +2017-03-18 10:25:19,882 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:25:19,944 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:25:19,957 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:25:20,353 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:25:20,355 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1796876123_0001_m_000000_0 +2017-03-18 10:25:20,601 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:25:20,721 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:25:20,840 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+265 +2017-03-18 10:25:20,900 INFO org.apache.hadoop.mapreduce.Job: Job job_local1796876123_0001 running in uber mode : false +2017-03-18 10:25:20,904 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:25:21,990 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:25:22,008 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:25:22,010 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:25:22,010 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:25:22,011 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:25:22,041 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:25:22,092 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:25:22,114 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:25:22,115 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:25:22,182 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:25:22,206 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1796876123_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:25:22,299 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:25:22,310 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1796876123_0001_m_000000_0' done. +2017-03-18 10:25:22,311 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1796876123_0001_m_000000_0 +2017-03-18 10:25:22,313 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:25:22,331 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:25:22,332 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1796876123_0001_r_000000_0 +2017-03-18 10:25:22,403 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:25:22,404 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:25:22,436 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@72f8516e +2017-03-18 10:25:22,557 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:25:22,587 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1796876123_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:25:22,859 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1796876123_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:25:22,878 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local1796876123_0001_m_000000_0 +2017-03-18 10:25:22,896 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:25:22,909 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:25:22,919 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:25:22,920 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:25:22,928 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:25:22,989 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:25:22,992 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:25:23,015 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:25:23,029 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:25:23,096 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:25:23,097 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:25:23,098 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:25:23,105 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:25:24,357 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:25:24,432 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1796876123_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:25:24,467 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:25:24,472 INFO org.apache.hadoop.mapred.Task: Task attempt_local1796876123_0001_r_000000_0 is allowed to commit now +2017-03-18 10:25:24,474 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1796876123_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local1796876123_0001_r_000000 +2017-03-18 10:25:24,488 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1796876123_0001_r_000000_0' done. +2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1796876123_0001_r_000000_0 +2017-03-18 10:25:24,497 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:25:24,585 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1796876123_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:25:24,913 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:25:24,914 INFO org.apache.hadoop.mapreduce.Job: Job job_local1796876123_0001 failed with state FAILED due to: NA +2017-03-18 10:25:25,061 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1824 + FILE: Number of bytes written=556331 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=5 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=389 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=265 + File Output Format Counters + Bytes Written=195 +2017-03-18 10:28:16,986 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2017-03-18 10:28:20,013 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id +2017-03-18 10:28:20,040 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= +2017-03-18 10:28:21,855 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). +2017-03-18 10:28:21,901 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1 +2017-03-18 10:28:22,330 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1 +2017-03-18 10:28:23,799 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local165609805_0001 +2017-03-18 10:28:25,593 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/ +2017-03-18 10:28:25,594 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local165609805_0001 +2017-03-18 10:28:25,599 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null +2017-03-18 10:28:25,657 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:28:25,668 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter +2017-03-18 10:28:26,042 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks +2017-03-18 10:28:26,044 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local165609805_0001_m_000000_0 +2017-03-18 10:28:26,287 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:28:26,392 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:28:26,470 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100_test.txt:0+265 +2017-03-18 10:28:26,599 INFO org.apache.hadoop.mapreduce.Job: Job job_local165609805_0001 running in uber mode : false +2017-03-18 10:28:26,608 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0% +2017-03-18 10:28:27,393 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) +2017-03-18 10:28:27,395 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100 +2017-03-18 10:28:27,396 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080 +2017-03-18 10:28:27,396 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600 +2017-03-18 10:28:27,397 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600 +2017-03-18 10:28:27,422 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer +2017-03-18 10:28:27,485 INFO org.apache.hadoop.mapred.LocalJobRunner: +2017-03-18 10:28:27,500 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output +2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: Spilling map output +2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 394; bufvoid = 104857600 +2017-03-18 10:28:27,510 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214292(104857168); length = 105/6553600 +2017-03-18 10:28:27,565 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0 +2017-03-18 10:28:27,589 INFO org.apache.hadoop.mapred.Task: Task:attempt_local165609805_0001_m_000000_0 is done. And is in the process of committing +2017-03-18 10:28:27,669 INFO org.apache.hadoop.mapred.LocalJobRunner: map +2017-03-18 10:28:27,679 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local165609805_0001_m_000000_0' done. +2017-03-18 10:28:27,681 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local165609805_0001_m_000000_0 +2017-03-18 10:28:27,683 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete. +2017-03-18 10:28:27,696 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks +2017-03-18 10:28:27,696 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local165609805_0001_r_000000_0 +2017-03-18 10:28:27,747 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1 +2017-03-18 10:28:27,749 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ] +2017-03-18 10:28:27,777 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@7bd00baf +2017-03-18 10:28:27,906 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10 +2017-03-18 10:28:27,958 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local165609805_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events +2017-03-18 10:28:28,177 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local165609805_0001_m_000000_0 decomp: 450 len: 454 to MEMORY +2017-03-18 10:28:28,204 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 450 bytes from map-output for attempt_local165609805_0001_m_000000_0 +2017-03-18 10:28:28,218 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 450, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->450 +2017-03-18 10:28:28,248 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning +2017-03-18 10:28:28,250 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:28:28,259 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs +2017-03-18 10:28:28,293 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:28:28,294 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:28:28,322 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 450 bytes to disk to satisfy reduce memory limit +2017-03-18 10:28:28,326 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 454 bytes from disk +2017-03-18 10:28:28,329 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce +2017-03-18 10:28:28,333 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments +2017-03-18 10:28:28,343 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 440 bytes +2017-03-18 10:28:28,344 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:28:28,621 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0% +2017-03-18 10:28:29,441 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords +2017-03-18 10:28:29,487 INFO org.apache.hadoop.mapred.Task: Task:attempt_local165609805_0001_r_000000_0 is done. And is in the process of committing +2017-03-18 10:28:29,534 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied. +2017-03-18 10:28:29,534 INFO org.apache.hadoop.mapred.Task: Task attempt_local165609805_0001_r_000000_0 is allowed to commit now +2017-03-18 10:28:29,536 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local165609805_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1_test/_temporary/0/task_local165609805_0001_r_000000 +2017-03-18 10:28:29,554 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce +2017-03-18 10:28:29,567 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local165609805_0001_r_000000_0' done. +2017-03-18 10:28:29,567 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local165609805_0001_r_000000_0 +2017-03-18 10:28:29,568 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete. +2017-03-18 10:28:29,624 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100% +2017-03-18 10:28:29,642 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local165609805_0001 +java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest + at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573) +Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest + at java.net.URLClassLoader$1.run(URLClassLoader.java:366) + at java.net.URLClassLoader$1.run(URLClassLoader.java:355) + at java.security.AccessController.doPrivileged(Native Method) + at java.net.URLClassLoader.findClass(URLClassLoader.java:354) + at java.lang.ClassLoader.loadClass(ClassLoader.java:425) + at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) + at java.lang.ClassLoader.loadClass(ClassLoader.java:358) + ... 1 more +2017-03-18 10:28:30,631 INFO org.apache.hadoop.mapreduce.Job: Job job_local165609805_0001 failed with state FAILED due to: NA +2017-03-18 10:28:30,746 INFO org.apache.hadoop.mapreduce.Job: Counters: 31 + File System Counters + FILE: Number of bytes read=1824 + FILE: Number of bytes written=553387 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + Map-Reduce Framework + Map input records=5 + Map output records=27 + Map output bytes=394 + Map output materialized bytes=454 + Input split bytes=121 + Combine input records=0 + Combine output records=0 + Reduce input groups=4 + Reduce shuffle bytes=454 + Reduce input records=27 + Reduce output records=4 + Spilled Records=54 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=362 + Total committed heap usage (bytes)=331227136 + Preprocessing.Preprocessing_1$COUNTS + COUNT_LINES=4 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=265 + File Output Format Counters + Bytes Written=191 diff --git a/Assign2/pg100_test.txt b/Assign2/pg100_test.txt index 6cb295c..a407ab2 100644 --- a/Assign2/pg100_test.txt +++ b/Assign2/pg100_test.txt @@ -1,4 +1,5 @@ This eBook is for the use of anyone anywhere at no cost and with anyone cost -almost no restrictions whatsoever. You may copy it, give it away or +almost no restrictions whatsoever. You may copy it, give it away or + re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org diff --git a/Assign2/pg100_test.txt~ b/Assign2/pg100_test.txt~ index b212032..6cb295c 100644 --- a/Assign2/pg100_test.txt~ +++ b/Assign2/pg100_test.txt~ @@ -1,4 +1,4 @@ -This eBook is for the use of anyone anywhere at no cost and with +This eBook is for the use of anyone anywhere at no cost and with anyone cost almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org diff --git a/Assign2/src/Preprocessing/Preprocessing_1.java b/Assign2/src/Preprocessing/Preprocessing_1.java index 3ba0873..8018614 100644 --- a/Assign2/src/Preprocessing/Preprocessing_1.java +++ b/Assign2/src/Preprocessing/Preprocessing_1.java @@ -1,10 +1,24 @@ package Preprocessing; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map.Entry; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; @@ -19,19 +33,8 @@ import org.apache.hadoop.util.ToolRunner; -import java.io.*; -import java.util.*; - - public class Preprocessing_1 extends Configured implements Tool { - - public static enum COUNTER { - COUNT_LINES - }; - - - public static void main(String[] args) throws Exception { System.out.println(Arrays.toString(args)); @@ -41,6 +44,8 @@ public class Preprocessing_1 extends Configured implements Tool { System.exit(res); } + + public static enum COUNTS {COUNT_LINES}; @Override public int run(String[] args) throws Exception { @@ -72,14 +77,14 @@ public class Preprocessing_1 extends Configured implements Tool { job.waitForCompletion(true); - // Write counter to file - long counter = job.getCounters().findCounter(COUNTER.COUNT_LINES).getValue(); - Path outFile = new Path(new Path(args[1]),"NB_LINES_AFTER_Preprocessing.txt"); - BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - fs.create(outFile, true))); - writer.write(String.valueOf(counter)); - writer.close(); + + long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue(); + Path countFile = new Path(new Path(args[1]),"nb_output_records.txt"); + File file = new File(countFile.toString()); + FileWriter fileWriter = new FileWriter(file); + fileWriter.write(String.valueOf(counter)); + fileWriter.flush(); + fileWriter.close(); return 0; } @@ -91,167 +96,92 @@ public class Preprocessing_1 extends Configured implements Tool { public static class Map extends Mapper<LongWritable, Text, LongWritable, Text> { private Text word = new Text(); - private HashSet<String> stopwords = new HashSet<String>(); + String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords"; + String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file))); - public Map() throws NumberFormatException, IOException{ - // Default constructor to load one time the stop words file - /* Read file of stopwords*/ - BufferedReader Reader = new BufferedReader( - new FileReader( - new File( - "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords"))); - - /* Add each line (word) in the variable stopwords*/ - String pattern; - while ((pattern = Reader.readLine()) != null) { - stopwords.add(pattern.toLowerCase()); - } - - Reader.close(); - + public Map() throws IOException{ + System.out.println(stopwords); } @Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - - - - for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) { - /* if word not in stop words list then we set word with the value then write it into context */ - - - if (!stopwords.contains(token.toLowerCase())) { - // if token only contains a blank character we do not write it - - - + if (!stopwords.contains(token.toLowerCase())) { word.set(token.toLowerCase()); context.write(key, word); } - - } - } } - - - - public static class Reduce extends Reducer<LongWritable, Text, LongWritable, Text> { - - /* Initialise one time a hashmap to store each word of the vocabulary and its global - * frequency in pg100.txt from the wordcountpg100.txt */ - private static HashMap<String,Integer> map_word_count = new HashMap<String,Integer>(); + + private static HashMap<String,Integer> word_freq = new HashMap<String,Integer>(); - public Reduce() throws NumberFormatException, IOException{ - - /*Default constructor to store (word,frequency) pair - * in the created hashmap from the file wordcountpg100.txt */ + public Reduce() throws IOException{ - BufferedReader Reader_count = new BufferedReader( - new FileReader( - new File( - "/home/cloudera/workspace/bpa/Assign2/WordCount/WordCount" - ))); - - String line; + String wordcount_file = "/home/cloudera/workspace/bpa/Assign2/WordCount/WordCount"; + String wordcount = new String(Files.readAllBytes( + Paths.get(wordcount_file))); - while ((line = Reader_count.readLine()) != null) - { - String[] parts = line.split(",", 2); - if (parts.length >= 2) - { - - map_word_count.put(parts[0].toString(),new Integer (parts[1])); - - } else { - System.out.println("ignoring line: " + line); - } + for (String line : wordcount.split("\n")){ + String[] word_count = line.split(","); + word_freq.put(word_count[0],new Integer(word_count[1])); + } - Reader_count.close(); - } - + /*SOURCE : http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java + */ public static <K, V extends Comparable<? super V>> LinkedHashSet<String> - sortByValue( HashMap<K, V> map ){ - List<java.util.Map.Entry<K, V>> list = new LinkedList<>( map.entrySet() ); + sortHM( HashMap<K, V> map ){ + List<Entry<K, V>> list = + new LinkedList<>( map.entrySet() ); - // sort the list of pairs - - Collections.sort( list, new Comparator<java.util.Map.Entry<K, V>>() + Collections.sort( list, new Comparator<Entry<K, V>>() { - public int compare( java.util.Map.Entry<K, V> o1, java.util.Map.Entry<K, V> o2 ) + public int compare(Entry<K, V> o1, Entry<K, V> o2 ) { return (o1.getValue()).compareTo(o2.getValue()); } } ); - // Create LinkedHashset to store the word in ascending order - LinkedHashSet<String> result = new LinkedHashSet<String>(); - for (java.util.Map.Entry<K, V> entry : list) + for (Entry<K, V> entry : list) { result.add(entry.getKey().toString()); } return result; } - @Override public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { - - - - - - /*Create a reduced hashmap where each key is a word for the same - * mapper key and the value is the global frequency with the static hashmap - * word_word_count containing the global frequency of word in pg100.txt*/ - - HashMap<String, Integer> map_word_count_key = new HashMap<String, Integer>(); + + HashMap<String, Integer> line_word_count = new HashMap<String, Integer>(); - for (Text val : values) + for (Text token : values) { - /*store the global frequency of each word for words corresponding to a same key*/ - map_word_count_key.put(val.toString(),map_word_count.get(val.toString())); + line_word_count.put(token.toString(), + word_freq.get(token.toString())); } - - // Sort Hashmap and return a LinkedHashset (to keep the order) with word in ascending order - // Using the sortByValue method - - LinkedHashSet<String> setvalue = new LinkedHashSet<String>(); - - setvalue = sortByValue(map_word_count_key); - - /* Concatenate the words in ascending order of frequency */ - - StringBuilder reducedvalue = new StringBuilder(); - for (String val : setvalue) { - - if (reducedvalue.length() !=0){ - reducedvalue.append(' '); - } - - reducedvalue.append(val); + StringBuilder concat_words = new StringBuilder(); + String prefix = ""; + for (String token : sortHM(line_word_count)) { + concat_words.append(prefix); + prefix = " "; + concat_words.append(token); } - - - // write for each line the words in the ascending order if not empty - if(!reducedvalue.toString().isEmpty()){ - // Increment counter - context.getCounter(COUNTER.COUNT_LINES).increment(1); - context.write(key, new Text(reducedvalue.toString())); + + if(!concat_words.toString().isEmpty()){ + context.getCounter(COUNTS.COUNT_LINES).increment(1); + context.write(key, new Text(concat_words.toString())); } } diff --git a/Assign2/src/WordCount/WordCount.java b/Assign2/src/WordCount/WordCount.java index 88784c2..ba848bb 100644 --- a/Assign2/src/WordCount/WordCount.java +++ b/Assign2/src/WordCount/WordCount.java @@ -1,10 +1,16 @@ package WordCount; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; @@ -16,20 +22,6 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import Preprocessing.Preprocessing_1; -import Preprocessing.Preprocessing_1.COUNTER; -import Preprocessing.Preprocessing_1.Map; -import Preprocessing.Preprocessing_1.Reduce; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.util.Arrays; -import java.util.HashSet; - public class WordCount extends Configured implements Tool { public static void main(String[] args) throws Exception { System.out.println(Arrays.toString(args)); @@ -42,7 +34,7 @@ public class WordCount extends Configured implements Tool { public int run(String[] args) throws Exception { System.out.println(Arrays.toString(args)); Job job = new Job(getConf(), "WordCount"); - job.setJarByClass(Preprocessing_1.class); + job.setJarByClass(WordCount.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); @@ -68,8 +60,6 @@ public class WordCount extends Configured implements Tool { } job.waitForCompletion(true); - - return 0; } @@ -77,45 +67,20 @@ public class WordCount extends Configured implements Tool { public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable ONE = new IntWritable(1); private Text word = new Text(); + private String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords"; @Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - - - - /* Initialize a hashset variable, set of strings without duplicates*/ - HashSet<String> stopwords = new HashSet<String>(); - - /* Read file of stopwords*/ - BufferedReader Reader = new BufferedReader( - new FileReader( - new File( - "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords"))); - - /* Add each line (word) in the variable stopwords*/ - String pattern; - while ((pattern = Reader.readLine()) != null) { - stopwords.add(pattern.toLowerCase()); - } - + + String stopwords = new String(Files.readAllBytes( + Paths.get(stopwords_file))); + for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) { - - /* if word not in stop words list then we set word with the value then write it into context */ - - if (!stopwords.contains(token.toLowerCase())) { word.set(token.toLowerCase()); context.write(word, ONE); } - - - - - - - - } } } @@ -128,7 +93,6 @@ public class WordCount extends Configured implements Tool { for (IntWritable val : values) { sum += val.get(); } - context.write(key, new IntWritable(sum)); } -- GitLab