Skip to content
Snippets Groups Projects
Commit d47d6c7c authored by cloudera_vm's avatar cloudera_vm
Browse files

Taking into account capital letters after Julien's advice

parent 6c23b92d
No related branches found
No related tags found
No related merge requests found
114969
\ No newline at end of file
No preview for this file type
Source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -2234,3 +2234,407 @@ Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.Http
Bytes Read=5589889
File Output Format Counters
Bytes Written=3559078
2017-03-18 10:45:12,755 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2017-03-18 10:45:15,668 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2017-03-18 10:45:15,683 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
2017-03-18 10:45:17,496 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-03-18 10:45:17,543 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
2017-03-18 10:45:18,000 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
2017-03-18 10:45:19,468 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1047252402_0001
2017-03-18 10:45:21,154 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
2017-03-18 10:45:21,156 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1047252402_0001
2017-03-18 10:45:21,179 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
2017-03-18 10:45:21,245 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:45:21,254 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-03-18 10:45:21,654 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
2017-03-18 10:45:21,656 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1047252402_0001_m_000000_0
2017-03-18 10:45:21,902 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:45:22,037 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:45:22,112 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100.txt:0+5589889
2017-03-18 10:45:22,164 INFO org.apache.hadoop.mapreduce.Job: Job job_local1047252402_0001 running in uber mode : false
2017-03-18 10:45:22,178 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0%
2017-03-18 10:45:23,281 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
2017-03-18 10:45:23,281 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
2017-03-18 10:45:23,281 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
2017-03-18 10:45:23,281 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
2017-03-18 10:45:23,282 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
2017-03-18 10:45:23,341 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2017-03-18 10:45:23,417 INFO org.apache.hadoop.mapreduce.lib.input.LineRecordReader: Found UTF-8 BOM and skipped it
2017-03-18 10:45:28,181 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:45:28,212 INFO org.apache.hadoop.mapreduce.Job: map 6% reduce 0%
2017-03-18 10:45:31,186 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:45:31,214 INFO org.apache.hadoop.mapreduce.Job: map 28% reduce 0%
2017-03-18 10:45:34,199 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:45:34,234 INFO org.apache.hadoop.mapreduce.Job: map 63% reduce 0%
2017-03-18 10:45:34,571 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:45:34,576 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
2017-03-18 10:45:34,581 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
2017-03-18 10:45:34,582 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 8338168; bufvoid = 104857600
2017-03-18 10:45:34,582 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 23837324(95349296); length = 2377073/6553600
2017-03-18 10:45:37,201 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:45:37,244 INFO org.apache.hadoop.mapreduce.Job: map 67% reduce 0%
2017-03-18 10:45:40,202 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:45:41,200 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
2017-03-18 10:45:41,245 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1047252402_0001_m_000000_0 is done. And is in the process of committing
2017-03-18 10:45:41,252 INFO org.apache.hadoop.mapred.LocalJobRunner: map
2017-03-18 10:45:41,272 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1047252402_0001_m_000000_0' done.
2017-03-18 10:45:41,273 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1047252402_0001_m_000000_0
2017-03-18 10:45:41,273 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
2017-03-18 10:45:41,283 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
2017-03-18 10:45:41,283 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1047252402_0001_r_000000_0
2017-03-18 10:45:41,405 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:45:41,406 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:45:41,458 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@3f545b48
2017-03-18 10:45:41,638 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-03-18 10:45:41,693 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1047252402_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-03-18 10:45:42,020 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1047252402_0001_m_000000_0 decomp: 9526708 len: 9526712 to MEMORY
2017-03-18 10:45:42,246 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 9526708 bytes from map-output for attempt_local1047252402_0001_m_000000_0
2017-03-18 10:45:42,256 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0%
2017-03-18 10:45:42,277 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 9526708, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->9526708
2017-03-18 10:45:42,315 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
2017-03-18 10:45:42,317 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:45:42,321 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-03-18 10:45:42,388 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:45:42,389 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 9526698 bytes
2017-03-18 10:45:45,670 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 9526708 bytes to disk to satisfy reduce memory limit
2017-03-18 10:45:45,675 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 9526712 bytes from disk
2017-03-18 10:45:45,692 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
2017-03-18 10:45:45,697 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:45:45,698 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 9526698 bytes
2017-03-18 10:45:45,705 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:45:46,142 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2017-03-18 10:45:47,429 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:48,274 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 67%
2017-03-18 10:45:50,435 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:51,293 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 73%
2017-03-18 10:45:53,439 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:54,298 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 87%
2017-03-18 10:45:56,443 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:57,119 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1047252402_0001_r_000000_0 is done. And is in the process of committing
2017-03-18 10:45:57,139 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:57,149 INFO org.apache.hadoop.mapred.Task: Task attempt_local1047252402_0001_r_000000_0 is allowed to commit now
2017-03-18 10:45:57,150 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1047252402_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/Preprocessing_1/_temporary/0/task_local1047252402_0001_r_000000
2017-03-18 10:45:57,151 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:45:57,158 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1047252402_0001_r_000000_0' done.
2017-03-18 10:45:57,160 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1047252402_0001_r_000000_0
2017-03-18 10:45:57,162 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
2017-03-18 10:45:57,286 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1047252402_0001
java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 1 more
2017-03-18 10:45:57,305 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100%
2017-03-18 10:45:57,339 INFO org.apache.hadoop.mapreduce.Job: Job job_local1047252402_0001 failed with state FAILED due to: NA
2017-03-18 10:45:57,537 INFO org.apache.hadoop.mapreduce.Job: Counters: 31
File System Counters
FILE: Number of bytes read=30233580
FILE: Number of bytes written=32693156
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=124787
Map output records=594269
Map output bytes=8338168
Map output materialized bytes=9526712
Input split bytes=116
Combine input records=0
Combine output records=0
Reduce input groups=114969
Reduce shuffle bytes=9526712
Reduce input records=594269
Reduce output records=114969
Spilled Records=1188538
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=510
Total committed heap usage (bytes)=331227136
Preprocessing.Preprocessing_1$COUNTS
COUNT_LINES=114969
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=5589889
File Output Format Counters
Bytes Written=3559078
2017-03-18 10:50:11,147 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2017-03-18 10:50:14,099 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2017-03-18 10:50:14,144 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
2017-03-18 10:50:16,188 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-03-18 10:50:16,275 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
2017-03-18 10:50:16,777 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
2017-03-18 10:50:18,510 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1109750021_0001
2017-03-18 10:50:20,273 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
2017-03-18 10:50:20,275 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1109750021_0001
2017-03-18 10:50:20,308 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
2017-03-18 10:50:20,383 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:50:20,385 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-03-18 10:50:20,784 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
2017-03-18 10:50:20,787 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1109750021_0001_m_000000_0
2017-03-18 10:50:21,058 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:50:21,156 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:50:21,186 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100.txt:0+5589889
2017-03-18 10:50:21,309 INFO org.apache.hadoop.mapreduce.Job: Job job_local1109750021_0001 running in uber mode : false
2017-03-18 10:50:21,312 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0%
2017-03-18 10:50:22,506 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
2017-03-18 10:50:22,506 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
2017-03-18 10:50:22,507 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
2017-03-18 10:50:22,507 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
2017-03-18 10:50:22,507 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
2017-03-18 10:50:22,574 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2017-03-18 10:50:22,641 INFO org.apache.hadoop.mapreduce.lib.input.LineRecordReader: Found UTF-8 BOM and skipped it
2017-03-18 10:50:27,228 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:27,685 INFO org.apache.hadoop.mapreduce.Job: map 2% reduce 0%
2017-03-18 10:50:30,247 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:30,695 INFO org.apache.hadoop.mapreduce.Job: map 4% reduce 0%
2017-03-18 10:50:33,251 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:33,709 INFO org.apache.hadoop.mapreduce.Job: map 13% reduce 0%
2017-03-18 10:50:36,257 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:36,729 INFO org.apache.hadoop.mapreduce.Job: map 29% reduce 0%
2017-03-18 10:50:39,260 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:39,775 INFO org.apache.hadoop.mapreduce.Job: map 44% reduce 0%
2017-03-18 10:50:42,265 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:42,781 INFO org.apache.hadoop.mapreduce.Job: map 60% reduce 0%
2017-03-18 10:50:43,638 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:50:43,656 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
2017-03-18 10:50:43,657 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
2017-03-18 10:50:43,657 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 6243266; bufvoid = 104857600
2017-03-18 10:50:43,657 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 23688208(94752832); length = 2526189/6553600
2017-03-18 10:50:45,270 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:50:45,785 INFO org.apache.hadoop.mapreduce.Job: map 67% reduce 0%
2017-03-18 10:50:48,271 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:50:51,277 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:50:51,430 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
2017-03-18 10:50:51,495 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1109750021_0001_m_000000_0 is done. And is in the process of committing
2017-03-18 10:50:51,505 INFO org.apache.hadoop.mapred.LocalJobRunner: map
2017-03-18 10:50:51,521 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1109750021_0001_m_000000_0' done.
2017-03-18 10:50:51,525 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1109750021_0001_m_000000_0
2017-03-18 10:50:51,525 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
2017-03-18 10:50:51,539 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
2017-03-18 10:50:51,540 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1109750021_0001_r_000000_0
2017-03-18 10:50:51,640 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:50:51,642 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:50:51,687 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@31245703
2017-03-18 10:50:51,804 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0%
2017-03-18 10:50:51,887 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-03-18 10:50:51,928 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1109750021_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-03-18 10:50:52,312 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1109750021_0001_m_000000_0 decomp: 7506364 len: 7506368 to MEMORY
2017-03-18 10:50:52,509 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 7506364 bytes from map-output for attempt_local1109750021_0001_m_000000_0
2017-03-18 10:50:52,547 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 7506364, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->7506364
2017-03-18 10:50:52,570 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
2017-03-18 10:50:52,572 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:50:52,577 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-03-18 10:50:52,707 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:50:52,711 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7506360 bytes
2017-03-18 10:50:56,396 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 7506364 bytes to disk to satisfy reduce memory limit
2017-03-18 10:50:56,404 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 7506368 bytes from disk
2017-03-18 10:50:56,431 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
2017-03-18 10:50:56,432 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:50:56,441 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7506360 bytes
2017-03-18 10:50:56,442 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:50:56,573 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2017-03-18 10:50:57,759 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:50:57,821 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 67%
2017-03-18 10:51:00,770 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:51:00,857 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 75%
2017-03-18 10:51:03,351 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1109750021_0001_r_000000_0 is done. And is in the process of committing
2017-03-18 10:51:03,355 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:51:03,355 INFO org.apache.hadoop.mapred.Task: Task attempt_local1109750021_0001_r_000000_0 is allowed to commit now
2017-03-18 10:51:03,379 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1109750021_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/WordCount/_temporary/0/task_local1109750021_0001_r_000000
2017-03-18 10:51:03,381 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:51:03,383 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1109750021_0001_r_000000_0' done.
2017-03-18 10:51:03,383 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1109750021_0001_r_000000_0
2017-03-18 10:51:03,384 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
2017-03-18 10:51:03,659 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1109750021_0001
java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 1 more
2017-03-18 10:51:03,860 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100%
2017-03-18 10:51:03,861 INFO org.apache.hadoop.mapreduce.Job: Job job_local1109750021_0001 failed with state FAILED due to: NA
2017-03-18 10:51:04,087 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=26192892
FILE: Number of bytes written=23378494
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=124787
Map output records=631548
Map output bytes=6243266
Map output materialized bytes=7506368
Input split bytes=116
Combine input records=0
Combine output records=0
Reduce input groups=29737
Reduce shuffle bytes=7506368
Reduce input records=631548
Reduce output records=29737
Spilled Records=1263096
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=380
Total committed heap usage (bytes)=331227136
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=5589889
File Output Format Counters
Bytes Written=304832
2017-03-18 10:51:50,188 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2017-03-18 10:51:53,247 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2017-03-18 10:51:53,276 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
2017-03-18 10:51:55,659 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-03-18 10:51:55,781 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
2017-03-18 10:51:56,393 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
2017-03-18 10:51:58,640 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1070793075_0001
2017-03-18 10:52:00,645 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
2017-03-18 10:52:00,647 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1070793075_0001
2017-03-18 10:52:00,666 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
2017-03-18 10:52:00,737 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:52:00,756 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-03-18 10:52:01,183 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
2017-03-18 10:52:01,185 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1070793075_0001_m_000000_0
2017-03-18 10:52:01,450 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:52:01,574 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:52:01,648 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/bpa/Assign2/pg100.txt:0+5589889
2017-03-18 10:52:01,667 INFO org.apache.hadoop.mapreduce.Job: Job job_local1070793075_0001 running in uber mode : false
2017-03-18 10:52:01,687 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0%
2017-03-18 10:52:02,887 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
2017-03-18 10:52:02,897 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
2017-03-18 10:52:02,897 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
2017-03-18 10:52:02,897 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
2017-03-18 10:52:02,898 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
2017-03-18 10:52:02,943 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2017-03-18 10:52:03,010 INFO org.apache.hadoop.mapreduce.lib.input.LineRecordReader: Found UTF-8 BOM and skipped it
2017-03-18 10:52:07,687 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:52:07,708 INFO org.apache.hadoop.mapreduce.Job: map 4% reduce 0%
2017-03-18 10:52:10,691 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:52:10,717 INFO org.apache.hadoop.mapreduce.Job: map 32% reduce 0%
2017-03-18 10:52:13,321 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-03-18 10:52:13,323 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
2017-03-18 10:52:13,324 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
2017-03-18 10:52:13,324 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 6243266; bufvoid = 104857600
2017-03-18 10:52:13,324 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 23688208(94752832); length = 2526189/6553600
2017-03-18 10:52:13,695 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:52:13,722 INFO org.apache.hadoop.mapreduce.Job: map 67% reduce 0%
2017-03-18 10:52:16,700 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:52:19,705 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-03-18 10:52:21,590 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
2017-03-18 10:52:21,664 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1070793075_0001_m_000000_0 is done. And is in the process of committing
2017-03-18 10:52:21,675 INFO org.apache.hadoop.mapred.LocalJobRunner: map
2017-03-18 10:52:21,680 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1070793075_0001_m_000000_0' done.
2017-03-18 10:52:21,685 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1070793075_0001_m_000000_0
2017-03-18 10:52:21,687 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
2017-03-18 10:52:21,698 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
2017-03-18 10:52:21,698 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1070793075_0001_r_000000_0
2017-03-18 10:52:21,738 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0%
2017-03-18 10:52:21,830 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-03-18 10:52:21,831 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-03-18 10:52:21,902 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@57d47ef4
2017-03-18 10:52:22,094 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-03-18 10:52:22,147 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1070793075_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-03-18 10:52:22,570 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1070793075_0001_m_000000_0 decomp: 7506364 len: 7506368 to MEMORY
2017-03-18 10:52:22,725 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 7506364 bytes from map-output for attempt_local1070793075_0001_m_000000_0
2017-03-18 10:52:22,758 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 7506364, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->7506364
2017-03-18 10:52:22,777 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
2017-03-18 10:52:22,779 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:52:22,780 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-03-18 10:52:22,837 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:52:22,841 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7506360 bytes
2017-03-18 10:52:26,560 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 7506364 bytes to disk to satisfy reduce memory limit
2017-03-18 10:52:26,566 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 7506368 bytes from disk
2017-03-18 10:52:26,622 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
2017-03-18 10:52:26,623 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-03-18 10:52:26,672 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 7506360 bytes
2017-03-18 10:52:26,673 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-03-18 10:52:26,896 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2017-03-18 10:52:27,859 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:52:28,752 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 67%
2017-03-18 10:52:30,956 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:52:31,759 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 78%
2017-03-18 10:52:33,755 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1070793075_0001_r_000000_0 is done. And is in the process of committing
2017-03-18 10:52:33,772 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:52:33,773 INFO org.apache.hadoop.mapred.Task: Task attempt_local1070793075_0001_r_000000_0 is allowed to commit now
2017-03-18 10:52:33,782 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1070793075_0001_r_000000_0' to file:/home/cloudera/workspace/bpa/Assign2/WordCount/_temporary/0/task_local1070793075_0001_r_000000
2017-03-18 10:52:33,801 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-03-18 10:52:33,803 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1070793075_0001_r_000000_0' done.
2017-03-18 10:52:33,803 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1070793075_0001_r_000000_0
2017-03-18 10:52:33,804 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
2017-03-18 10:52:33,953 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1070793075_0001
java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 1 more
2017-03-18 10:52:34,764 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100%
2017-03-18 10:52:34,765 INFO org.apache.hadoop.mapreduce.Job: Job job_local1070793075_0001 failed with state FAILED due to: NA
2017-03-18 10:52:34,882 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=26192892
FILE: Number of bytes written=23378494
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=124787
Map output records=631548
Map output bytes=6243266
Map output materialized bytes=7506368
Input split bytes=116
Combine input records=0
Combine output records=0
Reduce input groups=29737
Reduce shuffle bytes=7506368
Reduce input records=631548
Reduce output records=29737
Spilled Records=1263096
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=444
Total committed heap usage (bytes)=331227136
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=5589889
File Output Format Counters
Bytes Written=304832
......@@ -58,8 +58,7 @@ public class Preprocessing_1 extends Configured implements Tool {
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");
job.setOutputFormatClass(TextOutputFormat.class);;
Path outputFilePath = new Path(args[1]);
......@@ -76,7 +75,7 @@ public class Preprocessing_1 extends Configured implements Tool {
job.waitForCompletion(true);
long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue();
Path countFile = new Path(new Path(args[1]),"nb_output_records.txt");
Path countFile = new Path(new Path(args[1]),"nb_output_records");
File file = new File(countFile.toString());
FileWriter fileWriter = new FileWriter(file);
fileWriter.write(String.valueOf(counter));
......
......@@ -67,18 +67,19 @@ public class WordCount extends Configured implements Tool {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable ONE = new IntWritable(1);
private Text word = new Text();
private String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file)));
public Map() throws IOException{
System.out.println(stopwords);
}
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String stopwords = new String(Files.readAllBytes(
Paths.get(stopwords_file)));
for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) {
if (!stopwords.contains(token.toLowerCase())) {
word.set(token.toLowerCase());
if (!stopwords.contains(token)) {
word.set(token);
context.write(word, ONE);
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment