Commit 54c0b218 authored by cloudera

full project

parent a46b605a
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry kind="lib" path="conf"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/avro.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-beanutils-core.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-beanutils.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-cli.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-codec.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-collections.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-compress.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-configuration.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-digester.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-httpclient.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-io.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-lang.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-logging.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-math3.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/commons-net.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/guava.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-annotations.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-auth.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-common.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-hdfs.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-mapreduce-client-app.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-mapreduce-client-common.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-mapreduce-client-core.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-mapreduce-client-jobclient.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-mapreduce-client-shuffle.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-yarn-api.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-yarn-client.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-yarn-common.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/hadoop-yarn-server-common.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/htrace-core4.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/jetty-util.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/jsr305.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/log4j.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/paranamer.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/protobuf-java.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/slf4j-api.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/snappy-java.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/xmlenc.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/xz.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client/zookeeper.jar"/>
<classpathentry kind="lib" path="/home/cloudera/lib/mrunit-0.9.0-incubating-hadoop2.jar"/>
<classpathentry kind="lib" path="/home/cloudera/lib/junit-4.11.jar"/>
<classpathentry kind="lib" path="/home/cloudera/lib/hamcrest-all-1.1.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-core-asl.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-core-asl-1.8.8.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-mapper-asl.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-mapper-asl-1.8.8.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/slf4j-api.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/slf4j-api-1.7.5.jar"/>
<classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/slf4j-log4j12.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Assign1</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
#Thu Apr 19 09:46:53 CDT 2012
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.source=1.7
# Autogenerated by Cloudera SCM on Tue Apr 10 13:04:56 CDT 2012
# Define some default values that can be overridden by system properties
hadoop.root.logger=INFO,DRFA,console
hadoop.log.dir=.
hadoop.log.file=hadoop.log
# Define the root logger to the system property "hadoop.root.logger".
log4j.rootLogger=${hadoop.root.logger}, EventCounter
# Logging Threshold
log4j.threshold=ALL
#
# console
# This is left here because hadoop scripts use it if the environment variable
# HADOOP_ROOT_LOGGER is not set
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
#
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
# Debugging Pattern format
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
#=======
# security audit logging
security.audit.logger=INFO, console
log4j.category.SecurityLogger=${security.audit.logger}
log4j.additivity.SecurityLogger=false
log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFAS.File=${hadoop.log.dir}/security/${hadoop.id.str}-auth.log
log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
# hdfs audit logging
hdfs.audit.logger=INFO, console
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
log4j.appender.DRFAAUDIT=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/audit/hdfs-audit.log
log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd
#
# FSNamesystem Audit logging
# All audit events are logged at INFO level
#
log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
# Jets3t library
log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
#
# Event Counter Appender
# Sends counts of logging messages at different severity levels to Hadoop Metrics.
#
log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
2017-02-18 02:36:22,445 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2017-02-18 02:36:24,389 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2017-02-18 02:36:24,424 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
2017-02-18 02:37:48,329 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2017-02-18 02:37:49,998 INFO org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2017-02-18 02:37:50,000 INFO org.apache.hadoop.metrics.jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
2017-02-18 02:37:51,700 WARN org.apache.hadoop.mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-02-18 02:37:51,741 INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to process : 1
2017-02-18 02:37:52,081 INFO org.apache.hadoop.mapreduce.JobSubmitter: number of splits:1
2017-02-18 02:37:53,299 INFO org.apache.hadoop.mapreduce.JobSubmitter: Submitting tokens for job: job_local1560003292_0001
2017-02-18 02:37:54,908 INFO org.apache.hadoop.mapreduce.Job: The url to track the job: http://localhost:8080/
2017-02-18 02:37:54,909 INFO org.apache.hadoop.mapreduce.Job: Running job: job_local1560003292_0001
2017-02-18 02:37:54,923 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter set in config null
2017-02-18 02:37:54,983 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-02-18 02:37:54,991 INFO org.apache.hadoop.mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-02-18 02:37:55,317 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for map tasks
2017-02-18 02:37:55,321 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1560003292_0001_m_000000_0
2017-02-18 02:37:55,573 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-02-18 02:37:55,691 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-02-18 02:37:55,717 INFO org.apache.hadoop.mapred.MapTask: Processing split: file:/home/cloudera/workspace/Assign1/pg100.txt:0+5589886
2017-02-18 02:37:56,108 INFO org.apache.hadoop.mapreduce.Job: Job job_local1560003292_0001 running in uber mode : false
2017-02-18 02:37:56,130 INFO org.apache.hadoop.mapreduce.Job: map 0% reduce 0%
2017-02-18 02:37:56,758 INFO org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
2017-02-18 02:37:56,760 INFO org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
2017-02-18 02:37:56,761 INFO org.apache.hadoop.mapred.MapTask: soft limit at 83886080
2017-02-18 02:37:56,761 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
2017-02-18 02:37:56,761 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
2017-02-18 02:37:56,794 INFO org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2017-02-18 02:38:01,693 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-02-18 02:38:02,182 INFO org.apache.hadoop.mapreduce.Job: map 48% reduce 0%
2017-02-18 02:38:02,562 INFO org.apache.hadoop.mapred.LocalJobRunner: map > map
2017-02-18 02:38:02,565 INFO org.apache.hadoop.mapred.MapTask: Starting flush of map output
2017-02-18 02:38:02,566 INFO org.apache.hadoop.mapred.MapTask: Spilling map output
2017-02-18 02:38:02,567 INFO org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 9171648; bufvoid = 104857600
2017-02-18 02:38:02,567 INFO org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 22120624(88482496); length = 4093773/6553600
2017-02-18 02:38:04,704 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-02-18 02:38:05,194 INFO org.apache.hadoop.mapreduce.Job: map 67% reduce 0%
2017-02-18 02:38:07,715 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-02-18 02:38:10,719 INFO org.apache.hadoop.mapred.LocalJobRunner: map > sort
2017-02-18 02:38:11,523 INFO org.apache.hadoop.mapred.MapTask: Finished spill 0
2017-02-18 02:38:11,593 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1560003292_0001_m_000000_0 is done. And is in the process of committing
2017-02-18 02:38:11,598 INFO org.apache.hadoop.mapred.LocalJobRunner: map
2017-02-18 02:38:11,605 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1560003292_0001_m_000000_0' done.
2017-02-18 02:38:11,609 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1560003292_0001_m_000000_0
2017-02-18 02:38:11,611 INFO org.apache.hadoop.mapred.LocalJobRunner: map task executor complete.
2017-02-18 02:38:11,630 INFO org.apache.hadoop.mapred.LocalJobRunner: Waiting for reduce tasks
2017-02-18 02:38:11,631 INFO org.apache.hadoop.mapred.LocalJobRunner: Starting task: attempt_local1560003292_0001_r_000000_0
2017-02-18 02:38:11,696 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: File Output Committer Algorithm version is 1
2017-02-18 02:38:11,696 INFO org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2017-02-18 02:38:11,724 INFO org.apache.hadoop.mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@7b948a59
2017-02-18 02:38:11,858 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: MergerManager: memoryLimit=679778688, maxSingleShuffleLimit=169944672, mergeThreshold=448653952, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-02-18 02:38:11,897 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: attempt_local1560003292_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-02-18 02:38:12,182 INFO org.apache.hadoop.mapreduce.task.reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1560003292_0001_m_000000_0 decomp: 11218538 len: 11218542 to MEMORY
2017-02-18 02:38:12,220 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 0%
2017-02-18 02:38:12,391 INFO org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput: Read 11218538 bytes from map-output for attempt_local1560003292_0001_m_000000_0
2017-02-18 02:38:12,416 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 11218538, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->11218538
2017-02-18 02:38:12,427 INFO org.apache.hadoop.mapreduce.task.reduce.EventFetcher: EventFetcher is interrupted.. Returning
2017-02-18 02:38:12,428 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-02-18 02:38:12,428 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-02-18 02:38:12,535 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-02-18 02:38:12,553 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 11218535 bytes
2017-02-18 02:38:16,458 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merged 1 segments, 11218538 bytes to disk to satisfy reduce memory limit
2017-02-18 02:38:16,460 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 1 files, 11218542 bytes from disk
2017-02-18 02:38:16,461 INFO org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
2017-02-18 02:38:16,461 INFO org.apache.hadoop.mapred.Merger: Merging 1 sorted segments
2017-02-18 02:38:16,469 INFO org.apache.hadoop.mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 11218535 bytes
2017-02-18 02:38:16,471 INFO org.apache.hadoop.mapred.LocalJobRunner: 1 / 1 copied.
2017-02-18 02:38:16,598 INFO org.apache.hadoop.conf.Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2017-02-18 02:38:17,735 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-02-18 02:38:18,244 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 68%
2017-02-18 02:38:20,745 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-02-18 02:38:21,257 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 88%
2017-02-18 02:38:21,893 INFO org.apache.hadoop.mapred.Task: Task:attempt_local1560003292_0001_r_000000_0 is done. And is in the process of committing
2017-02-18 02:38:21,902 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-02-18 02:38:21,907 INFO org.apache.hadoop.mapred.Task: Task attempt_local1560003292_0001_r_000000_0 is allowed to commit now
2017-02-18 02:38:21,909 INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Saved output of task 'attempt_local1560003292_0001_r_000000_0' to file:/home/cloudera/workspace/Assign1/output/_temporary/0/task_local1560003292_0001_r_000000
2017-02-18 02:38:21,915 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce > reduce
2017-02-18 02:38:21,917 INFO org.apache.hadoop.mapred.Task: Task 'attempt_local1560003292_0001_r_000000_0' done.
2017-02-18 02:38:21,918 INFO org.apache.hadoop.mapred.LocalJobRunner: Finishing task: attempt_local1560003292_0001_r_000000_0
2017-02-18 02:38:21,919 INFO org.apache.hadoop.mapred.LocalJobRunner: reduce task executor complete.
2017-02-18 02:38:21,971 WARN org.apache.hadoop.mapred.LocalJobRunner: job_local1560003292_0001
java.lang.NoClassDefFoundError: org/apache/http/client/methods/HttpUriRequest
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:573)
Caused by: java.lang.ClassNotFoundException: org.apache.http.client.methods.HttpUriRequest
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 1 more
2017-02-18 02:38:22,259 INFO org.apache.hadoop.mapreduce.Job: map 100% reduce 100%
2017-02-18 02:38:22,260 INFO org.apache.hadoop.mapreduce.Job: Job job_local1560003292_0001 failed with state FAILED due to: NA
2017-02-18 02:38:22,337 INFO org.apache.hadoop.mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=33617226
FILE: Number of bytes written=34935992
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=124787
Map output records=1023444
Map output bytes=9171648
Map output materialized bytes=11218542
Input split bytes=112
Combine input records=0
Combine output records=0
Reduce input groups=67780
Reduce shuffle bytes=11218542
Reduce input records=1023444
Reduce output records=67780
Spilled Records=2046888
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=338
Total committed heap usage (bytes)=331227136
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=5589886
File Output Format Counters
Bytes Written=726624
package Question1;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool {
public static void main(String[] args) throws Exception {
System.out.println(Arrays.toString(args));
int res = ToolRunner.run(new Configuration(), new WordCount(), args);
System.exit(res);
}
@Override
public int run(String[] args) throws Exception {
System.out.println(Arrays.toString(args));
Job job = Job.getInstance(getConf(), "WordCount");
job.setJarByClass(WordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job.waitForCompletion(true) ? 0 : 1;
}
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable ONE = new IntWritable(1);
private Text word = new Text();
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
for (String token: value.toString().split("\\s+")) {
word.set(token);
context.write(word, ONE);
}
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
}
import org.apache.hadoop.mapreduce.Job;
public class StubDriver {
public static void main(String[] args) throws Exception {
/*
* Validate that two arguments were passed from the command line.
*/
if (args.length != 2) {
System.out.printf("Usage: StubDriver <input dir> <output dir>\n");
System.exit(-1);
}
/*
* Instantiate a Job object for your job's configuration.
*/
Job job = new Job();
/*
* Specify the jar file that contains your driver, mapper, and reducer.
* Hadoop will transfer this jar file to nodes in your cluster running
* mapper and reducer tasks.
*/
job.setJarByClass(StubDriver.class);
/*
* Specify an easily-decipherable name for the job.
* This job name will appear in reports and logs.
*/
job.setJobName("Stub Driver");
/*
* TODO implement
*/
/*
* Start the MapReduce job and wait for it to finish.
* If it finishes successfully, return 0. If not, return 1.
*/
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
}
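The TODO section in StubDriver above is where the mapper, reducer, output types, and paths are normally wired up. A minimal sketch of such a completion follows, assuming the StubMapper and StubReducer signatures below (Text/IntWritable map output, Text/DoubleWritable final output); the class name StubDriverSketch and the exact wiring are illustrative, not part of the original stub.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class StubDriverSketch {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.out.printf("Usage: StubDriverSketch <input dir> <output dir>\n");
System.exit(-1);
}
Job job = Job.getInstance();
job.setJarByClass(StubDriverSketch.class);
job.setJobName("Stub Driver Sketch");
/*
 * Wire in the mapper and reducer and declare their output types.
 * The map output types (Text, IntWritable) differ from the final job
 * output types (Text, DoubleWritable), so both pairs must be declared.
 * With the unimplemented stubs below the job runs but emits no records.
 */
job.setMapperClass(StubMapper.class);
job.setReducerClass(StubReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
/*
 * Input and output directories come from the two command-line arguments
 * validated above.
 */
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
}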
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class StubMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
/*
* TODO implement
*/
}
}
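One plausible way to fill in the mapper TODO is a word-count style map consistent with the "1 cat cat dog" / "cat 2" examples in StubTest below; the class name StubMapperSketch and the tokenization are assumptions, not part of the original stub.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class StubMapperSketch extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable ONE = new IntWritable(1);
private Text word = new Text();
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
/*
 * Emit (token, 1) for every whitespace-separated token in the line;
 * empty tokens (e.g. from leading whitespace) are skipped.
 */
for (String token : value.toString().split("\\s+")) {
if (!token.isEmpty()) {
word.set(token);
context.write(word, ONE);
}
}
}
}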
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class StubReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
/*
* TODO implement
*/
}
}
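A matching sketch for the reducer TODO, assuming the per-key counts are summed and emitted as a DoubleWritable to fit the declared output type and the "cat 1 1" → "cat 2" example in StubTest; the class name StubReducerSketch and the aggregation are assumptions.

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class StubReducerSketch extends Reducer<Text, IntWritable, Text, DoubleWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
/*
 * Sum the counts for this key and emit the total as a double,
 * matching the Text/DoubleWritable output signature of the stub.
 */
double sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new DoubleWritable(sum));
}
}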
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.Test;
public class StubTest {
/*
* Declare harnesses that let you test a mapper, a reducer, and
* a mapper and a reducer working together.
*/
MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
ReduceDriver<Text, IntWritable, Text, DoubleWritable> reduceDriver;
MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, DoubleWritable> mapReduceDriver;
/*
* Set up the test. This method will be called before every test.
*/
@Before
public void setUp() {
/*
* Set up the mapper test harness.
*/
StubMapper mapper = new StubMapper();
mapDriver = new MapDriver<LongWritable, Text, Text, IntWritable>();
mapDriver.setMapper(mapper);
/*
* Set up the reducer test harness.
*/
StubReducer reducer = new StubReducer();
reduceDriver = new ReduceDriver<Text, IntWritable, Text, DoubleWritable>();
reduceDriver.setReducer(reducer);
/*
* Set up the mapper/reducer test harness.
*/
mapReduceDriver = new MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, DoubleWritable>();
mapReduceDriver.setMapper(mapper);
mapReduceDriver.setReducer(reducer);
}
/*
* Test the mapper.
*/
@Test
public void testMapper() {
/*
* For this test, the mapper's input will be "1 cat cat dog"
* TODO: implement
*/
fail("Please implement test.");
}
/*
* Test the reducer.
*/
@Test
public void testReducer() {
/*
* For this test, the reducer's input will be "cat 1 1".
* The expected output is "cat 2".
* TODO: implement
*/
fail("Please implement test.");
}
/*
* Test the mapper and reducer working together.
*/
@Test
public void testMapReduce() {
/*
* For this test, the mapper's input will be "1 cat cat dog"
* The expected output (from the reducer) is "cat 2", "dog 1".
* TODO: implement
*/
fail("Please implement test.");
}
}
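A sketch of how the three failing tests could be completed with MRUnit's withInput/withOutput/runTest API, assuming the mapper and reducer behave as in the sketches above ("1 cat cat dog" mapped to (cat,1), (cat,1), (dog,1); counts reduced to doubles); the expected records and the StubMapperSketch/StubReducerSketch names are assumptions tied to those sketches.

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
public class StubTestSketch {
MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
ReduceDriver<Text, IntWritable, Text, DoubleWritable> reduceDriver;
MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, DoubleWritable> mapReduceDriver;
@Before
public void setUp() {
/*
 * Uses the hypothetical implemented classes sketched above; with the
 * unimplemented stubs these expectations would fail.
 */
StubMapperSketch mapper = new StubMapperSketch();
StubReducerSketch reducer = new StubReducerSketch();
mapDriver = new MapDriver<LongWritable, Text, Text, IntWritable>();
mapDriver.setMapper(mapper);
reduceDriver = new ReduceDriver<Text, IntWritable, Text, DoubleWritable>();
reduceDriver.setReducer(reducer);
mapReduceDriver = new MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, DoubleWritable>();
mapReduceDriver.setMapper(mapper);
mapReduceDriver.setReducer(reducer);
}
@Test
public void testMapper() throws IOException {
/*
 * Treat "1" as the byte-offset key and "cat cat dog" as the line;
 * expect one (word, 1) pair per token.
 */
mapDriver.withInput(new LongWritable(1), new Text("cat cat dog"));
mapDriver.withOutput(new Text("cat"), new IntWritable(1));
mapDriver.withOutput(new Text("cat"), new IntWritable(1));
mapDriver.withOutput(new Text("dog"), new IntWritable(1));
mapDriver.runTest();
}
@Test
public void testReducer() throws IOException {
/*
 * Key "cat" with values [1, 1]; expect the summed count 2 as a double.
 */
reduceDriver.withInput(new Text("cat"),
Arrays.asList(new IntWritable(1), new IntWritable(1)));
reduceDriver.withOutput(new Text("cat"), new DoubleWritable(2));
reduceDriver.runTest();
}
@Test
public void testMapReduce() throws IOException {
/*
 * End to end: "cat cat dog" should reduce to cat=2 and dog=1,
 * with keys emitted in sorted order.
 */
mapReduceDriver.withInput(new LongWritable(1), new Text("cat cat dog"));
mapReduceDriver.withOutput(new Text("cat"), new DoubleWritable(2));
mapReduceDriver.withOutput(new Text("dog"), new DoubleWritable(1));
mapReduceDriver.runTest();
}
}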