Commit 6c23b92d authored by cloudera_vm's avatar cloudera_vm
Browse files

Preprocessing on the whole pg100.txt

parent 4b2f399e
114969
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -20,6 +20,7 @@ import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
......@@ -38,10 +39,7 @@ public class Preprocessing_1 extends Configured implements Tool {
/**
 * Command-line entry point.
 *
 * Prints the raw argument array, hands the arguments to a new
 * Preprocessing_1 instance via ToolRunner with a fresh Configuration,
 * and terminates the JVM with the status code that run returned.
 *
 * @param args command-line arguments, forwarded unchanged to the tool
 * @throws Exception propagated from ToolRunner.run
 */
public static void main(String[] args) throws Exception {
    final String echoedArgs = Arrays.toString(args);
    System.out.println(echoedArgs);

    final Configuration conf = new Configuration();
    final int exitCode = ToolRunner.run(conf, new Preprocessing_1(), args);
    System.exit(exitCode);
}
......@@ -50,7 +48,7 @@ public class Preprocessing_1 extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
System.out.println(Arrays.toString(args));
Job job = new Job(getConf(), "Preprocessing_1_test (1)");
Job job = new Job(getConf(), "Preprocessing_1");
job.setJarByClass(Preprocessing_1.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
......@@ -77,7 +75,6 @@ public class Preprocessing_1 extends Configured implements Tool {
job.waitForCompletion(true);
long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue();
Path countFile = new Path(new Path(args[1]),"nb_output_records.txt");
File file = new File(countFile.toString());
......@@ -88,11 +85,7 @@ public class Preprocessing_1 extends Configured implements Tool {
return 0;
}
public static class Map extends Mapper<LongWritable, Text, LongWritable, Text> {
private Text word = new Text();
......@@ -116,7 +109,7 @@ public class Preprocessing_1 extends Configured implements Tool {
}
}
public static class Reduce extends Reducer<LongWritable, Text, LongWritable, Text> {
public static class Reduce extends Reducer<LongWritable, Text, NullWritable, Text> {
private static HashMap<String,Integer> word_freq = new HashMap<String,Integer>();
......@@ -180,7 +173,7 @@ public class Preprocessing_1 extends Configured implements Tool {
if(!concat_words.toString().isEmpty()){
context.getCounter(COUNTS.COUNT_LINES).increment(1);
context.write(key, new Text(concat_words.toString()));
context.write(NullWritable.get(), new Text(concat_words.toString()));
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment