Skip to content
Snippets Groups Projects
Commit 6c23b92d authored by cloudera_vm's avatar cloudera_vm
Browse files

Preprocessing on the whole pg100.txt

parent 4b2f399e
No related branches found
No related tags found
No related merge requests found
File added
File added
114969
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
...@@ -20,6 +20,7 @@ import org.apache.hadoop.conf.Configured; ...@@ -20,6 +20,7 @@ import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Mapper;
...@@ -38,10 +39,7 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -38,10 +39,7 @@ public class Preprocessing_1 extends Configured implements Tool {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
System.out.println(Arrays.toString(args)); System.out.println(Arrays.toString(args));
int res = ToolRunner.run(new Configuration(), new Preprocessing_1(), args); int res = ToolRunner.run(new Configuration(), new Preprocessing_1(), args);
System.exit(res); System.exit(res);
} }
...@@ -50,7 +48,7 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -50,7 +48,7 @@ public class Preprocessing_1 extends Configured implements Tool {
@Override @Override
public int run(String[] args) throws Exception { public int run(String[] args) throws Exception {
System.out.println(Arrays.toString(args)); System.out.println(Arrays.toString(args));
Job job = new Job(getConf(), "Preprocessing_1_test (1)"); Job job = new Job(getConf(), "Preprocessing_1");
job.setJarByClass(Preprocessing_1.class); job.setJarByClass(Preprocessing_1.class);
job.setOutputKeyClass(LongWritable.class); job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class); job.setOutputValueClass(Text.class);
...@@ -77,7 +75,6 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -77,7 +75,6 @@ public class Preprocessing_1 extends Configured implements Tool {
job.waitForCompletion(true); job.waitForCompletion(true);
long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue(); long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue();
Path countFile = new Path(new Path(args[1]),"nb_output_records.txt"); Path countFile = new Path(new Path(args[1]),"nb_output_records.txt");
File file = new File(countFile.toString()); File file = new File(countFile.toString());
...@@ -89,10 +86,6 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -89,10 +86,6 @@ public class Preprocessing_1 extends Configured implements Tool {
return 0; return 0;
} }
public static class Map extends Mapper<LongWritable, Text, LongWritable, Text> { public static class Map extends Mapper<LongWritable, Text, LongWritable, Text> {
private Text word = new Text(); private Text word = new Text();
...@@ -116,7 +109,7 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -116,7 +109,7 @@ public class Preprocessing_1 extends Configured implements Tool {
} }
} }
public static class Reduce extends Reducer<LongWritable, Text, LongWritable, Text> { public static class Reduce extends Reducer<LongWritable, Text, NullWritable, Text> {
private static HashMap<String,Integer> word_freq = new HashMap<String,Integer>(); private static HashMap<String,Integer> word_freq = new HashMap<String,Integer>();
...@@ -180,7 +173,7 @@ public class Preprocessing_1 extends Configured implements Tool { ...@@ -180,7 +173,7 @@ public class Preprocessing_1 extends Configured implements Tool {
if(!concat_words.toString().isEmpty()){ if(!concat_words.toString().isEmpty()){
context.getCounter(COUNTS.COUNT_LINES).increment(1); context.getCounter(COUNTS.COUNT_LINES).increment(1);
context.write(key, new Text(concat_words.toString())); context.write(NullWritable.get(), new Text(concat_words.toString()));
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment