clearing files for Qa

efdea1c5 · cloudera_vm · 1e3b38e5 · 1e3b38e5 · 1e3b38e5
Commit efdea1c5 authored 8 years ago by cloudera_vm
--- a/Assign2/bin/SetSimilarityJoins/Qa.class
+++ b/Assign2/bin/SetSimilarityJoins/Qa.class
--- a/Assign2/src/SetSimilarityJoins/Qa.java
+++ b/Assign2/src/SetSimilarityJoins/Qa.java
-package SetSimilarityJoins;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-public class Qa extends Configured implements Tool {
-   public static void main(String[] args) throws Exception {
-      System.out.println(Arrays.toString(args));
-      int res = ToolRunner.run(new Configuration(), new Qa(), args);
-      System.exit(res);
-   }
-   @Override
-   public int run(String[] args) throws Exception {
-	   System.out.println(Arrays.toString(args));
-	      Job job = new Job(getConf(), "Qa_ALL_PAIRS");
-	      job.setJarByClass(Qa.class);
-	      job.setOutputKeyClass(Text.class);
-	      job.setOutputValueClass(IntWritable.class);
-	      job.setMapperClass(Map.class);
-	      job.setReducerClass(Reduce.class);
-	      job.setInputFormatClass(TextInputFormat.class);
-	      job.setOutputFormatClass(TextOutputFormat.class);
-	      Path outputFilePath = new Path(args[1]);
-	      FileInputFormat.addInputPath(job, new Path(args[0]));
-	      FileOutputFormat.setOutputPath(job, outputFilePath);
-	      FileSystem fs = FileSystem.newInstance(getConf());
-	      if (fs.exists(outputFilePath)) {
-				fs.delete(outputFilePath, true);
-			}
-	      job.waitForCompletion(true);
-	      return 0;
-	   }
-   public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
-      private final static IntWritable ONE = new IntWritable(1);
-      private Text word = new Text();
-      String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
-	  String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file)));
-      public Map() throws  IOException{
-    	 System.out.println(stopwords);
-      }
-      @Override
-      public void map(LongWritable key, Text value, Context context)
-              throws IOException, InterruptedException {
-        for (String token: value.toString().replaceAll("[^A-Za-z0-9 ]", " ").split("\\s+")) {
-          if (!stopwords.contains(token)) {
-                word.set(token);
-                context.write(word, ONE);
-                }
-         }
-      }
-   }
-   public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
-      @Override
-      public void reduce(Text key, Iterable<IntWritable> values, Context context)
-              throws IOException, InterruptedException {
-         int sum = 0;
-         for (IntWritable val : values) {
-            sum += val.get();
-         }
-         context.write(key, new IntWritable(sum));
-      }
-   }
-}
\ No newline at end of file