diff --git a/Assign2/bin/SetSimilarityJoins/Qa.class b/Assign2/bin/SetSimilarityJoins/Qa.class deleted file mode 100644 index 2253ae7c5baf77a777bf125943765ca96db84654..0000000000000000000000000000000000000000 Binary files a/Assign2/bin/SetSimilarityJoins/Qa.class and /dev/null differ diff --git a/Assign2/src/SetSimilarityJoins/Qa.java b/Assign2/src/SetSimilarityJoins/Qa.java deleted file mode 100644 index 1215f84a788d94034f2fee7569fcffc23d2d279c..0000000000000000000000000000000000000000 --- a/Assign2/src/SetSimilarityJoins/Qa.java +++ /dev/null @@ -1,100 +0,0 @@ -package SetSimilarityJoins; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Arrays; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -public class Qa extends Configured implements Tool { - public static void main(String[] args) throws Exception { - System.out.println(Arrays.toString(args)); - int res = ToolRunner.run(new Configuration(), new Qa(), args); - - System.exit(res); - } - - @Override - public int run(String[] args) throws Exception { - System.out.println(Arrays.toString(args)); - Job job = new Job(getConf(), "Qa_ALL_PAIRS"); - job.setJarByClass(Qa.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(IntWritable.class); - - job.setMapperClass(Map.class); - job.setReducerClass(Reduce.class); - - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - - - - Path outputFilePath = new Path(args[1]); - - FileInputFormat.addInputPath(job, new Path(args[0])); - FileOutputFormat.setOutputPath(job, outputFilePath); - - FileSystem fs = FileSystem.newInstance(getConf()); - - if (fs.exists(outputFilePath)) { - fs.delete(outputFilePath, true); - } - - job.waitForCompletion(true); - - return 0; - } - - public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { - private final static IntWritable ONE = new IntWritable(1); - private Text word = new Text(); - String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords"; - String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file))); - - public Map() throws IOException{ - System.out.println(stopwords); - } - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - - for (String token: value.toString().replaceAll("[^A-Za-z0-9 ]", " ").split("\\s+")) { - if (!stopwords.contains(token)) { - word.set(token); - context.write(word, ONE); - } - } - } - } - - public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { - @Override - public void reduce(Text key, Iterable<IntWritable> values, Context context) - throws IOException, InterruptedException { - int sum = 0; - for (IntWritable val : values) { - sum += val.get(); - } - context.write(key, new IntWritable(sum)); - - } - } -} \ No newline at end of file