diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..397b4a7624e35fa60563a9c03b1213d93f7b6546
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.log
diff --git a/Assign2/Preprocessing_1_test/._SUCCESS.crc b/Assign2/Preprocessing_1_test/._SUCCESS.crc
deleted file mode 100644
index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000
Binary files a/Assign2/Preprocessing_1_test/._SUCCESS.crc and /dev/null differ
diff --git a/Assign2/Preprocessing_1_test/.part-r-00000.crc b/Assign2/Preprocessing_1_test/.part-r-00000.crc
deleted file mode 100644
index a82a4326b15ab48d1271a57d45ab704cfe992207..0000000000000000000000000000000000000000
Binary files a/Assign2/Preprocessing_1_test/.part-r-00000.crc and /dev/null differ
diff --git a/Assign2/Preprocessing_1_test/_SUCCESS b/Assign2/Preprocessing_1_test/_SUCCESS
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/Assign2/Preprocessing_1_test/nb_output_records.txt b/Assign2/Preprocessing_1_test/nb_output_records.txt
deleted file mode 100644
index bf0d87ab1b2b0ec1a11a3973d2845b42413d9767..0000000000000000000000000000000000000000
--- a/Assign2/Preprocessing_1_test/nb_output_records.txt
+++ /dev/null
@@ -1 +0,0 @@
-4
\ No newline at end of file
diff --git a/Assign2/Preprocessing_1_test/part-r-00000 b/Assign2/Preprocessing_1_test/part-r-00000
deleted file mode 100644
index 96f5477bcdb5d053e2900575a5e1ff51e2c1ebd1..0000000000000000000000000000000000000000
--- a/Assign2/Preprocessing_1_test/part-r-00000
+++ /dev/null
@@ -1,4 +0,0 @@
-0,anyone anywhere ebook cost use
-78,restrictions whatsoever copy almost away give may
-149,included license terms under re gutenberg project use
-218,online www org ebook gutenberg
diff --git a/Assign2/src/SetSimilarityJoins/Qa_ALL_PAIRS.java b/Assign2/src/SetSimilarityJoins/Qa_ALL_PAIRS.java
deleted file mode 100644
index 746cc760460e031fb57dc1d9b96051af7235ec53..0000000000000000000000000000000000000000
--- a/Assign2/src/SetSimilarityJoins/Qa_ALL_PAIRS.java
+++ /dev/null
@@ -1,181 +0,0 @@
-package SetSimilarityJoins;
-
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.google.common.collect.Sets;
-
-
-
-public class Qa_ALL_PAIRS extends Configured implements Tool {
-
-   public static void main(String[] args) throws Exception {
-	   
-      System.out.println(Arrays.toString(args));
-      int res = ToolRunner.run(new Configuration(), new Qa_ALL_PAIRS(), args);
-      System.exit(res);
-   }
-   
-   public static enum COUNTS {COUNT_COMP};
-
-   @Override
-   public int run(String[] args) throws Exception {
-      System.out.println(Arrays.toString(args));
-      Job job = new Job(getConf(), "Qa_ALL_PAIRS");
-      job.setJarByClass(Qa_ALL_PAIRS.class);
-      job.setOutputKeyClass(Text.class);
-      job.setOutputValueClass(Text.class);
-
-      job.setMapperClass(Map.class);
-      job.setReducerClass(Reduce.class);
-     
-
-      job.setInputFormatClass(TextInputFormat.class);
-      job.setOutputFormatClass(TextOutputFormat.class);
-     
-      job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");
-
-      job.getConfiguration().set("mapreduce.input.keyvaluelinerecordreader.key.value.separator",",");
-
-  
-      Path outputFilePath = new Path(args[1]);
-      
-      FileInputFormat.addInputPath(job, new Path(args[0]));
-      FileOutputFormat.setOutputPath(job, outputFilePath);
-      
-      FileSystem fs = FileSystem.newInstance(getConf());
-
-      if (fs.exists(outputFilePath)) {
-			fs.delete(outputFilePath, true);
-		}
-
-      job.waitForCompletion(true);
-
-      long counter = job.getCounters().findCounter(COUNTS.COUNT_COMP).getValue();
-      Path countFile = new Path(new Path(args[1]),"nb_comp");
-      File file = new File(countFile.toString());
-      FileWriter fileWriter = new FileWriter(file);
-      fileWriter.write(String.valueOf(counter));
-      fileWriter.flush();
-      fileWriter.close();
-      
-      return 0;
-   }
-
-   public static class Map extends Mapper<LongWritable, Text, Text, Text> {
-
-      String doc_path = "/home/cloudera/workspace/bpa/Assign2/Preprocessing_1/processed_doc";
-	  String doc = new String(Files.readAllBytes(Paths.get(doc_path)));
-	  HashMap<String, String> id_doc = new HashMap<String, String>();
-      
-      public Map() throws  IOException{
-    	  for (String line : doc.split("\n")){
-   
-    		  id_doc.put(line.split(",")[0], 
-    				  line.split(",")[1]);
-    		  /*
-    		  
-    		  */
-    	  }
-      }
-      @Override
-      public void map(LongWritable key, Text value, Context context)
-              throws IOException, InterruptedException {
-    	  
-
-    		 int id_current_doc = Integer.valueOf(
-    				 value.toString().split(",")[0]);
-             
-    	 
-             for (String other_doc : id_doc.keySet()) {
-    		 
-            	 int id_other_doc = Integer.valueOf(other_doc);
-    		 
-	    		 if (id_current_doc < id_other_doc){
-	    			 StringBuilder pair = new StringBuilder();
-	                 pair.append(id_current_doc);
-	                 pair.append("--");
-	                 pair.append(id_other_doc);  
-	                 context.write(new Text(pair.toString()), 
-	                		 new Text(
-	                				 value.toString().split(",")[1].toLowerCase()));
-    		 }
-    	}
-      }
-   }
-
-   public static class Reduce extends Reducer<Text, Text, Text, DoubleWritable> {
-	      
-	   
-	   String doc_path = "/home/cloudera/workspace/bpa/Assign2/Preprocessing_1/processed_doc";
-		  String doc = new String(Files.readAllBytes(Paths.get(doc_path)));
-		  HashMap<String, String> id_doc = new HashMap<String, String>();
-	      
-	      public Reduce() throws  IOException{
-	    	  for (String line : doc.split("\n")){
-	    		  id_doc.put(line.split(",")[0], 
-	    				  line.split(",")[1]);
-	    	  }
-	      }
-	   
-	   public static double Jaccard(String[] A, String[] B){
-		   Set<String> A_set = new HashSet<String>(Arrays.asList(A)); 
-		   Set<String> B_set = new HashSet<String>(Arrays.asList(B)); 
-		   Set<String> union = Sets.union(A_set, B_set);
-		   Set<String> intersection = Sets.intersection(A_set, B_set);
-	       return (double)intersection.size()/(double)union.size();
-	   }
-	   
-	  @Override
-      public void reduce(Text key, Iterable<Text> values, Context context)
-              throws IOException, InterruptedException {
-
-         String[] ids = key.toString().split("--");
-         String content_1 = id_doc.get(ids[0]).toLowerCase();
-         String content_2 = id_doc.get(ids[1]).toLowerCase();
-         
-         
-         double jaccsim = Jaccard(content_1.split(" "),
-        		content_2.split(" "));
-         if (jaccsim >=0.8){
-        		 context.write(key,new DoubleWritable(jaccsim));
-         }
-         context.getCounter(COUNTS.COUNT_COMP).increment(1);
-       } 
-       }
-   
-}
-
diff --git a/Assign2/src/SetSimilarityJoins/Qb_invert_index.java b/Assign2/src/SetSimilarityJoins/Qb_invert_index.java
deleted file mode 100644
index cc64b91545486ae52d2d4c96c9ed6bee68bff894..0000000000000000000000000000000000000000
--- a/Assign2/src/SetSimilarityJoins/Qb_invert_index.java
+++ /dev/null
@@ -1,179 +0,0 @@
-package SetSimilarityJoins;
-
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.google.common.collect.Sets;
-
-
-
-public class Qb_invert_index extends Configured implements Tool {
-
-   public static void main(String[] args) throws Exception {
-	   
-      System.out.println(Arrays.toString(args));
-      int res = ToolRunner.run(new Configuration(), new Qb_invert_index(), args);
-      System.exit(res);
-   }
-   
-   public static enum COUNTS {COUNT_COMP};
-
-   @Override
-   public int run(String[] args) throws Exception {
-      System.out.println(Arrays.toString(args));
-      Job job = new Job(getConf(), "Qb_Efficient_1000");
-      job.setJarByClass(Qb_invert_index.class);
-      job.setOutputKeyClass(Text.class);
-      job.setOutputValueClass(Text.class);
-
-      job.setMapperClass(Map.class);
-      job.setReducerClass(Reduce.class);
-     
-
-      job.setInputFormatClass(TextInputFormat.class);
-      job.setOutputFormatClass(TextOutputFormat.class);
-      
-      job.getConfiguration().set("mapreduce.input.keyvaluelinerecordreader.key.value.separator",",");
-
-  
-      Path outputFilePath = new Path(args[1]);
-      
-      FileInputFormat.addInputPath(job, new Path(args[0]));
-      FileOutputFormat.setOutputPath(job, outputFilePath);
-      
-      FileSystem fs = FileSystem.newInstance(getConf());
-
-      if (fs.exists(outputFilePath)) {
-			fs.delete(outputFilePath, true);
-		}
-
-      job.waitForCompletion(true);
-
-      long counter = job.getCounters().findCounter(COUNTS.COUNT_COMP).getValue();
-      Path countFile = new Path(new Path(args[1]),"nb_comp");
-      File file = new File(countFile.toString());
-      FileWriter fileWriter = new FileWriter(file);
-      fileWriter.write(String.valueOf(counter));
-      fileWriter.flush();
-      fileWriter.close();
-      
-      return 0;
-   }
-
-   public static class Map extends Mapper<LongWritable, Text, Text, Text> {
-
-      String doc_path = "/home/cloudera/workspace/bpa/Assign2/Preprocessing_1/processed_doc_1000";
-	  String doc = new String(Files.readAllBytes(Paths.get(doc_path)));
-	  HashMap<String, String> id_doc = new HashMap<String, String>();
-      
-      public Map() throws  IOException{
-    	  for (String line : doc.split("\n")){
-   
-    		  id_doc.put(line.split(",")[0], 
-    				  line.split(",")[1]);
-    		  /*
-    		  
-    		  */
-    	  }
-      }
-      @Override
-      public void map(LongWritable key, Text value, Context context)
-              throws IOException, InterruptedException {
-    	  
-
-    		 int id_current_doc = Integer.valueOf(
-    				 value.toString().split(",")[0]);
-             
-    	 
-             for (String other_doc : id_doc.keySet()) {
-    		 
-            	 int id_other_doc = Integer.valueOf(other_doc);
-    		 
-	    		 if (id_current_doc < id_other_doc){
-	    			 StringBuilder pair = new StringBuilder();
-	                 pair.append(id_current_doc);
-	                 pair.append("--");
-	                 pair.append(id_other_doc);  
-	                 context.write(new Text(pair.toString()), 
-	                		 new Text(
-	                				 value.toString().split(",")[1].toLowerCase()));
-    		 }
-    	}
-      }
-   }
-
-   public static class Reduce extends Reducer<Text, Text, Text, DoubleWritable> {
-	      
-	   
-	   String doc_path = "/home/cloudera/workspace/bpa/Assign2/Preprocessing_1/processed_doc_1000";
-		  String doc = new String(Files.readAllBytes(Paths.get(doc_path)));
-		  HashMap<String, String> id_doc = new HashMap<String, String>();
-	      
-	      public Reduce() throws  IOException{
-	    	  for (String line : doc.split("\n")){
-	    		  id_doc.put(line.split(",")[0], 
-	    				  line.split(",")[1]);
-	    	  }
-	      }
-	   
-	   public static double Jaccard(String[] A, String[] B){
-		   Set<String> A_set = new HashSet<String>(Arrays.asList(A)); 
-		   Set<String> B_set = new HashSet<String>(Arrays.asList(B)); 
-		   Set<String> union = Sets.union(A_set, B_set);
-		   Set<String> intersection = Sets.intersection(A_set, B_set);
-	       return (double)intersection.size()/(double)union.size();
-	   }
-	   
-	  @Override
-      public void reduce(Text key, Iterable<Text> values, Context context)
-              throws IOException, InterruptedException {
-
-         String[] ids = key.toString().split("--");
-         String content_1 = id_doc.get(ids[0]).toLowerCase();
-         String content_2 = id_doc.get(ids[1]).toLowerCase();
-         
-         
-         double jaccsim = Jaccard(content_1.split(" "),
-        		content_2.split(" "));
-         if (jaccsim >=0.8){
-        		 context.write(key,new DoubleWritable(jaccsim));
-         }
-         context.getCounter(COUNTS.COUNT_COMP).increment(1);
-       } 
-       }
-   
-}
-
diff --git a/Assign2/stopwords.csv b/Assign2/stopwords.csv
deleted file mode 100644
index ba2294be7a1f2f8e5e25c4c581457e93f2b39a46..0000000000000000000000000000000000000000
--- a/Assign2/stopwords.csv
+++ /dev/null
@@ -1,118 +0,0 @@
-about
-be
-before
-by
-her
-mr.
-much
-old
-up
-where
-you
-after
-been
-come
-down
-get
-got
-have
-is
-me
-my
-now
-only
-she
-their
-them
-this
-upon
-we
-will
-but
-do
-great
-had
-it
-like
-most
-no
-on
-take
-the
-then
-thy
-time
-was
-which
-are
-as
-first
-into
-one
-our
-shall
-they
-think
-us
-all
-at
-of
-that
-your
-go
-his
-how
-make
-never
-out
-very
-when
-who
-with
-could
-did
-every
-good
-he
-i
-in
-it,
-let
-more
-or
-such
-were
-would
-am
-can
-from
-has
-its
-little
-man
-other
-so
-some
-there
-two
-what
-a
-an
-and
-for
-if
-it.
-know
-made
-may
-must
-not
-said
-see
-should
-than
-these
-to
-any
-him
-thou