Skip to content
Snippets Groups Projects
Commit d47d6c7c authored by cloudera_vm's avatar cloudera_vm
Browse files

Taking into account capital letters after Julien's advice

parent 6c23b92d
No related branches found
No related tags found
No related merge requests found
114969
\ No newline at end of file
No preview for this file type
This diff is collapsed.
This diff is collapsed.
......@@ -58,8 +58,7 @@ public class Preprocessing_1 extends Configured implements Tool {
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");
job.setOutputFormatClass(TextOutputFormat.class);;
Path outputFilePath = new Path(args[1]);
......@@ -76,7 +75,7 @@ public class Preprocessing_1 extends Configured implements Tool {
job.waitForCompletion(true);
long counter = job.getCounters().findCounter(COUNTS.COUNT_LINES).getValue();
Path countFile = new Path(new Path(args[1]),"nb_output_records.txt");
Path countFile = new Path(new Path(args[1]),"nb_output_records");
File file = new File(countFile.toString());
FileWriter fileWriter = new FileWriter(file);
fileWriter.write(String.valueOf(counter));
......
......@@ -67,18 +67,19 @@ public class WordCount extends Configured implements Tool {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable ONE = new IntWritable(1);
private Text word = new Text();
private String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
String stopwords_file = "/home/cloudera/workspace/bpa/Assign2/stopwords/stopwords";
String stopwords = new String(Files.readAllBytes(Paths.get(stopwords_file)));
public Map() throws IOException{
System.out.println(stopwords);
}
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String stopwords = new String(Files.readAllBytes(
Paths.get(stopwords_file)));
for (String token: value.toString().replaceAll("[^a-zA-Z0-9 ]", " ").split("\\s+")) {
if (!stopwords.contains(token.toLowerCase())) {
word.set(token.toLowerCase());
if (!stopwords.contains(token)) {
word.set(token);
context.write(word, ONE);
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment