Skip to content
Snippets Groups Projects
Commit dda5b7d4 authored by cloudera's avatar cloudera
Browse files

Qa.i and Qa.ii: there is a problem with the combiner — we don't retrieve the...

Qa.i and Qa.ii: there is a problem with the combiner — we don't retrieve the correct number of stopwords... this is weird
parent 92d6d48a
Branches
No related tags found
No related merge requests found
are 7566
as 29531
into 4263
one 9926
our 4088
they 14089
all 11971
at 18706
of 119701
that 54938
your 11160
his 34380
out 7891
when 8507
who 4428
with 34665
could 6614
he 44747
i 72836
in 69179
or 8610
were 9682
would 9567
from 9433
has 5077
its 4523
other 4002
so 13588
some 4462
there 8909
what 5911
a 99209
an 8104
and 167100
for 36130
if 7613
it. 4557
not 32386
said 5039
see 4020
to 114272
any 5067
him 8425
thou 5138
about
be
by
her
up
you
been
down
got
have
is
me
my
she
their
them
this
upon
we
will
but
do
had
it
like
no
on
the
then
thy
was
which
are
as
into
one
our
they
all
at
of
that
your
his
out
when
who
with
could
he
i
in
or
were
would
from
has
its
other
so
some
there
what
a
an
and
for
if
it.
not
said
see
to
any
him
thou
about
be
by
her
up
you
been
down
got
have
is
me
my
she
their
them
this
upon
we
will
but
do
had
it
like
no
on
the
then
thy
was
which
are
as
into
one
our
they
all
at
of
that
your
his
out
when
who
with
could
he
i
in
or
were
would
from
has
its
other
so
some
there
what
a
an
and
for
if
it.
not
said
see
to
any
him
thou
about 6121
be 27239
by 15659
her 21272
up 7138
you 35121
202317
been 6037
down 4100
got 4047
have 23991
is 37433
me 11514
my 22712
she 19935
their 6428
them 5581
this 18019
upon 4160
we 13668
will 11037
but 30329
do 5427
had 24180
it 50587
like 4217
no 7952
on 12904
the 208421
then 6525
thy 4028
was 49624
which 7409
are 7566
as 29531
into 4263
one 9926
our 4088
they 14089
all 11971
at 18706
of 119701
that 54938
your 11160
his 34380
out 7891
when 8507
who 4428
with 34665
could 6614
he 44747
i 72836
in 69179
or 8610
were 9682
would 9567
from 9433
has 5077
its 4523
other 4002
so 13588
some 4462
there 8909
what 5911
a 99209
an 8104
and 167100
for 36130
if 7613
it. 4557
not 32386
said 5039
see 4020
to 114272
any 5067
him 8425
thou 5138
package Question1;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Question a.i — word-count job WITHOUT a combiner.
 *
 * Reads three text inputs, lower-cases and counts every whitespace-separated
 * token, and emits only the tokens whose total count exceeds 4000
 * (the "stopword" threshold). Output path is args[3].
 */
public class Stopword_i extends Configured implements Tool {

    /** Minimum global occurrence count for a token to be kept as a stopword. */
    private static final int STOPWORD_THRESHOLD = 4000;

    /** Entry point: delegates to ToolRunner so generic Hadoop options (-D, -conf) are parsed. */
    public static void main(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        int res = ToolRunner.run(new Configuration(), new Stopword_i(), args);
        System.exit(res);
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0..2] = three input paths, args[3] = output path
     * @return 0 on success, 1 on job failure, 2 on bad usage
     */
    @Override
    public int run(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        if (args.length < 4) {
            System.err.println("Usage: Stopword_i <input1> <input2> <input3> <output>");
            return 2;
        }
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(getConf(), "Stopword_i");
        job.setJarByClass(Stopword_i.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // NOTE: deliberately no combiner in this variant (Qa.i baseline).
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(10);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileInputFormat.addInputPath(job, new Path(args[2]));
        FileOutputFormat.setOutputPath(job, new Path(args[3]));
        // Propagate the job's actual outcome instead of always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Tokenizes each line on whitespace and emits (lowercased token, 1). */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                // split("\\s+") yields a leading "" when the line starts with
                // whitespace; skip it so the empty string is never counted.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token.toLowerCase());
                context.write(word, ONE);
            }
        }
    }

    /** Sums the counts per token and keeps only tokens above the threshold. */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            if (sum > STOPWORD_THRESHOLD) {
                context.write(key, new IntWritable(sum));
            }
        }
    }
}
package Question1;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Question a.ii — word-count job WITH a combiner.
 *
 * BUG FIX: the original registered {@code Reduce} (which filters on
 * {@code sum > 4000}) as the combiner. A combiner only sees the PARTIAL
 * counts of one map split, so any word whose per-split count was <= 4000
 * was silently dropped before reaching the reducer — even when its global
 * count exceeded the threshold. That is why "we don't retrieve the number
 * of stopwords". The combiner must only sum; the threshold belongs in the
 * reducer alone.
 */
public class Stopword_ii extends Configured implements Tool {

    /** Minimum global occurrence count for a token to be kept as a stopword. */
    private static final int STOPWORD_THRESHOLD = 4000;

    /** Entry point: delegates to ToolRunner so generic Hadoop options (-D, -conf) are parsed. */
    public static void main(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        int res = ToolRunner.run(new Configuration(), new Stopword_ii(), args);
        System.exit(res);
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0..2] = three input paths, args[3] = output path
     * @return 0 on success, 1 on job failure, 2 on bad usage
     */
    @Override
    public int run(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        if (args.length < 4) {
            System.err.println("Usage: Stopword_ii <input1> <input2> <input3> <output>");
            return 2;
        }
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(getConf(), "Stopword_ii");
        job.setJarByClass(Stopword_ii.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // The combiner must NOT apply the threshold: it only pre-aggregates
        // partial sums. Using Reduce here was the original bug.
        job.setCombinerClass(Combine.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(10);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileInputFormat.addInputPath(job, new Path(args[2]));
        FileOutputFormat.setOutputPath(job, new Path(args[3]));
        // Propagate the job's actual outcome instead of always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Tokenizes each line on whitespace and emits (lowercased token, 1). */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                // split("\\s+") yields a leading "" when the line starts with
                // whitespace; skip it so the empty string is never counted.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token.toLowerCase());
                context.write(word, ONE);
            }
        }
    }

    /**
     * Combiner: sums the partial counts for a token and ALWAYS re-emits the
     * pair. No filtering here — the threshold can only be applied once all
     * partial counts have been merged in the reducer.
     */
    public static class Combine extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /** Reducer: sums the global count per token and applies the threshold. */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            if (sum > STOPWORD_THRESHOLD) {
                context.write(key, new IntWritable(sum));
            }
        }
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment