Commit 722dcc48 authored by cloudera_vm's avatar cloudera_vm

Q1.iii with BZip2 compression and 10 reducers; Snappy and Gzip are not working...

parent 82e382db
his 34380
out 7891
when 8507
who 4428
with 34665
could 6614
he 44747
i 72836
in 69179
or 8610
were 9682
would 9567
from 9433
has 5077
its 4523
other 4002
so 13588
some 4462
there 8909
what 5911
a 99209
an 8104
and 167100
for 36130
if 7613
it. 4557
not 32386
said 5039
see 4020
to 114272
any 5067
him 8425
thou 5138
about
be
by
her
up
you
been
down
got
have
is
me
my
she
their
them
this
upon
we
will
but
do
had
it
like
no
on
the
then
thy
was
which
are
as
into
one
our
they
all
at
of
that
your
his
out
when
who
with
could
he
i
in
or
were
would
from
has
its
other
so
some
there
what
a
an
and
for
if
it.
not
said
see
to
any
him
thou
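The listings above are the committed job output: word/count pairs emitted by the ten reducers of the job below, where only words whose total count exceeds 4,000 pass the reducer's filter.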
package Question1;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Q1.iii: word count over three input files, keeping only words that occur
 * more than 4,000 times (candidate stop words). Map output is compressed
 * with BZip2 and the job runs with 10 reducers.
 */
public class Stopword_iii extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        int res = ToolRunner.run(new Configuration(), new Stopword_iii(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));

        Job job = new Job(getConf(), "Stopword_iii");
        job.setJarByClass(Stopword_iii.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // The combiner must only sum partial counts. Reusing Reduce here would
        // apply the > 4000 filter to per-mapper partial sums and silently drop
        // counts for words that exceed the threshold only globally.
        job.setCombinerClass(Combine.class);

        // Compress intermediate map output with BZip2 (old-style property
        // names, as used on the Cloudera VM). Per the commit message, swapping
        // in SnappyCodec or GzipCodec here did not work.
        job.getConfiguration().setBoolean("mapred.compress.map.output", true);
        job.getConfiguration().setClass("mapred.map.output.compression.codec",
                BZip2Codec.class, CompressionCodec.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(10);

        // Three input files, one output directory.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileInputFormat.addInputPath(job, new Path(args[2]));
        FileOutputFormat.setOutputPath(job, new Path(args[3]));

        // Propagate the job's success or failure as the exit code.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Lower-case each whitespace-delimited token and emit (token, 1).
            for (String token : value.toString().split("\\s+")) {
                word.set(token.toLowerCase());
                context.write(word, ONE);
            }
        }
    }

    /** Combiner: sums partial counts without applying the threshold. */
    public static class Combine extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            // Keep only words that occur more than 4,000 times overall.
            if (sum > 4000) {
                context.write(key, new IntWritable(sum));
            }
        }
    }
}
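For comparison, the Snappy and Gzip attempts mentioned in the commit message would change only the codec class bound to the same property; a minimal sketch of that swap is below. The failure explanation is an assumption, not something the commit records: SnappyCodec requires the native Snappy library to be loadable by Hadoop, and GzipCodec relies on native zlib to supply a Compressor for intermediate data, so a VM image without those native libraries can fail at map-output compression time while the pure-Java BZip2Codec still works.

// Hypothetical codec swap inside run() above; everything else is unchanged.
// Assumption: these fail on the VM because SnappyCodec needs native libsnappy,
// and GzipCodec needs native zlib for the shuffle's Compressor.
job.getConfiguration().setBoolean("mapred.compress.map.output", true);
job.getConfiguration().setClass("mapred.map.output.compression.codec",
        org.apache.hadoop.io.compress.SnappyCodec.class,
        org.apache.hadoop.io.compress.CompressionCodec.class);
// ...or the Gzip variant:
job.getConfiguration().setClass("mapred.map.output.compression.codec",
        org.apache.hadoop.io.compress.GzipCodec.class,
        org.apache.hadoop.io.compress.CompressionCodec.class);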