Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mohammed Meftah
BDPA_Assign2_MMEFTAH
Commits
6c23b92d
Commit
6c23b92d
authored
Mar 18, 2017
by
cloudera_vm
Browse files
Preprocessing on the whole pg100.txt
parent
4b2f399e
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Assign2/Preprocessing_1/._SUCCESS.crc
0 → 100644
View file @
6c23b92d
File added
Assign2/Preprocessing_1/.part-r-00000.crc
0 → 100644
View file @
6c23b92d
File added
Assign2/Preprocessing_1/_SUCCESS
0 → 100644
View file @
6c23b92d
Assign2/Preprocessing_1/nb_output_records.txt
0 → 100644
View file @
6c23b92d
114969
\ No newline at end of file
Assign2/Preprocessing_1/part-r-00000
0 → 100644
View file @
6c23b92d
This diff is collapsed.
Click to expand it.
Assign2/hadoop.log
View file @
6c23b92d
This diff is collapsed.
Click to expand it.
Assign2/src/Preprocessing/Preprocessing_1.java
View file @
6c23b92d
...
...
@@ -20,6 +20,7 @@ import org.apache.hadoop.conf.Configured;
import
org.apache.hadoop.fs.FileSystem
;
import
org.apache.hadoop.fs.Path
;
import
org.apache.hadoop.io.LongWritable
;
import
org.apache.hadoop.io.NullWritable
;
import
org.apache.hadoop.io.Text
;
import
org.apache.hadoop.mapreduce.Job
;
import
org.apache.hadoop.mapreduce.Mapper
;
...
...
@@ -38,10 +39,7 @@ public class Preprocessing_1 extends Configured implements Tool {
public
static
void
main
(
String
[]
args
)
throws
Exception
{
System
.
out
.
println
(
Arrays
.
toString
(
args
));
int
res
=
ToolRunner
.
run
(
new
Configuration
(),
new
Preprocessing_1
(),
args
);
System
.
exit
(
res
);
}
...
...
@@ -50,7 +48,7 @@ public class Preprocessing_1 extends Configured implements Tool {
@Override
public
int
run
(
String
[]
args
)
throws
Exception
{
System
.
out
.
println
(
Arrays
.
toString
(
args
));
Job
job
=
new
Job
(
getConf
(),
"Preprocessing_1
_test (1)
"
);
Job
job
=
new
Job
(
getConf
(),
"Preprocessing_1"
);
job
.
setJarByClass
(
Preprocessing_1
.
class
);
job
.
setOutputKeyClass
(
LongWritable
.
class
);
job
.
setOutputValueClass
(
Text
.
class
);
...
...
@@ -77,7 +75,6 @@ public class Preprocessing_1 extends Configured implements Tool {
job
.
waitForCompletion
(
true
);
long
counter
=
job
.
getCounters
().
findCounter
(
COUNTS
.
COUNT_LINES
).
getValue
();
Path
countFile
=
new
Path
(
new
Path
(
args
[
1
]),
"nb_output_records.txt"
);
File
file
=
new
File
(
countFile
.
toString
());
...
...
@@ -88,11 +85,7 @@ public class Preprocessing_1 extends Configured implements Tool {
return
0
;
}
public
static
class
Map
extends
Mapper
<
LongWritable
,
Text
,
LongWritable
,
Text
>
{
private
Text
word
=
new
Text
();
...
...
@@ -116,7 +109,7 @@ public class Preprocessing_1 extends Configured implements Tool {
}
}
public
static
class
Reduce
extends
Reducer
<
LongWritable
,
Text
,
Long
Writable
,
Text
>
{
public
static
class
Reduce
extends
Reducer
<
LongWritable
,
Text
,
Null
Writable
,
Text
>
{
private
static
HashMap
<
String
,
Integer
>
word_freq
=
new
HashMap
<
String
,
Integer
>();
...
...
@@ -180,7 +173,7 @@ public class Preprocessing_1 extends Configured implements Tool {
if
(!
concat_words
.
toString
().
isEmpty
()){
context
.
getCounter
(
COUNTS
.
COUNT_LINES
).
increment
(
1
);
context
.
write
(
key
,
new
Text
(
concat_words
.
toString
()));
context
.
write
(
NullWritable
.
get
()
,
new
Text
(
concat_words
.
toString
()));
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment