Step 1: Write the following program using any editor, then save it.
You can use the vi editor: type vi WordCount.cpp at your command prompt, press i to enter insert mode, type the program, then press Esc and type :wq to save and quit.
// WordCount.cpp
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
#include <stdint.h> // <--- to prevent uint64_t errors!
#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"
using namespace std;
class WordCountMapper : public HadoopPipes::Mapper {
public:
  // constructor: does nothing
  WordCountMapper( HadoopPipes::TaskContext& context ) {
  }
  // map function: receives a line, emits (word, "1")
  // pairs to the reducer.
  void map( HadoopPipes::MapContext& context ) {
    //--- get line of text ---
    string line = context.getInputValue();
    //--- split it into words ---
    vector< string > words =
      HadoopUtils::splitString( line, " " );
    //--- emit each tuple (word, "1") ---
    for ( unsigned int i = 0; i < words.size(); i++ ) {
      context.emit( words[i], HadoopUtils::toString( 1 ) );
    }
  }
};

class WordCountReducer : public HadoopPipes::Reducer {
public:
  // constructor: does nothing
  WordCountReducer( HadoopPipes::TaskContext& context ) {
  }
  // reduce function: sums the counts for one key
  void reduce( HadoopPipes::ReduceContext& context ) {
    int count = 0;
    //--- iterate over all values with the same key and add them up ---
    while ( context.nextValue() ) {
      count += HadoopUtils::toInt( context.getInputValue() );
    }
    //--- emit (word, count) ---
    context.emit( context.getInputKey(), HadoopUtils::toString( count ) );
  }
};

int main( int argc, char *argv[] ) {
  return HadoopPipes::runTask( HadoopPipes::TemplateFactory<
                                 WordCountMapper,
                                 WordCountReducer >() );
}
Step 2:
Compile the program using g++. The following commands compile and link it (adjust the include and library paths to match your Hadoop installation):
g++ -I/opt/hadoop/c++/Linux-amd64-64/include -c WordCount.cpp
g++ WordCount.o -o WordCount -L/opt/hadoop/c++/Linux-amd64-64/lib -lnsl -lpthread -lhadooppipes -lhadooputils
This creates the WordCount binary in your current folder.
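If you prefer make, the following is a minimal Makefile sketch assuming the same include and library paths as above (HADOOP_INSTALL and PLATFORM are placeholders; adjust them to your installation, and note that the recipe line must start with a tab):
HADOOP_INSTALL = /opt/hadoop
PLATFORM = Linux-amd64-64
CXX = g++

WordCount: WordCount.cpp
	$(CXX) -I$(HADOOP_INSTALL)/c++/$(PLATFORM)/include $< \
	-L$(HADOOP_INSTALL)/c++/$(PLATFORM)/lib \
	-lhadooppipes -lhadooputils -lnsl -lpthread -o $@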
Step 3:
Now put the binary into HDFS using the following commands:
hadoop fs -mkdir /user/test
hadoop fs -put WordCount /user/test/
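To verify the upload, you can list the directory; WordCount should appear in the output:
hadoop fs -ls /user/test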
Step 4:
Now make an input file for your test. Use the following command:
vi input.txt
Press i, write a few lines of text, then press Esc and type :wq to save.
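If you would rather skip vi, a here-document does the same job; the sample text below is arbitrary:
cat > input.txt << 'EOF'
hello world
hello hadoop
EOF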
Step 5:
Copy your input file into HDFS using the following command:
hadoop fs -put input.txt /user/test/
Step 6:
Now run the program using the following command:
hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -program /user/test/WordCount -input /user/test/input.txt -output /user/test/output
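Note: the job will fail if the output directory already exists, so remove it before re-running. On older releases the command is:
hadoop fs -rmr /user/test/output
(newer releases use hadoop fs -rm -r instead).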
If the job runs successfully, you can see the results in the output directory given on the command line, /user/test/output. To view the output, type the following command:
hadoop fs -text /user/test/output/part-00000
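For example, if input.txt contains the two sample lines from Step 4, the output would be one word and its count per line, separated by a tab:
hadoop	1
hello	2
world	1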
Any questions are welcome.
Good luck :)
My Hadoop works fine with Java, but when I run any C++ example, including the above one, I get the following errors:
wordcount.cpp:8:27: error: hadoop/Pipes.hh: No such file or directory
wordcount.cpp:9:37: error: hadoop/TemplateFactory.hh: No such file or directory
wordcount.cpp:10:33: error: hadoop/StringUtils.hh: No such file or directory
wordcount.cpp:14: error: ‘HadoopPipes’ has not been declared
wordcount.cpp:14: error: expected ‘{’ before ‘Mapper’
wordcount.cpp:14: error: invalid type in declaration before ‘{’ token
wordcount.cpp:14: warning: extended initializer lists only available with -std=c++0x or -std=gnu++0x
wordcount.cpp:15: error: expected primary-expression before ‘public’
wordcount.cpp:15: error: expected ‘}’ before ‘public’
wordcount.cpp:15: error: expected ‘,’ or ‘;’ before ‘public’
wordcount.cpp:22: error: variable or field ‘map’ declared void
wordcount.cpp:22: error: ‘HadoopPipes’ has not been declared
wordcount.cpp:22: error: ‘context’ was not declared in this scope
Please help me. I have installed Hadoop many times.
Hi Gul,
The error shows that you are not including the proper path.
Here's the command; please try it after changing the paths for your files, and let me know what command you are firing to compile your program:
rm -rf *.o *~ wordcount
g++ wordcount.cpp -I /usr/include/libxml2 -I /usr/src/hadoop-0.20.1+133/c++/Linux-i386-32/include -I /usr/src/hadoop-0.20/src/c++/librecordio/ -L/usr/src/hadoop-0.20.1+133/c++/Linux-i386-32/lib -lhadooppipes -lhadooputils -lpthread -lxml2 -o wordcount
I am getting the same errors (hadoop/Pipes.hh, TemplateFactory.hh and StringUtils.hh: No such file or directory, followed by the ‘HadoopPipes’ has not been declared errors) even after verifying the path
I have the same problem
Worked for me!
Hi Rakhi,
I used the command from your step 2 to generate the binary and got an error:
hduser@localhost:/usr/local/hadoop$ g++ wordcount.o -o wordcount -L/opt/usr/local/hadoop/c++/Linux-i386-32/lib -lnsl -lpthread -lhadooppipes -lhadooputils
/usr/bin/ld: cannot find -lhadooppipes
/usr/bin/ld: cannot find -lhadooputils
collect2: ld returned 1 exit status
Sorry for the late reply; try the commands below once and let me know:
g++ -I/opt/hadoop/c++/Linux-i386-32/include -c wordcount.cpp
g++ wordcount.o -o wordcount -L/opt/hadoop/c++/Linux-i386-32/lib -lnsl -lpthread -lhadooppipes -lhadooputils
Also, please check that libhadooppipes.a and libhadooputils.a are present in /opt/hadoop/c++/Linux-i386-32/lib.
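For example, a quick check using the 32-bit path from the commands above; both .a files should appear in the listing:
ls -l /opt/hadoop/c++/Linux-i386-32/lib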
I'm getting that same error. I'm using 64-bit Linux and the .a files are in the correct directory.
I'm also getting the same issue on a 64-bit machine.
DeleteHi, thanks for the nice tutorial. I am on Mac OSX. I am trying to compile the code using the following command:
ReplyDeleteg++ -I/usr/lib/c++/v1 -c wordcount.cpp
As I have found my c++ header files inside /usr/lib/c++/v1 directory.
But I am getting the error:
fatal error: 'hadoop/Pipes.hh' file not found
so what should I do to make it work? Please let me know. Thanks in advance.
Hi,
1. Check whether you have a hadoop directory inside the path /usr/lib/c++/v1.
If not, create it, then try again and please let me know.
2. Please double-check the command:
g++ -I/opt/hadoop/c++/Linux-amd64-64/include -c wordcount.cpp
In this command, -I is a capital I.
Hi All,
Hope you have typed the command correctly:
g++ -I/opt/hadoop/c++/Linux-amd64-64/include -c wordcount.cpp
In this command, -I is a capital I.
Hi
I am getting an error when running the wordcount on Hadoop. I tried the Java word count as well, and it worked.
Please help
13/11/28 10:58:52 INFO mapred.LocalJobRunner: Map task executor complete.
13/11/28 10:58:52 WARN output.FileOutputCommitter: Output Path is null in cleanupJob()
13/11/28 10:58:52 WARN mapred.LocalJobRunner: job_local1620942588_0001
java.lang.Exception: java.lang.NullPointerException
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:403)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.mapred.pipes.Application.<init>(Application.java:104)
at org.apache.hadoop.mapred.pipes.PipesMapRunner.run(PipesMapRunner.java:69)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:235)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
at java.util.concurrent.FutureTask.run(FutureTask.java:166)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
at java.lang.Thread.run(Thread.java:679)
13/11/28 10:58:52 INFO mapreduce.Job: Job job_local1620942588_0001 running in uber mode : false
13/11/28 10:58:52 INFO mapreduce.Job: map 0% reduce 0%
13/11/28 10:58:52 INFO mapreduce.Job: Job job_local1620942588_0001 failed with state FAILED due to: NA
13/11/28 10:58:52 INFO mapreduce.Job: Counters: 0
Exception in thread "main" java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:836)
at org.apache.hadoop.mapred.pipes.Submitter.runJob(Submitter.java:264)
at org.apache.hadoop.mapred.pipes.Submitter.run(Submitter.java:503)
at org.apache.hadoop.mapred.pipes.Submitter.main(Submitter.java:518)
Please let me know which version of Hadoop to use. I am using 2.2.0
Hi, I have the same problem with Hadoop 2.2.0. It is not a problem with your code; it occurs before the executable runs. Can you post your configuration files?
I have the same problem running the wordcount, but I still don't know how to solve it... Do you have any ideas?
Hadoop 2.2.0
Hi everybody! Did someone solve the problem?
Hi Rakhi,
I installed Apache Hadoop version 2.5.2 in my CentOS virtual box based on the instructions at http://alanxelsys.com/2014/02/01/hadoop-2-2-single-node-installation-on-centos-6-5/
I am able to run the wordcount Java application, and I tried the C++ wordcount. My image is a 32-bit image. What I see is that I don't have a directory like /opt/hadoop/c++/Linux-amd64-64/include (pasted in an earlier post).
My HADOOP_HOME is /usr/local/hadoop. I see the headers in /usr/local/hadoop/include and the static libraries under /usr/local/hadoop/lib/native.
I changed the makefile accordingly, but I still get an error. Could you please provide info on which Hadoop you are running? Am I missing any installation for Pipes?
-----------------------------
$ ls -l /usr/local/hadoop/include
total 56
-rw-r--r--. 1 hadoopuser hadoopgroup 29267 Nov 14 18:53 hdfs.h
-rw-r--r--. 1 hadoopuser hadoopgroup 6330 Nov 14 18:53 Pipes.hh
-rw-r--r--. 1 hadoopuser hadoopgroup 4514 Nov 14 18:53 SerialUtils.hh
-rw-r--r--. 1 hadoopuser hadoopgroup 2441 Nov 14 18:53 StringUtils.hh
-rw-r--r--. 1 hadoopuser hadoopgroup 3319 Nov 14 18:53 TemplateFactory.hh
-----------
$ ls -l /usr/local/hadoop/lib/native
total 4556
-rw-r--r--. 1 hadoopuser hadoopgroup 1200400 Nov 14 18:53 libhadoop.a
-rw-r--r--. 1 hadoopuser hadoopgroup 1634592 Nov 14 18:53 libhadooppipes.a
lrwxrwxrwx. 1 hadoopuser hadoopgroup 18 Nov 21 21:16 libhadoop.so -> libhadoop.so.1.0.0
-rwxr-xr-x. 1 hadoopuser hadoopgroup 705117 Nov 14 18:53 libhadoop.so.1.0.0
-rw-r--r--. 1 hadoopuser hadoopgroup 476666 Nov 14 18:53 libhadooputils.a
-rw-r--r--. 1 hadoopuser hadoopgroup 379634 Nov 14 18:53 libhdfs.a
lrwxrwxrwx. 1 hadoopuser hadoopgroup 16 Nov 21 21:16 libhdfs.so -> libhdfs.so.0.0.0
-rwxr-xr-x. 1 hadoopuser hadoopgroup 252213 Nov 14 18:53 libhdfs.so.0.0.0
$ cat Makefile
CC = g++
HADOOP_INSTALL = /usr/local/hadoop
PLATFORM = Linux-i386-32
CPPFLAGS = -m32 -I$(HADOOP_INSTALL)/include
wordcount: wordcount.cpp
	$(CC) $(CPPFLAGS) $< -Wall -L$(HADOOP_INSTALL)/lib/native/ -lhadooppipes \
	-lhadooputils -lpthread -g -O2 -o $@
---------------- I run it ------------------
$ make wordcount
g++ -m32 -I/usr/local/hadoop/include wordcount.cpp -Wall -L/usr/local/hadoop/lib/native/ -lhadooppipes \
-lhadooputils -lpthread -g -O2 -o wordcount
/usr/bin/ld: skipping incompatible /usr/local/hadoop/lib/native//libhadooppipes.a when searching for -lhadooppipes
/usr/bin/ld: cannot find -lhadooppipes
collect2: ld returned 1 exit status
make: *** [wordcount] Error 1
In the code, I tweaked the include directives.
--------- Program -----------------
$ cat wordcount.cpp
#include <algorithm>
#include <limits>
#include <string>
#include "stdint.h" // <--- to prevent uint64_t errors!
#include "Pipes.hh"
#include "TemplateFactory.hh"
#include "StringUtils.hh"
(the rest of the program is identical to the WordCount.cpp listing in the post above)
I also tried your program, edited the includes alone:
#include "Pipes.hh"
#include "TemplateFactory.hh"
#include "StringUtils.hh"
I could find the include headers only in /usr/local/hadoop/include and the static libraries only under /usr/local/hadoop/lib/native.
$ g++ -I/usr/local/hadoop/include -c WordCount2.cpp
This compiled and produced WordCount2.o.
Then:
$ g++ WordCount2.o -o WordCount2 -L/usr/local/hadoop/lib/native -lnsl -lpthread -lhadooppipes -lhadooputils
/usr/bin/ld: skipping incompatible /usr/local/hadoop/lib/native/libhadooppipes.a when searching for -lhadooppipes
/usr/bin/ld: cannot find -lhadooppipes
collect2: ld returned 1 exit status
I'd really appreciate your response. Thanks in advance.
I couldn't find the c++ folder in the /usr/local/hadoop folder. I am using version 2.7.1, which I downloaded from http://www.us.apache.org/dist/hadoop/common/hadoop-2.7.1/
So I am getting the following errors:
wordcount.cpp:8:27: error: hadoop/Pipes.hh: No such file or directory
wordcount.cpp:9:37: error: hadoop/TemplateFactory.hh: No such file or directory
wordcount.cpp:10:33: error: hadoop/StringUtils.hh: No such file or directory
Any idea on how to solve it? Thanks in advance
Hi,
ReplyDeleteCan I use other third-party libraries like igraph, GTL or boost?
TIA