Step 1: Write the following program using any editor, then save it as WordCount.cpp.
You can use the vi editor: type vi WordCount.cpp in your terminal, press i to enter insert mode, type in the program, then press Esc and type :wq to save and quit.
// WordCount.cpp
#include <algorithm>
#include <limits>
#include <string>
#include <vector>   // needed for std::vector<string> in the mapper
#include <stdint.h> // <--- to prevent uint64_t errors!
#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"

using namespace std;
class WordCountMapper : public HadoopPipes::Mapper {
public:
  // constructor: does nothing
  WordCountMapper( HadoopPipes::TaskContext& context ) {
  }

  // map function: receives a line, outputs a (word, "1")
  // tuple for each word to the reducer
  void map( HadoopPipes::MapContext& context ) {
    //--- get the line of text ---
    string line = context.getInputValue();
    //--- split it into words ---
    vector<string> words = HadoopUtils::splitString( line, " " );
    //--- emit a (word, "1") tuple for each word ---
    for ( unsigned int i = 0; i < words.size(); i++ ) {
      context.emit( words[i], HadoopUtils::toString( 1 ) );
    }
  }
};
class WordCountReducer : public HadoopPipes::Reducer {
public:
  // constructor: does nothing
  WordCountReducer( HadoopPipes::TaskContext& context ) {
  }

  // reduce function: sums the "1"s emitted for each word
  void reduce( HadoopPipes::ReduceContext& context ) {
    int count = 0;
    //--- iterate over all values with the same key and count them ---
    while ( context.nextValue() ) {
      count += HadoopUtils::toInt( context.getInputValue() );
    }
    //--- emit (word, count) ---
    context.emit( context.getInputKey(), HadoopUtils::toString( count ) );
  }
};
int main( int argc, char *argv[] ) {
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory< WordCountMapper, WordCountReducer >() );
}
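To see what the two classes do, here is a worked trace on one line of input (just an illustration, not part of the program): given the line "hello world hello", the mapper emits (hello,"1"), (world,"1"), (hello,"1"). The framework then groups values by key, so the reducer sees hello -> ["1","1"] and world -> ["1"] and emits (hello,"2") and (world,"1").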
Step 2:
Compile the program using g++. The following commands compile and link it (note that the file name must match what you saved in Step 1):
g++ -I/opt/hadoop/c++/Linux-amd64-64/include -c WordCount.cpp
g++ WordCount.o -o WordCount -L/opt/hadoop/c++/Linux-amd64-64/lib -lnsl -lpthread -lhadooppipes -lhadooputils
This creates a WordCount binary in your current folder.
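If your Hadoop is installed somewhere other than /opt/hadoop, adjust the paths. Here is a sketch assuming the HADOOP_HOME environment variable points at your install (the platform directory may also differ, e.g. Linux-i386-32 on 32-bit systems):
HADOOP_CPP=$HADOOP_HOME/c++/Linux-amd64-64   # adjust for your platform
g++ -I$HADOOP_CPP/include -c WordCount.cpp
g++ WordCount.o -o WordCount -L$HADOOP_CPP/lib -lnsl -lpthread -lhadooppipes -lhadooputils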
Step 3:
Now put this binary into HDFS using the following commands:
hadoop fs -mkdir /user/test
hadoop fs -put WordCount /user/test/
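As an optional sanity check, you can confirm the binary landed in HDFS with:
hadoop fs -ls /user/test/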
Step 4:
Now make an input file for your test. Use the following command:
vi input.txt
Press i, write a few lines of text, then press Esc and type :wq.
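For example (this sample content is just for illustration), input.txt could contain:
hello hadoop
hello pipes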
Step 5:
Put your input file into HDFS using the following command:
hadoop fs -put input.txt /user/test/
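You can verify the upload with:
hadoop fs -cat /user/test/input.txt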
Step 6:
Now run your program using the following command:
hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -program /user/test/WordCount -input /user/test/input.txt -output /user/test/output
The two -D options tell Hadoop to use its built-in Java record reader and writer around your C++ mapper and reducer. Once the job finishes, you can see your output in the output directory we gave at run time, /user/test/output.
To see the output, type the following command:
hadoop fs -text /user/test/output/part-00000
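With the sample input above, the output should look roughly like this: one word and its count per line, separated by a tab and sorted by key:
hadoop	1
hello	2
pipes	1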
Any questions are welcome.
good luck :)