Differences between revisions 1 and 2
Revision 1 as of 2008-04-24 02:16:07
Size: 2101
Editor: udanax
Comment:
Revision 2 as of 2009-09-20 23:54:29
Size: 2101
Editor: localhost
Comment: converted to 1.6 markup
Deletions are marked like this. Additions are marked like this.
Line 1: Line 1:
[[TableOfContents]] <<TableOfContents>>

C/C++ MapReduce Code & build

This is the WordCount example written in C++ using the Hadoop Pipes API.

   1 #include "hadoop/Pipes.hh"
   2 #include "hadoop/TemplateFactory.hh"
   3 #include "hadoop/StringUtils.hh"
   4 
   5 class WordCountMap: public HadoopPipes::Mapper {  // emits (token, "1") for each token of the input value
   6 public:
   7   WordCountMap(HadoopPipes::TaskContext& context){}  // the task context is not used
   8   void map(HadoopPipes::MapContext& context) {
   9     // Split the input value on the " " separator, then emit every piece with the literal "1".
  10     std::vector<std::string> tokens = HadoopUtils::splitString(context.getInputValue(), " ");
  11     std::vector<std::string>::const_iterator token = tokens.begin();
  12     for (; token != tokens.end(); ++token)
  13       context.emit(*token, "1");
  14   }
  15 };
  16 
  17 class WordCountReduce: public HadoopPipes::Reducer {  // emits the input key with the integer sum of its values
  18 public:
  19   WordCountReduce(HadoopPipes::TaskContext& context){}  // the task context is not used
  20   void reduce(HadoopPipes::ReduceContext& context) {
  21     int total = 0;
  22     for (; context.nextValue(); )  // keep going for as long as nextValue() returns true
  23       total = total + HadoopUtils::toInt(context.getInputValue());
  24     const std::string count = HadoopUtils::toString(total);  // emit takes the sum as text
  25     context.emit(context.getInputKey(), count);
  26   }
  27 };
  28 
  29 int main(int argc, char *argv[]) {  // argc and argv are not used
  30   HadoopPipes::TemplateFactory<WordCountMap, WordCountReduce> factory;  // factory for the two classes above
  31   return HadoopPipes::runTask(factory);  // runTask's result is returned from main
  32 }

To compile the example, build the Hadoop code and the C/C++ word count example:

# ant -Dcompile.c++=yes examples

Upload C++ binary files to HDFS

To upload the binary files to HDFS, the command syntax is:

# bin/hadoop fs -put build/c++-examples/Linux-i386-32/bin /examples/bin

Set the MapReduce Config

# vi src/examples/pipes/conf/word.xml

<?xml version="1.0"?>
<configuration>
  <property>
    <!-- Set the binary path on DFS -->
    <name>hadoop.pipes.executable</name>
    <value>/examples/bin/wordcount</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordreader</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordwriter</name>
    <value>true</value>
  </property>
</configuration>

Execute

To run the example, the command syntax is:

# bin/hadoop pipes -conf src/examples/pipes/conf/word.xml -input in-dir -output out-dir

C++WordCount (last edited 2009-09-20 23:54:29 by localhost)