Differences between revisions 1 and 2
Revision 1 as of 2007-03-13 22:53:37
Size: 2514
Comment:
Revision 2 as of 2009-09-20 23:10:15
Size: 2514
Editor: localhost
Comment: converted to 1.6 markup
No differences found!

package org.apache.nutch.examples;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import org.apache.nutch.parse.*;
import org.apache.nutch.util.*;

/**
 * Example MapReduce job that reports, for every key in a set of
 * {@link ParseData} records, how many outlinks that record contains.
 *
 * <p>The job reads the {@code ParseData.DIR_NAME} subdirectory of the first
 * command-line argument as a sequence file and writes (key, outlink count)
 * pairs as a map file to the path given by the second argument.
 */
public class LinkCounter {

    /**
     * Emits one (key, outlink count) pair per input record.
     *
     * <p>{@code configure} and {@code close} are inherited unchanged from
     * {@link MapReduceBase}; this mapper needs no set-up or tear-down.
     */
    public static class CounterMapper extends MapReduceBase implements Mapper {

        /**
         * Counts the outlinks of a single parsed record.
         *
         * @param key       record key, passed through unchanged
         * @param value     the record; must be a {@link ParseData}
         * @param collector receives the (key, outlink count) pair
         * @param reporter  unused
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(WritableComparable key, Writable value, OutputCollector collector, Reporter reporter) throws IOException {
            ParseData data = (ParseData) value;

            IntWritable outboundLinkCount = new IntWritable(data.getOutlinks().length);

            collector.collect(key, outboundLinkCount);
        }
    }

    /**
     * Passes through the first count seen for each key. The same class is
     * registered as the job's combiner and as its reducer.
     *
     * <p>{@code configure} and {@code close} are inherited unchanged from
     * {@link MapReduceBase}.
     */
    public static class CounterReducer extends MapReduceBase implements Reducer {

        /**
         * Emits the key together with the first value of its group.
         *
         * <p>Only the first value is read; any further values for the same
         * key are ignored. NOTE(review): this presumes each key occurs once
         * in the input, so there is nothing to aggregate -- confirm against
         * the data being processed.
         *
         * @param url      key of the group
         * @param iterator values for the key; elements are cast to {@link IntWritable}
         * @param output   receives the (key, count) pair
         * @param reporter unused
         * @throws IOException if the collector fails to accept the pair
         */
        public void reduce(WritableComparable url, Iterator iterator, OutputCollector output, Reporter reporter) throws IOException {
            IntWritable linkCount = (IntWritable) iterator.next();
            output.collect(url, linkCount);
        }
    }

    /**
     * Configures and runs the link-counting job.
     *
     * @param args {@code args[0]} is the directory that contains the
     *             {@code ParseData.DIR_NAME} subdirectory to read (typically
     *             a Nutch segment); {@code args[1]} is the output path
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: LinkCounter <segment_dir> <output_dir>");
            System.exit(-1);
        }

        Configuration config = NutchConfiguration.create();

        JobConf jobConfig = new NutchJob(config);
        jobConfig.setJobName("countlinks");

        jobConfig.setInputFormat(SequenceFileInputFormat.class);
        jobConfig.setOutputFormat(MapFileOutputFormat.class);

        // Output keys are the input keys passed through by the mapper, declared as Text.
        jobConfig.setOutputKeyClass(Text.class);
        // Output values are the per-record outlink counts.
        jobConfig.setOutputValueClass(IntWritable.class);

        jobConfig.setMapperClass(CounterMapper.class);
        jobConfig.setCombinerClass(CounterReducer.class);
        jobConfig.setReducerClass(CounterReducer.class);

        jobConfig.setInputPath(new Path(args[0], ParseData.DIR_NAME));
        jobConfig.setOutputPath(new Path(args[1]));

        JobClient.runJob(jobConfig);
    }

}

TutorialOneCompleteSourceListing (last edited 2009-09-20 23:10:15 by localhost)