Monday, January 12, 2015

Word Count MapReduce program

WordCountMapper.java:-
------------------------------
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
                private static final IntWritable one = new IntWritable(1);
                @Override
                public void map(LongWritable key, Text value, Context context)
                                                throws IOException, InterruptedException
                {
                                StringTokenizer paragraph = new StringTokenizer(value.toString());
                                Text word = new Text();
                                while(paragraph.hasMoreTokens())
                                {
                                                word.set(paragraph.nextToken().toString());    
                                                context.write(word, one);
                                }
                } 

}

WordCountReducer.java:-
----------------------------------
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends
                                Reducer<Text, IntWritable, Text, IntWritable> 
{

                IntWritable maxresult = new IntWritable();
                @Override
                public void reduce(Text key, Iterable<IntWritable> values, Context context)
                                                throws IOException, InterruptedException {

                                int minValue = Integer.MIN_VALUE;
                                for (IntWritable value : values) {
                                                minValue += value.get();
                                }
                                maxresult.set(minValue);
                                context.write(key, maxresult);
                }
} 

WordCountDriver.java:-
------------------------------
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
                 /**
                 * @param args
                 */
                public static void main(String[] args) throws Exception {
                                if (args.length != 2) {
                                                System.err.println("Usage: WordCount <input path> <output path>");
                                                System.exit(-1);
                                }

                                Job job = new Job();
                                job.setJarByClass(WordCountDriver.class);
                                job.setJobName("Word Count");
                                FileInputFormat.addInputPath(job, new Path(args[0]));
                                FileOutputFormat.setOutputPath(job, new Path(args[1]));

                                job.setMapperClass(WordCountMapper.class);
                                job.setCombinerClass(WordCountReducer.class);
                                job.setReducerClass(WordCountReducer.class);
                                job.setOutputKeyClass(Text.class);
                                job.setOutputValueClass(IntWritable.class);

                                System.exit(job.waitForCompletion(true) ? 0 : 1);
                }
 }

No comments:

Post a Comment