Monday, January 12, 2015

Word Count MapReduce program

WordCountMapper.java:-
------------------------------
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
                private static final IntWritable one = new IntWritable(1);
                @Override
                public void map(LongWritable key, Text value, Context context)
                                                throws IOException, InterruptedException
                {
                                StringTokenizer paragraph = new StringTokenizer(value.toString());
                                Text word = new Text();
                                while(paragraph.hasMoreTokens())
                                {
                                                word.set(paragraph.nextToken().toString());    
                                                context.write(word, one);
                                }
                } 

}

WordCountReducer.java:-
----------------------------------
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends
                                Reducer<Text, IntWritable, Text, IntWritable> 
{

                IntWritable maxresult = new IntWritable();
                @Override
                public void reduce(Text key, Iterable<IntWritable> values, Context context)
                                                throws IOException, InterruptedException {

                                int minValue = Integer.MIN_VALUE;
                                for (IntWritable value : values) {
                                                minValue += value.get();
                                }
                                maxresult.set(minValue);
                                context.write(key, maxresult);
                }
} 

WordCountDriver.java:-
------------------------------
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
                 /**
                 * @param args
                 */
                public static void main(String[] args) throws Exception {
                                if (args.length != 2) {
                                                System.err.println("Usage: WordCount <input path> <output path>");
                                                System.exit(-1);
                                }

                                Job job = new Job();
                                job.setJarByClass(WordCountDriver.class);
                                job.setJobName("Word Count");
                                FileInputFormat.addInputPath(job, new Path(args[0]));
                                FileOutputFormat.setOutputPath(job, new Path(args[1]));

                                job.setMapperClass(WordCountMapper.class);
                                job.setCombinerClass(WordCountReducer.class);
                                job.setReducerClass(WordCountReducer.class);
                                job.setOutputKeyClass(Text.class);
                                job.setOutputValueClass(IntWritable.class);

                                System.exit(job.waitForCompletion(true) ? 0 : 1);
                }
 }

No comments:

Post a Comment