1. 程式人生 > >Hadoop mapreduce 統計單詞程式設計示例

Hadoop mapreduce 統計單詞程式設計示例

首先,完成mapper類

package sinc.hadoops.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

	// Reused output objects: Hadoop invokes map() once per input record, so
	// allocating fresh Writables on every write creates needless GC pressure.
	private final Text word = new Text();
	private static final LongWritable ONE = new LongWritable(1);

	/**
	 * Emits (token, 1) for every whitespace-delimited token of the input line.
	 *
	 * @param key     byte offset of the line within the input split (unused)
	 * @param value   one line of input text
	 * @param context sink for the (word, 1) pairs consumed by the reducer
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		// \\s+ collapses runs of whitespace; the original split(" ") emitted
		// empty tokens for consecutive spaces, yielding bogus "" word counts.
		for (String w : line.split("\\s+")) {
			if (w.isEmpty()) {
				// split() can still yield one empty leading token when the
				// line starts with whitespace — skip it.
				continue;
			}
			word.set(w);
			context.write(word, ONE);
		}
	}

}

再完成reduce類
package sinc.hadoops.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

	/**
	 * Sums the per-word counts emitted by the mapper and writes one
	 * (word, total) pair per distinct word.
	 *
	 * @param key     the word
	 * @param values  the individual counts (each 1 from the mapper)
	 * @param context sink for the final (word, total) pair
	 */
	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
			throws IOException, InterruptedException {
		long total = 0L;
		java.util.Iterator<LongWritable> it = values.iterator();
		while (it.hasNext()) {
			total += it.next().get();
		}
		context.write(key, new LongWritable(total));
	}
}

最後完成主類main方法:
package sinc.hadoops.mr;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class WordCount {

	/**
	 * Job driver: wires the word-count mapper/reducer together and submits
	 * the job, then exits with the job's success/failure status.
	 *
	 * @param args args[0] = HDFS input path, args[1] = HDFS output path
	 *             (the output path must not already exist)
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		if (args.length < 2) {
			// Bug fix: the original message read "Input math and out path".
			System.err.println("Usage: WordCount <input path> <output path>");
			System.exit(1); // conventional non-zero failure code (was -1)
		}

		Job job = Job.getInstance();
		job.setJobName("word count");

		// Important: the class containing main() — lets Hadoop ship the right jar.
		job.setJarByClass(WordCount.class);

		// Mapper configuration.
		job.setMapperClass(WCMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));

		// Summing is associative and commutative, so the reducer doubles as a
		// combiner, shrinking the data shuffled between map and reduce phases.
		job.setCombinerClass(WCReducer.class);

		// Reducer configuration.
		job.setReducerClass(WCReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Bug fix: the original ignored waitForCompletion's return value, so
		// the process exited 0 even when the job failed. Propagate the status.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

至此,編寫完成。

Hdfs上放入檔案/tmp/test

hello zhang san hello zhang si hello nihao
hello zhang wu

執行:hadoop jar wordcount.jar /tmp/test /tmp/201701181347

檢視:hadoop fs -ls /tmp/201701181347

-rw-r--r--   2 root supergroup          0 2017-01-18 13:53 /tmp/201701181347/_SUCCESS
-rw-r--r--   2 root supergroup         40 2017-01-18 13:53 /tmp/201701181347/part-r-00000

檢視結果:hadoop fs -cat /tmp/201701181347/part-r-00000

hello   4
nihao   1
san     1
si      1
wu      1
zhang   3

至此,測試結束。