MapReduce編寫wordcount程序代碼實現
阿新 • • 發佈:2018-08-01
MapReduce經典案例代碼(wordcount)
以經典的wordcount為例,通過自定義的mapper和reducer來實現單詞計數
package com.fwmagic.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * MapReduce word count: counts how many times each whitespace-separated word
 * occurs in the input files.
 */
public class WordCountDemo {

    /** Input path used when no paths are supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/wordcount/input";

    /** Output path used when no paths are supplied on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/wordcount/output";

    /**
     * Custom mapper extending {@link org.apache.hadoop.mapreduce.Mapper}.
     * Emits {@code (word, 1)} for every word found in an input line.
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Constant count emitted for every word occurrence. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key, to avoid allocating a new Text per word. */
        private final Text word = new Text();

        /**
         * Splits one input line into words and writes {@code (word, 1)} for each.
         *
         * @param key     input key supplied by the input format (unused here)
         * @param value   one line of input text
         * @param context sink for the emitted {@code (word, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value,
                           Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // StringTokenizer splits on any whitespace and never yields empty
            // tokens, so blank lines and repeated/leading spaces no longer
            // produce a bogus "" word, and tabs cannot end up inside a key.
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /**
     * Custom reducer extending {@link org.apache.hadoop.mapreduce.Reducer}.
     * Sums the counts received for each word. Because it is a plain sum with
     * identical input and output types, it is also registered as the combiner.
     */
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reused output value, to avoid allocating a new IntWritable per key. */
        private final IntWritable result = new IntWritable();

        /**
         * Adds up all counts for one word and writes {@code (word, total)}.
         *
         * @param key     the word
         * @param values  the partial counts collected for the word
         * @param context sink for the emitted {@code (word, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                              Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable writable : values) {
                count += writable.get();
            }
            result.set(count);
            context.write(key, result);
        }
    }

    /**
     * Job launcher: configures the job and submits it to the cluster, then
     * exits with status 0 on success and 1 on failure.
     *
     * @param args optional {@code <input path> <output path>}; when fewer than
     *             two arguments are given, {@code /wordcount/input} and
     *             {@code /wordcount/output} are used
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        // Only override the defaults when both paths are supplied, so existing
        // invocations without path arguments behave exactly as before.
        String inputPath = DEFAULT_INPUT_PATH;
        String outputPath = DEFAULT_OUTPUT_PATH;
        if (args != null && args.length >= 2) {
            inputPath = args[0];
            outputPath = args[1];
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDemo.class);

        job.setMapperClass(WordCountMapper.class);
        // Pre-aggregate counts on the map side to shrink shuffle traffic; the
        // final output is unchanged because summation is associative.
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
集群中/wordcount/input目錄下數據內容
打包項目,執行job
hadoop jar fwmagic-wordcount.jar
執行輸出結果
MapReduce編寫wordcount程序代碼實現