mapreduce統計單詞個數
阿新 • • 發佈:2022-05-09
WordCount類程式碼:
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class WordCount { public static void main(String[] args) throws Exception { //定義配置物件 Configuration conf = new Configuration(); //定義一個工作物件 Job job = Job.getInstance(conf); //獲取map的一個物件 job.setMapperClass(WordCountMap.class); //指定map階段的一個輸出keyjob.setMapOutputKeyClass(Text.class); //指定map階段輸出的values型別 job.setMapOutputValueClass(IntWritable.class); //指定map階段的輸入檔案 FileInputFormat.setInputPaths(job,new Path("D:\\程式碼\\大二下\\Hadoop\\4、MapReduce_Reduce\\123.txt")); //獲取reduce的類 job.setReducerClass(WordCountReduce.class); //指定reduce階段的一個輸出的key job.setOutputKeyClass(Text.class); //指定reduce階段輸出的values型別 job.setOutputValueClass(IntWritable.class); //指定reduce階段的輸出檔案 FileOutputFormat.setOutputPath(job,new Path("D:\\程式碼\\大二下\\Hadoop\\4、MapReduce_Reduce\\456.txt")); job.waitForCompletion(true); } }
WordCountMap類程式碼:
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> { @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { //讀取每行文字 String line = value.toString(); //splite拆分每行 String[] words = line.split(" "); //取出每個單詞 for (String word: words) { //將單詞轉為Text型別 Text wordText = new Text(word); //將1轉變成IntWritable IntWritable outValue = new IntWritable(1); //寫出每個單詞,跟對應1 context.write(wordText, outValue); } } }
WordCountReduce類程式碼:
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; /* * Text 輸入的字串型別,序列化 * IntWritable 輸入一串1,序列化 * Text 輸出的字串型別,序列化 * IntWritable 輸出的求和陣列,序列化 * */ public class WordCountReduce extends Reducer<Text, IntWritable,Text,IntWritable> { /* * key 輸入單詞名字 * values 輸入一串1 * context 輸出的工具 * */ @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum=0; for(IntWritable number:values){ sum += number.get(); } context.write(key,new IntWritable(sum)); } }