hadoop入門6:hadoop查詢兩兩之間有共同好友,及他倆的共同好友都是誰
阿新 • • 發佈:2018-12-13
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
該資料可以看作好友,例如:A有B,C,D,F,E,O好友;B有A,C,E,K好友,以此類推;
求兩兩之間有共同好友,及他倆的共同好友都是誰,例如:A和B之間共同好友是:C、E
編碼思路:
第一步是可以把好友當作key,value是擁有key好友的使用者,例如:擁有好友B的是:A,F,J,E使用者
第二步在第一步結果的基礎上,對擁有同一個好友的使用者列表先排序,再用雙重for迴圈把使用者兩兩拼接成一對(例如A-B),以「使用者對」為key、該好友為value輸出,最後在reduce端彙總,就可以得出每對使用者的全部共同好友
具體程式碼實現:
第一步:
package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step one of the common-friends computation.
 *
 * <p>Input: text lines of the form {@code user:friend1,friend2,...}.
 * Output: one line per friend, {@code friend<TAB>user1,user2,...}, listing every
 * user whose friend list contains that friend. This inverted index is the input
 * of {@code commonFriendStepTwo}.
 */
public class commonFriendStepOne {

    /**
     * Inverts each {@code user:friends} record, emitting one {@code (friend, user)}
     * pair per entry of the friend list.
     */
    static class commonFriendStepOneMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reused across calls to avoid allocating a Text per output record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits {@code (friend, user)} for every friend on the input line.
         *
         * @param key     offset of the line in the input split (unused)
         * @param value   one input line, expected as {@code user:friend1,friend2,...}
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Split on the colon separating the user from the friend list.
            String[] splits = value.toString().split(":");
            // Skip blank lines, lines without a colon and users with an empty friend
            // list; indexing splits[1] unconditionally would throw
            // ArrayIndexOutOfBoundsException on such input.
            if (splits.length < 2 || StringUtils.isBlank(splits[0]) || StringUtils.isBlank(splits[1])) {
                return;
            }
            // The user owning this friend list becomes the value of every pair.
            v.set(splits[0].trim());
            // Each friend becomes a key so the reducer sees all users that have it.
            for (String friend : splits[1].split(",")) {
                if (StringUtils.isBlank(friend)) {
                    continue; // tolerate stray commas such as "A:B,,C"
                }
                k.set(friend.trim());
                context.write(k, v);
            }
        }
    }

    /**
     * Collects, for one friend, all users that have this friend and writes them as a
     * comma-separated list.
     */
    static class commonFriendStepOneReducer extends Reducer<Text, Text, Text, Text> {
        Text v = new Text();

        /**
         * Joins all users of {@code key} with commas and writes {@code (friend, users)}.
         *
         * @param key     the friend
         * @param values  the users whose friend list contains {@code key}
         * @param context sink for the joined record
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Append directly instead of buffering the values in a list first.
            StringBuilder users = new StringBuilder();
            for (Text value : values) {
                if (users.length() > 0) {
                    users.append(',');
                }
                users.append(value.toString());
            }
            v.set(users.toString());
            context.write(key, v);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input directory, {@code args[1]} the output
     *             directory (which must not exist yet)
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: commonFriendStepOne <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // To submit to a YARN cluster instead of the default runner, set
        // "mapreduce.framework.name" to "yarn" and "yarn.resourcemanager.hostname"
        // to the resource manager host (e.g. "hadoop01") on conf.

        Job job = Job.getInstance(conf);
        job.setJarByClass(commonFriendStepOne.class);

        // Mapper and reducer implementing this job.
        job.setMapperClass(commonFriendStepOneMapper.class);
        job.setReducerClass(commonFriendStepOneReducer.class);

        // Map output types equal the final output types (Text/Text), so only the
        // final output types need to be declared.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input directory and output directory of the job.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion reports success/failure, unlike job.submit().
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
結果:
第二步:
程式碼實現
package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step two of the common-friends computation.
 *
 * <p>Input: the output of {@code commonFriendStepOne}, lines of the form
 * {@code friend<TAB>user1,user2,...}. Output: one line per pair of users that share
 * at least one friend, {@code userA-userB:friend1,friend2,...}.
 */
public class commonFriendStepTwo {

    /**
     * For every friend, emits {@code (userA-userB, friend)} for each pair of users
     * that both have this friend.
     */
    static class commonFriendStepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reused across calls to avoid allocating a Text per output record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits one {@code (pair, friend)} record per unordered pair of users on the line.
         *
         * @param key     offset of the line in the input split (unused)
         * @param value   one input line, expected as {@code friend<TAB>user1,user2,...}
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String[] splits = value.toString().split("\t");
            // Skip blank or malformed lines; indexing splits[1] unconditionally would
            // throw ArrayIndexOutOfBoundsException when the tab is missing.
            if (splits.length < 2) {
                return;
            }
            // The shared friend is the value of every pair emitted for this line.
            v.set(splits[0]);
            // All users that have this friend.
            String[] names = splits[1].split(",");
            // Sort so each pair has one canonical key: A-B and B-A are the same pair.
            Arrays.sort(names);
            // Enumerate every unordered pair (i < j) of users.
            for (int i = 0; i < names.length - 1; i++) {
                for (int j = i + 1; j < names.length; j++) {
                    k.set(names[i] + "-" + names[j]);
                    context.write(k, v);
                }
            }
        }
    }

    /**
     * Gathers all friends shared by one pair of users into a single output line.
     */
    static class commonFriendStepTwoReducer extends Reducer<Text, Text, Text, NullWritable> {
        Text k = new Text();

        /**
         * Writes {@code pair:friend1,friend2,...} for one user pair.
         *
         * @param key     the user pair, formatted {@code userA-userB}
         * @param value   the friends both users of the pair have
         * @param context sink for the output line
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Values must be buffered here because they are sorted before output.
            List<String> resultList = new ArrayList<String>();
            for (Text text : value) {
                resultList.add(text.toString());
            }
            // Values reach the reducer in shuffle order, which may differ between
            // runs; sorting makes the output reproducible.
            String[] friends = resultList.toArray(new String[0]);
            Arrays.sort(friends);
            k.set(key.toString() + ":" + StringUtils.join(friends, ","));
            context.write(k, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input directory (output of step one),
     *             {@code args[1]} the output directory (which must not exist yet)
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: commonFriendStepTwo <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // To submit to a YARN cluster instead of the default runner, set
        // "mapreduce.framework.name" to "yarn" and "yarn.resourcemanager.hostname"
        // to the resource manager host (e.g. "hadoop01") on conf.

        Job job = Job.getInstance(conf);
        job.setJarByClass(commonFriendStepTwo.class);

        // Mapper and reducer implementing this job.
        job.setMapperClass(commonFriendStepTwoMapper.class);
        job.setReducerClass(commonFriendStepTwoReducer.class);

        // Map output (Text/Text) differs from the final output (Text/NullWritable),
        // so both must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Input directory and output directory of the job.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion reports success/failure, unlike job.submit().
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
結果:
這樣就可以找到正確結果