MapReduce案例之尋找共同好友
以下是部落格的好友列表資料,冒號前是一個使用者,冒號後是該使用者的所有好友(資料中的好友關係是單向的)
求出哪些人兩兩之間有共同好友,及他倆的共同好友都有誰?
輸出格式:
A-B:C,E
(使用者-使用者:共同好友...)
需求分析
分為兩個job
第一次輸出結果:先求出A、B、C……等各自是誰的好友
Job1:
Mapper
:
keyin-valuein
: (A:B,C,D,F,E,O)
map()
: 將valuein拆分為若干好友,作為keyout寫出
將keyin作為valueout
keyout-valueout
: (友:使用者)
(C:A),(C:B),(C:E)
Reducer
keyin-valuein
: (友:使用者) (C:A),(C:B),(C:E)
reduce()
: keyout-valueout
:(友:使用者,使用者,使用者,使用者)
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,
第二次輸出結果,輸出每兩個人的共同好友
Job2
:
Mapper
:
keyin-valuein
map()
: 使用keyin(友)作為valueout;將valuein(使用者列表)切分後,兩兩拼接,作為keyout
keyout-valueout
: (使用者-使用者,友)(A-B,C),(A-B,E)
(A-E,C), (A-G,C), (A-F,C), (A-K,C)
(B-E,C),(B-G,C)
--------------------
(B-E,C)
(E-B,G)
若不先對使用者排序,B-E與E-B會被視為兩個不同的key;排序後兩者統一為B-E,合併結果為:
B-E: C,G
A-B E C
A-C D F
A-D E F
A-E D B C
A-F O B C D E
A-G F E C D
A-H E C D O
A-I O
A-J O B
A-K D C
A-L F E D
A-M E F
B-C A
B-D A E
B-E C
B-F E A C
B-G C E A
B-H A E C
B-I A
B-K C A
B-L E
B-M E
B-O A
C-D A F
C-E D
C-F D A
C-G D F A
C-H D A
C-I A
C-K A D
C-L D F
C-M F
C-O I A
D-E L
D-F A E
D-G E A F
D-H A E
D-I A
D-K A
D-L E F
D-M F E
D-O A
E-F D M C B
E-G C D
E-H C D
E-J B
E-K C D
E-L D
F-G D C A E
F-H A D O E C
F-I O A
F-J B O
F-K D C A
F-L E D
F-M E
F-O A
G-H D C E A
G-I A
G-K D A C
G-L D F E
G-M E F
G-O A
H-I O A
H-J O
H-K A C D
H-L D E
H-M E
H-O A
I-J O
I-K A
I-O A
K-L D
K-O A
L-M E F
Reducer
:
keyin-valuein
: (A-B,C),(A-B,E)
reduce()
:
keyout-valueout
: (A-B:C,E)
程式碼實現
(1)第一次Mapper類
/**
 * Job 1 mapper: inverts each "user:friend,friend,..." input line into
 * (friend, user) pairs, so that the reducer can collect, for every friend,
 * all users who list that friend.
 *
 * <p>Lines that do not contain both a user and a friend list (blank lines,
 * lines without ':') are skipped instead of failing the task with an
 * ArrayIndexOutOfBoundsException. Empty friend tokens are skipped as well.
 */
public class OneShareFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Reused for every emitted pair; context.write() serializes the current
    // contents, so the objects can be overwritten safely afterwards.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Emits one (friend, user) pair per friend found on the line.
     *
     * @param key     byte offset of the line (unused)
     * @param value   one input line, e.g. {@code A:B,C,D,F,E,O}
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        // 1. Read one line, e.g. A:B,C,D,F,E,O
        String line = value.toString();

        // 2. Split on ':' - the user is before the colon, the friends after it.
        String[] fields = line.split(":");
        if (fields.length < 2 || fields[0].isEmpty()) {
            // Malformed or blank line: nothing to emit.
            return;
        }

        // 3. Extract the user and the friend list.
        String person = fields[0];
        String[] friends = fields[1].split(",");

        // 4. Emit <friend, user> for every non-empty friend token.
        outValue.set(person);
        for (String friend : friends) {
            if (friend.isEmpty()) {
                continue;
            }
            outKey.set(friend);
            context.write(outKey, outValue);
        }
    }
}
(2)第一次Reducer類
/**
 * Job 1 reducer: for one friend (the key), concatenates every user that
 * lists this friend into a single comma-terminated string.
 *
 * <p>Each user is followed by a comma, so the emitted value ends with a
 * trailing comma (e.g. {@code A,F,J,E,}).
 */
public class OneShareFriendsReducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Joins all users of the given friend and writes (friend, "user,user,...,").
     *
     * @param key     the friend
     * @param values  the users who have {@code key} as a friend
     * @param context sink for the emitted pair
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder users = new StringBuilder();

        // Append "<user>," for every incoming user.
        for (Text user : values) {
            users.append(user.toString());
            users.append(",");
        }

        Text joinedUsers = new Text(users.toString());
        context.write(key, joinedUsers);
    }
}
(3)第一次Driver類
/**
 * Driver for job 1: wires {@link OneShareFriendsMapper} and
 * {@link OneShareFriendsReducer} into a job, reading from {@code args[0]}
 * and writing to {@code args[1]}.
 */
public class OneShareFriendsDriver {

    /**
     * Configures, submits and waits for the job; the process exit code is
     * 0 when the job succeeds and 1 otherwise.
     *
     * @param args {@code args[0]} = input path, {@code args[1]} = output path
     */
    public static void main(String[] args) throws Exception {
        // Create the job from a default configuration.
        Configuration conf = new Configuration();
        Job firstJob = Job.getInstance(conf);

        // The jar is located through this driver class.
        firstJob.setJarByClass(OneShareFriendsDriver.class);

        // Mapper and reducer implementations.
        firstJob.setMapperClass(OneShareFriendsMapper.class);
        firstJob.setReducerClass(OneShareFriendsReducer.class);

        // Key/value types emitted by the map phase.
        firstJob.setMapOutputKeyClass(Text.class);
        firstJob.setMapOutputValueClass(Text.class);

        // Key/value types of the final output.
        firstJob.setOutputKeyClass(Text.class);
        firstJob.setOutputValueClass(Text.class);

        // Input and output locations come from the command line.
        Path input = new Path(args[0]);
        FileInputFormat.setInputPaths(firstJob, input);
        Path output = new Path(args[1]);
        FileOutputFormat.setOutputPath(firstJob, output);

        // Submit and block until the job finishes.
        boolean succeeded = firstJob.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}
(4)第二次Mapper類
/**
 * Job 2 mapper: reads the output of job 1 ("friend TAB user,user,...,") and
 * emits (userA-userB, friend) for every pair of users that share the friend.
 *
 * <p>Users are sorted before pairing so that a pair is always written in the
 * same order (B-E, never E-B) and therefore reaches a single reduce call.
 * Lines without a tab-separated user list are skipped instead of failing the
 * task with an ArrayIndexOutOfBoundsException.
 */
public class TwoShareFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Reused for every emitted pair to avoid allocating two Text objects per
    // iteration of the quadratic pairing loop.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Emits one (user-user, friend) pair for each unordered pair of users.
     *
     * @param key     byte offset of the line (unused)
     * @param value   one line of job 1 output, e.g. {@code A\tI,K,C,B,G,F,H,O,D,}
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Line layout: friend TAB user,user,user,
        String line = value.toString();
        String[] friendAndPersons = line.split("\t");
        if (friendAndPersons.length < 2) {
            // Blank line or line without a user list: nothing to pair.
            return;
        }

        String friend = friendAndPersons[0];
        // String.split drops trailing empty strings, so the trailing comma
        // written by job 1 does not produce an empty user.
        String[] persons = friendAndPersons[1].split(",");
        Arrays.sort(persons);

        outValue.set(friend);
        for (int i = 0; i < persons.length - 1; i++) {
            for (int j = i + 1; j < persons.length; j++) {
                // Emit <user-user, friend>; identical pairs meet in one reduce call.
                outKey.set(persons[i] + "-" + persons[j]);
                context.write(outKey, outValue);
            }
        }
    }
}
(5)第二次Reducer類
/**
 * Job 2 reducer: for one user pair (the key), concatenates all of their
 * common friends, each followed by a single space.
 */
public class TwoShareFriendsReducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Writes (user-user, "friend friend ... ") for the given pair.
     *
     * @param key     the user pair, e.g. {@code A-B}
     * @param values  the friends shared by both users
     * @param context sink for the emitted pair
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder commonFriends = new StringBuilder();

        // Every friend is followed by one space, including the last one.
        for (Text shared : values) {
            commonFriends.append(shared.toString());
            commonFriends.append(" ");
        }

        Text result = new Text(commonFriends.toString());
        context.write(key, result);
    }
}
(6)第二次Driver類
/**
 * Driver for job 2: wires {@link TwoShareFriendsMapper} and
 * {@link TwoShareFriendsReducer} into a job, reading from {@code args[0]}
 * and writing to {@code args[1]}.
 */
public class TwoShareFriendsDriver {

    /**
     * Configures, submits and waits for the job; the process exit code is
     * 0 when the job succeeds and 1 otherwise.
     *
     * @param args {@code args[0]} = input path, {@code args[1]} = output path
     */
    public static void main(String[] args) throws Exception {
        // Create the job from a default configuration.
        Configuration conf = new Configuration();
        Job secondJob = Job.getInstance(conf);

        // The jar is located through this driver class.
        secondJob.setJarByClass(TwoShareFriendsDriver.class);

        // Mapper and reducer implementations.
        secondJob.setMapperClass(TwoShareFriendsMapper.class);
        secondJob.setReducerClass(TwoShareFriendsReducer.class);

        // Key/value types emitted by the map phase.
        secondJob.setMapOutputKeyClass(Text.class);
        secondJob.setMapOutputValueClass(Text.class);

        // Key/value types of the final output.
        secondJob.setOutputKeyClass(Text.class);
        secondJob.setOutputValueClass(Text.class);

        // Input and output locations come from the command line.
        Path input = new Path(args[0]);
        FileInputFormat.setInputPaths(secondJob, input);
        Path output = new Path(args[1]);
        FileOutputFormat.setOutputPath(secondJob, output);

        // Submit and block until the job finishes.
        boolean succeeded = secondJob.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}
程式碼實現方案二
mapper1.java
/*
 * keyin-valuein:   (A:B,C,D,F,E,O)  - user as key, friend list as value
 * map():           split valuein into individual friends and write each one
 *                  as keyout; write keyin as valueout
 * keyout-valueout: (friend:user)
 *                  (C:A),(C:B),(C:E)
 */
public class Example3Mapper1 extends Mapper<Text, Text, Text, Text> {

    // Reused output key; overwritten for every friend.
    private Text friendKey = new Text();

    @Override
    protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        String friendList = value.toString();
        String[] friends = friendList.split(",");

        // Emit (friend, user) for every entry of the friend list.
        for (int i = 0; i < friends.length; i++) {
            friendKey.set(friends[i]);
            context.write(friendKey, key);
        }
    }
}
reducer1.java
/*
 * keyin-valuein:   (friend:user)
 *                  (C:A),(C:B),(C:E)
 * reduce():        concatenate all users of one friend
 * keyout-valueout: (friend:user,user,user,user)
 */
public class Example3Reducer extends Reducer<Text, Text, Text, Text> {

    // Reused output value holding the joined user list.
    private Text joinedUsers = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        StringBuilder users = new StringBuilder();

        // Every user is followed by a comma, so the result ends with ','.
        for (Text user : value) {
            users.append(user.toString());
            users.append(",");
        }

        joinedUsers.set(users.toString());
        context.write(key, joinedUsers);
    }
}
mapper2.java
/*
 * keyin-valuein:   (friend \t user,user,user,user)
 * map():           use keyin as valueout;
 *                  split valuein, join the users pairwise and use each pair
 *                  as keyout
 * keyout-valueout: (user-user, friend)
 *                  (A-B,C),(A-B,E)
 *                  (A-E,C), (A-G,C), (A-F,C), (A-K,C)
 *                  (B-E,C),(B-G,C)
 */
public class Example3Mapper2 extends Mapper<Text, Text, Text, Text> {

    // Reused output key holding the current "user-user" pair.
    private Text pairKey = new Text();

    @Override
    protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        String userList = value.toString();
        String[] users = userList.split(",");

        // Sort so that a pair is always written in the same order.
        Arrays.sort(users);

        // Pair every user with each user that follows it in sorted order.
        int count = users.length;
        for (int first = 0; first < count - 1; first++) {
            String left = users[first];
            for (int second = first + 1; second < count; second++) {
                String pair = left + "-" + users[second];
                pairKey.set(pair);
                context.write(pairKey, key);
            }
        }
    }
}
reducer2.java
/*
 * keyin-valuein:   (A-B,C),(A-B,E)
 * reduce():        join all common friends of one user pair with ','
 * keyout-valueout: (A-B:C,E)
 *
 * The comma is written only BETWEEN friends, so the value matches the
 * documented format "C,E" and carries no dangling trailing comma. This is
 * the final output of the pipeline, so no later job depends on a trailing
 * separator.
 */
public class Example3Reducer2 extends Reducer<Text, Text, Text, Text> {

    // Reused output value holding the joined friend list.
    private Text out_value = new Text();

    /**
     * Writes (user-user, "friend,friend,...") for the given pair.
     *
     * @param key     the user pair, e.g. A-B
     * @param value   the friends shared by both users
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (Text text : value) {
            // Separator goes before every element except the first one.
            if (!first) {
                sb.append(',');
            }
            sb.append(text.toString());
            first = false;
        }
        out_value.set(sb.toString());
        context.write(key, out_value);
    }
}
driver.java
/*
 * 1. Example3Driver submits two jobs.
 *    Job2 depends on Job1: it may only run after Job1 has finished and
 *    produced its output.
 *
 * 2. JobControl: defines a group of MR jobs and lets their dependencies be
 *    declared. Jobs are added with addJob(ControlledJob aJob).
 *
 * 3. ControlledJob: a job for which dependencies can be declared.
 *    addDependingJob(ControlledJob dependingJob): adds a job the current job
 *    depends on.
 *    public ControlledJob(Configuration conf): builds a ControlledJob from a
 *    configuration.
 */
public class Example3Driver {

    /** Pause between two polls of the JobControl state, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 500L;

    /**
     * Runs job 1 and job 2 in dependency order through a JobControl and
     * waits for both to finish. Exits with status 0 when no job failed and
     * with status 1 otherwise.
     *
     * @param args unused; input and output paths are fixed below
     * @throws Exception if configuring or running the jobs fails
     */
    public static void main(String[] args) throws Exception {
        // Input of job 1, intermediate output (input of job 2), final output.
        Path inputPath = new Path("e:/mrinput/friend");
        Path outputPath = new Path("e:/mroutput/friend");
        Path finalOutputPath = new Path("e:/mroutput/finalfriend");

        // Job 1 reads "user:friends" lines, so its key/value separator is ':'.
        Configuration conf1 = new Configuration();
        conf1.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ":");
        // Job 2 keeps the default separator for the output written by job 1.
        Configuration conf2 = new Configuration();

        // Make sure neither output directory exists before the jobs start.
        FileSystem fs = FileSystem.get(conf1);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        if (fs.exists(finalOutputPath)) {
            fs.delete(finalOutputPath, true);
        }

        // Create both jobs.
        Job job1 = Job.getInstance(conf1);
        Job job2 = Job.getInstance(conf2);

        job1.setJobName("index1");
        job2.setJobName("index2");

        // Configure job 1.
        job1.setMapperClass(Example3Mapper1.class);
        job1.setReducerClass(Example3Reducer.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job1, inputPath);
        FileOutputFormat.setOutputPath(job1, outputPath);
        job1.setInputFormatClass(KeyValueTextInputFormat.class);

        // Configure job 2; it consumes the output directory of job 1.
        job2.setMapperClass(Example3Mapper2.class);
        job2.setReducerClass(Example3Reducer2.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job2, outputPath);
        FileOutputFormat.setOutputPath(job2, finalOutputPath);
        job2.setInputFormatClass(KeyValueTextInputFormat.class);

        // Build the JobControl and declare that job 2 depends on job 1.
        JobControl jobControl = new JobControl("friends");
        ControlledJob controlledJob1 = new ControlledJob(job1.getConfiguration());
        ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
        controlledJob2.addDependingJob(controlledJob1);
        jobControl.addJob(controlledJob1);
        jobControl.addJob(controlledJob2);

        // Run the JobControl on a daemon thread so it cannot keep the JVM alive.
        Thread jobControlThread = new Thread(jobControl);
        jobControlThread.setDaemon(true);
        jobControlThread.start();

        // Poll until every job has finished; sleep between polls instead of
        // spinning, which would burn a full CPU core while the jobs run.
        while (!jobControl.allFinished()) {
            Thread.sleep(POLL_INTERVAL_MS);
        }

        System.out.println(jobControl.getSuccessfulJobList());

        // Report failures explicitly; otherwise a failed run looks like success.
        boolean anyFailed = !jobControl.getFailedJobList().isEmpty();
        if (anyFailed) {
            System.err.println("Failed jobs: " + jobControl.getFailedJobList());
        }

        // Ask the JobControl loop to terminate now that all jobs are done.
        jobControl.stop();
        System.exit(anyFailed ? 1 : 0);
    }
}