Chaining Multiple Jobs in MapReduce
By 阿新 · Published 2019-01-22
Finding mutual friends:
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K
In the data above, a line such as
A:B,C,D,F,E,O
means that B, C, D, E, F, and O are friends of user A.
1. Find the mutual friends of every pair of users.
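The solution chains two jobs. The first job (CF1) inverts the friend lists: for each person, it collects every user who has that person as a friend. The second job (CF2) sorts each such group and emits every pair in the group, keyed by the pair, with the shared person as the value; its reducer then concatenates all shared persons per pair. Traced on the sample data for the pair A-B (line order and the order of names within a value depend on the shuffle):

CF1, person C: A, B, E, F, G, H and K all list C as a friend, so CF1 outputs
    C    A-B-E-F-G-H-K

CF2, mapper: the sorted group A-B-E-F-G-H-K yields the pairs (A-B, C), (A-E, C), (A-F, C), and so on.
CF2, reducer: on this data set only the groups of C and E contain both A and B, so the final line for the pair is
    A-B    C,E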
CF1:
package mapreduce.exercise.cf;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * The first MapReduce job for finding mutual friends.
 */
public class CF1MR {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        Job job = Job.getInstance(conf);
        job.setJarByClass(CF1MR.class);
        job.setMapperClass(CF1MRMapper.class);
        job.setReducerClass(CF1MRReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Set up the input and output paths.
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);
        FileInputFormat.setInputPaths(job, inputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Submit the job and wait for it to finish.
        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }

    /**
     * Mapper logic: invert each friend list.
     */
    private static class CF1MRMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text keyOut = new Text();
        private Text valueOut = new Text();

        /**
         * value : A:B,C,D,F,E,O
         * For every friend F of user U, emit (F, U).
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            String outValue = split[0];
            valueOut.set(outValue);
            String[] keys = split[1].split(",");
            for (String outKey : keys) {
                keyOut.set(outKey);
                context.write(keyOut, valueOut);
            }
        }
    }

    /**
     * Reducer logic: for each person, collect every user who has that
     * person in their friend list.
     */
    private static class CF1MRReducer extends Reducer<Text, Text, Text, Text> {

        private Text valueOut = new Text();

        /**
         * Parameters received by one reduce() call:
         * key = E, values = A, B, G, ...
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text t : values) {
                sb.append(t.toString()).append("-");
            }
            String outValue = sb.substring(0, sb.length() - 1);
            valueOut.set(outValue);
            // key : E, value : A-B-G-...
            context.write(key, valueOut);
        }
    }
}
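On the sample data, CF1 produces one output line per person, listing everyone who has that person in their friend list (the order inside each value is not guaranteed). A few representative lines:

A    B-C-D-F-G-H-I-K-O
B    A-E-F-J
C    A-B-E-F-G-H-K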
CF2:
package mapreduce.exercise.cf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CF2MR {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        Job job = Job.getInstance(conf);
        job.setJarByClass(CF2MR.class);
        job.setMapperClass(CF2MRMapper.class);
        job.setReducerClass(CF2MRReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Set up the input and output paths.
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);
        FileInputFormat.setInputPaths(job, inputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Submit the job and wait for it to finish.
        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }

    /**
     * Mapper logic: emit every pair of users that share a friend.
     */
    private static class CF2MRMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text keyOut = new Text();
        private Text valueOut = new Text();

        /**
         * key   : byte offset of the line
         * value : E    A-B-G-...
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            String outValue = split[0];
            valueOut.set(outValue);

            List<String> userList = new ArrayList<>();
            for (String t : split[1].split("-")) {
                userList.add(t);
            }

            /*
             * Why sort?
             *
             * E    A-B-G-H
             * C    B-A-H-G   ---- sort ----> A-B-G-H
             *
             * Without sorting, the same pair can be emitted under two
             * different keys:
             *     A-B    E
             *     B-A    C
             * After sorting, both become:
             *     A-B    E
             *     A-B    C
             * because A-B === B-A.
             */
            Collections.sort(userList);
            int size = userList.size();
            for (int i = 0; i < size - 1; i++) {
                for (int j = i + 1; j < size; j++) {
                    String outKey = userList.get(i) + "-" + userList.get(j);
                    keyOut.set(outKey);
                    // key : A-B, value : E
                    context.write(keyOut, valueOut);
                }
            }
        }
    }

    /**
     * Reducer logic: concatenate all mutual friends of a pair.
     */
    private static class CF2MRReducer extends Reducer<Text, Text, Text, Text> {

        private Text valueOut = new Text();

        /**
         * key    : A-B
         * values : E, C, G, H
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text t : values) {
                sb.append(t.toString()).append(",");
            }
            String outValue = sb.substring(0, sb.length() - 1);
            valueOut.set(outValue);
            context.write(key, valueOut);
        }
    }
}
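CF2's output is the final answer: each line is a pair of users followed by their mutual friends. On the sample data the result includes, for example:

A-B    C,E
A-C    D,F
B-E    C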
The code that chains the jobs together:
package mapreduce.exercise.cf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Chains multiple MapReduce jobs into one complete task.
 */
public class CFMR {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);

        // First job: invert the friend lists.
        Job cf1job = Job.getInstance(conf);
        cf1job.setJarByClass(CFMR.class);
        cf1job.setMapperClass(CF1MRMapper.class);
        cf1job.setReducerClass(CF1MRReducer.class);
        cf1job.setMapOutputKeyClass(Text.class);
        cf1job.setMapOutputValueClass(Text.class);
        cf1job.setOutputKeyClass(Text.class);
        cf1job.setOutputValueClass(Text.class);

        Path inputPath1 = new Path(args[0]);
        Path outputPath1 = new Path(args[1]);
        FileInputFormat.setInputPaths(cf1job, inputPath1);
        if (fs.exists(outputPath1)) {
            fs.delete(outputPath1, true);
        }
        FileOutputFormat.setOutputPath(cf1job, outputPath1);

        // Second job: reads the first job's output.
        Job cf2job = Job.getInstance(conf);
        cf2job.setJarByClass(CFMR.class);
        cf2job.setMapperClass(CF2MRMapper.class);
        cf2job.setReducerClass(CF2MRReducer.class);
        cf2job.setMapOutputKeyClass(Text.class);
        cf2job.setMapOutputValueClass(Text.class);
        cf2job.setOutputKeyClass(Text.class);
        cf2job.setOutputValueClass(Text.class);

        Path inputPath2 = new Path(args[1]);
        Path outputPath2 = new Path(args[2]);
        FileInputFormat.setInputPaths(cf2job, inputPath2);
        if (fs.exists(outputPath2)) {
            fs.delete(outputPath2, true);
        }
        FileOutputFormat.setOutputPath(cf2job, outputPath2);

        /*
         * Submission.
         *
         * Improved submission: run several jobs with dependencies as one
         * chain. Here, job2 depends on job1's output, so job2 must run
         * after job1 and can only be submitted once job1 has completed.
         *
         * ControlledJob/JobControl manages these dependencies and submits
         * the whole chain.
         */
        ControlledJob job1 = new ControlledJob(cf1job, null);

        List<ControlledJob> dpdsJobs = new ArrayList<>();
        dpdsJobs.add(job1);
        ControlledJob job2 = new ControlledJob(cf2job, dpdsJobs);

        JobControl jc = new JobControl("cf mr");
        jc.addJob(job1);
        jc.addJob(job2);

        // JobControl is a Runnable: run it in its own thread and poll
        // until all jobs have finished.
        Thread cfThread = new Thread(jc);
        cfThread.start();

        while (true) {
            boolean allFinished = jc.allFinished();
            Thread.sleep(2000);
            System.out.println("All jobs finished: " + allFinished);
            if (allFinished) {
                break;
            }
        }
        // Stop the JobControl thread before exiting.
        jc.stop();
        System.exit(0);
    }

    private static class CF1MRMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text keyOut = new Text();
        private Text valueOut = new Text();

        /**
         * value : A:B,C,D,F,E,O
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            String outValue = split[0];
            valueOut.set(outValue);
            String[] keys = split[1].split(",");
            for (String outKey : keys) {
                keyOut.set(outKey);
                context.write(keyOut, valueOut);
            }
        }
    }

    private static class CF1MRReducer extends Reducer<Text, Text, Text, Text> {

        private Text valueOut = new Text();

        /**
         * Parameters received by one reduce() call:
         * key = E, values = A, B, G, ...
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text t : values) {
                sb.append(t.toString()).append("-");
            }
            String outValue = sb.substring(0, sb.length() - 1);
            valueOut.set(outValue);
            // key : E, value : A-B-G-...
            context.write(key, valueOut);
        }
    }

    private static class CF2MRMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text keyOut = new Text();
        private Text valueOut = new Text();

        /**
         * key   : byte offset of the line
         * value : E    A-B-G-...
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            String outValue = split[0];
            valueOut.set(outValue);

            List<String> userList = new ArrayList<>();
            for (String t : split[1].split("-")) {
                userList.add(t);
            }

            // Sort so that each pair is always emitted under a single
            // key: A-B === B-A (see CF2MR above for the full explanation).
            Collections.sort(userList);
            int size = userList.size();
            for (int i = 0; i < size - 1; i++) {
                for (int j = i + 1; j < size; j++) {
                    String outKey = userList.get(i) + "-" + userList.get(j);
                    keyOut.set(outKey);
                    // key : A-B, value : E
                    context.write(keyOut, valueOut);
                }
            }
        }
    }

    private static class CF2MRReducer extends Reducer<Text, Text, Text, Text> {

        private Text valueOut = new Text();

        /**
         * key    : A-B
         * values : E, C, G, H
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text t : values) {
                sb.append(t.toString()).append(",");
            }
            String outValue = sb.substring(0, sb.length() - 1);
            valueOut.set(outValue);
            context.write(key, valueOut);
        }
    }
}
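A minimal sketch of launching the chained driver (the jar name and HDFS paths below are hypothetical; the three arguments are the raw friend-list input, the intermediate directory written by the first job and read by the second, and the final output directory):

hadoop jar cf.jar mapreduce.exercise.cf.CFMR /cf/input /cf/tmp /cf/output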