hadoop提交作業------>yarn提交job的原始碼跟蹤
阿新 • • 發佈:2019-01-08
一、流程分析圖
二 、程式碼跟進
waitForCompletion(){
submit()}
------>
class Job{
private Cluster cluster;
submit(){
connect(){
cluster = new Cluster(getConfiguration()) //持有Client的引用,作為job的成員變數
}
JobSubmitter submitter = getJobSubmitter(cluster.getFileSystem(), cluster.getClient()) //cluster.getClient()得到Cluster中的client成員變數,
//而client中具有RPCserver的代理物件的引用,所以submitter是一個可以連線RPCserver的提交器
submitter.submitJobInternal(Job.this, cluster)
}
}
------>
Cluster{
private ClientProtocol client;
public Cluster(InetSocketAddress jobTrackAddr, Configuration conf){
this.initialize(jobTrackAddr, conf);
}
initialize(){
client = clientProtocol //clientProtocol由ClientProtocolProvider.create()建立;得到RPCserver(即ResourceManager)的代理物件,作為Cluster的成員變數
}
}
------>
submitter.submitJobInternal(Job, cluster){
Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf); //得到ResourceManager傳送的路徑
JobID jobId = this.submitClient.getNewJobID(); //得到ResourceManager傳送的JobId
Path submitJobDir = new Path(jobStagingArea, jobId.toString()); //將得到的路徑作為字首和JobId作為字尾,拼成一個新的路徑
copyAndConfigureFiles(job, submitJobDir); //上傳jar包
int maps = writeSplits(job, submitJobDir); //獲取切片資訊
writeConf(conf, submitJobFile); //寫描述檔案xml
}