MR任務之後提交Hadoop批量索引任務
阿新 • • 發佈:2019-01-03
MR程式執行成功之後,生成的JSON資料放入指定的目錄,然後利用HTTP的POST,向druid.io的Overlord節點提交索引任務
/**
 * After the MR job finishes, submits a hadoop-index task to the Druid Overlord.
 *
 * @param dataSource         Druid datasource the segments are written into
 * @param intervals          interval string forwarded to the index-task JSON
 * @param outputPaths        HDFS output directories produced by the MR job
 * @param segmentGranularity segment granularity forwarded to the index-task JSON
 */
private static void submitHadoopIndexTask(FileSystem fileSystem, String dataSource, String
intervals, List<Path> outputPaths, String segmentGranularity) {
    // Collect the resolved path string for every MR output directory.
    List<String> paths = new ArrayList<>();
    for (Path outputPath : outputPaths) {
        String tmpPath = getPaths(fileSystem, outputPath);
        if (tmpPath != null) {
            paths.add(tmpPath);
        }
    }
    String path = String.join(",", paths);
    // String.join never returns null, so only an emptiness check is needed
    // (the original `path != null` test was dead code).
    if (!path.isEmpty()) {
        String json = HadoopIndexClient.easyJson(dataSource, path, intervals, segmentGranularity, queryGranularity);
        HttpClientUtil.post(druidTask, json);
    } else {
        System.out.println("Paths are null");
    }
}
其hadoop-index的json描述檔案可以根據設定的引數進行修改:
package net.icsoc.cti.report.druid;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import net.icsoc.cti.report.CtiReportBatch;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*;
/*******************************************************************************
* 版權資訊:北京中通天鴻武漢分公司
* @author xuchang
* Copyright: Copyright (c) 2007北京中通天鴻武漢分公司,Inc.All Rights Reserved.
* Description:
******************************************************************************/
public class HadoopIndexClient {
    /**
     * Builds the JSON payload for a Druid hadoop-index task by loading the
     * "index-hadoop.json" template from the classpath and patching in the
     * dataSource, intervals, granularities and input paths for this run.
     *
     * @param dataSource         Druid datasource to write segments into
     * @param paths              comma-separated list of input paths
     * @param interval           interval placed into granularitySpec.intervals
     * @param segmentGranularity value for granularitySpec.segmentGranularity
     * @param queryGranularity   value for granularitySpec.queryGranularity
     * @return the serialized task JSON, or {@code null} if the template could
     *         not be read or parsed (errors are printed, not rethrown)
     */
    public static String easyJson(String dataSource, String paths, String interval, String segmentGranularity, String queryGranularity) {
        // try-with-resources guarantees both streams are closed, replacing the
        // manual (and double-closing) finally block of the original version.
        try (InputStream inputStream =
                     CtiReportBatch.class.getClassLoader().getResourceAsStream("index-hadoop.json")) {
            if (inputStream == null) {
                // Missing resource used to surface as a NullPointerException.
                System.out.println("index-hadoop.json not found on classpath");
                return null;
            }
            StringBuilder json = new StringBuilder();
            // Read with an explicit charset; the platform default is not portable.
            try (BufferedReader reader =
                         new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
                String s;
                while ((s = reader.readLine()) != null) {
                    json.append(s);
                }
            }
            Map<String, Object> map = JSON.parseObject(json.toString(), new TypeReference<Map<String, Object>>() {
            });
            // Patch the parsed template with the values for this run.
            Map<String, Object> spec = (Map<String, Object>) map.get("spec");
            Map<String, Object> dataSchema = (Map<String, Object>) spec.get("dataSchema");
            dataSchema.put("dataSource", dataSource);
            Map<String, Object> granularity = (Map<String, Object>) dataSchema.get("granularitySpec");
            List<String> intervals = new ArrayList<>();
            intervals.add(interval);
            granularity.put("intervals", intervals);
            granularity.put("segmentGranularity", segmentGranularity);
            granularity.put("queryGranularity", queryGranularity);
            Map<String, Object> ioConfig = (Map<String, Object>) spec.get("ioConfig");
            Map<String, Object> inputSpec = (Map<String, Object>) ioConfig.get("inputSpec");
            inputSpec.put("paths", paths);
            return JSON.toJSONString(map);
        } catch (Exception e) {
            // Keep the original best-effort contract: report and return null.
            System.out.println(e.getMessage());
        }
        return null;
    }

    /** Prints the current UTC timestamp in ISO-8601 form (manual sanity check). */
    public static void main(String[] args) {
        // java.time replaces the non-thread-safe SimpleDateFormat/Calendar pair;
        // ZoneOffset.UTC matches the original TimeZone.getTimeZone("GMT").
        DateTimeFormatter format = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX")
                .withZone(ZoneOffset.UTC);
        System.out.println(format.format(Instant.now()));
    }
}
利用HTTPClient實現任務提交:
package net.icsoc.cti.report.utils;
import net.icsoc.cti.report.druid.HadoopIndexClient;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.CharArrayBuffer;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/*******************************************************************************
* 版權資訊:北京中通天鴻武漢分公司
* @author xuchang
* Copyright: Copyright (c) 2007北京中通天鴻武漢分公司,Inc.All Rights Reserved.
* Description: MapReduce使用oss的工具類
******************************************************************************/
public class HttpClientUtil {
    private static Logger logger = LoggerFactory.getLogger(HttpClientUtil.class);

    /**
     * POSTs a JSON body to the given URL and returns the response body.
     *
     * @param url  target URL (e.g. the Druid Overlord task endpoint)
     * @param json JSON payload to send
     * @return the response body as a string, or {@code null} if the request failed
     */
    public static String post(String url, String json) {
        HttpClient httpclient = new DefaultHttpClient();
        String content = null;
        try {
            HttpPost httppost = new HttpPost(url);
            // Encode the body as UTF-8 explicitly; the no-charset constructor
            // used ISO-8859-1 and corrupted non-ASCII characters. The old
            // setContentEncoding("UTF-8") was wrong: Content-Encoding is for
            // compression (gzip etc.), not character sets.
            StringEntity postEntity = new StringEntity(json, "UTF-8");
            postEntity.setContentType("application/json; charset=UTF-8");
            httppost.setEntity(postEntity);
            // Execute and read the (JSON) response body.
            HttpResponse response = httpclient.execute(httppost);
            HttpEntity resEntity = response.getEntity();
            content = EntityUtils.toString(resEntity);
            if (StringUtils.isEmpty(content)) {
                logger.error("[httpUtils] 返回的結果型別不包含結果 返回的結果為空");
            }
            logger.debug("[httpUtils] load TaskInfo success");
        } catch (Exception e) {
            // Pass the throwable itself so the stack trace reaches the log;
            // the old logger.info call had no placeholder, silently dropping
            // e.getMessage(), and printStackTrace() bypassed the logger.
            logger.error("access api url wrong!!", e);
        } finally {
            // Release the underlying connection pool.
            httpclient.getConnectionManager().shutdown();
        }
        return content;
    }
}