1. 程式人生 > >MR任務之後提交Hadoop批量索引任務

MR任務之後提交Hadoop批量索引任務

MR程式執行成功之後,生成的JSON資料會放入指定的目錄,然後利用HTTP的POST請求,向druid.io的Overlord節點提交Hadoop批量索引任務:

    private static void submitHadoopIndexTask(FileSystem fileSystem, String dataSource, String
            intervals, List<Path> outputPaths, String segmentGranularity) {
        // 任務完成之後,提交一個hadoopIndex任務
        List<String
> paths = new ArrayList<>(); for (Path outputPath : outputPaths) { String tmpPath = getPaths(fileSystem, outputPath); if (tmpPath != null) paths.add(tmpPath); } String path = String.join(",", paths); //增加延遲到當前時間戳計算的資料 if (path !=
null && !path.equals("")) { String json = HadoopIndexClient.easyJson(dataSource, path, intervals, segmentGranularity, queryGranularity); HttpClientUtil.post(druidTask, json); } else { System.out.println("Paths are null"); } }

其hadoop-index的json描述檔案可以根據設定的引數進行修改:

package net.icsoc.cti.report.druid;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import net.icsoc.cti.report.CtiReportBatch;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;

/*******************************************************************************
 * 版權資訊:北京中通天鴻武漢分公司
 * @author xuchang
 * Copyright: Copyright (c) 2007北京中通天鴻武漢分公司,Inc.All Rights Reserved.
 * Description:
 ******************************************************************************/
/**
 * Builds the JSON task spec for a Hadoop batch-index task by loading the
 * {@code index-hadoop.json} template from the classpath and overriding selected fields.
 */
public class HadoopIndexClient {

    /** Classpath name of the task-spec template. */
    private static final String TEMPLATE_RESOURCE = "index-hadoop.json";

    /**
     * Loads the template and fills in the data source, granularity settings and input paths.
     *
     * @param dataSource         value stored at {@code spec.dataSchema.dataSource}
     * @param paths              value stored at {@code spec.ioConfig.inputSpec.paths}
     * @param interval           single interval stored (as a one-element list) at
     *                           {@code spec.dataSchema.granularitySpec.intervals}
     * @param segmentGranularity value stored at {@code granularitySpec.segmentGranularity}
     * @param queryGranularity   value stored at {@code granularitySpec.queryGranularity}
     * @return the serialized task spec, or {@code null} if the template could not be read,
     *         parsed, or does not contain the expected sections
     */
    public static String easyJson(String dataSource, String paths, String interval, String segmentGranularity, String queryGranularity) {
        try {
            Map<String, Object> map = JSON.parseObject(readTemplate(), new TypeReference<Map<String, Object>>() {
            });
            if (map == null) {
                throw new IllegalStateException("Template is empty: " + TEMPLATE_RESOURCE);
            }

            // Override the parts of the template that vary per submission.
            Map<String, Object> spec = child(map, "spec");
            Map<String, Object> dataSchema = child(spec, "dataSchema");
            dataSchema.put("dataSource", dataSource);

            Map<String, Object> granularity = child(dataSchema, "granularitySpec");
            List<String> intervals = new ArrayList<>();
            intervals.add(interval);
            granularity.put("intervals", intervals);
            granularity.put("segmentGranularity", segmentGranularity);
            granularity.put("queryGranularity", queryGranularity);

            Map<String, Object> ioConfig = child(spec, "ioConfig");
            Map<String, Object> inputSpec = child(ioConfig, "inputSpec");
            inputSpec.put("paths", paths);

            return JSON.toJSONString(map);
        } catch (Exception e) {
            // Callers rely on a null return for failure; print the full exception (not just
            // getMessage(), which is null for many exceptions) so the cause is visible.
            System.out.println("Failed to build hadoop index task json: " + e);
        }
        return null;
    }

    /**
     * Reads the template resource into a single string, concatenating its lines
     * without separators.
     *
     * @throws IOException           if reading the resource fails
     * @throws IllegalStateException if the resource is not on the classpath
     */
    private static String readTemplate() throws IOException {
        InputStream inputStream = CtiReportBatch.class.getClassLoader().getResourceAsStream(TEMPLATE_RESOURCE);
        if (inputStream == null) {
            throw new IllegalStateException("Resource not found on classpath: " + TEMPLATE_RESOURCE);
        }
        // Closing the reader also closes the wrapped stream; decode explicitly as UTF-8
        // instead of depending on the platform default charset.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            StringBuilder json = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                json.append(line);
            }
            return json.toString();
        }
    }

    /**
     * Returns the nested JSON object stored under {@code key}.
     *
     * @throws IllegalStateException if the key is absent or does not hold an object
     */
    @SuppressWarnings("unchecked") // guarded by the instanceof check below
    private static Map<String, Object> child(Map<String, Object> parent, String key) {
        Object value = parent.get(key);
        if (!(value instanceof Map)) {
            throw new IllegalStateException("Template is missing object field: " + key);
        }
        return (Map<String, Object>) value;
    }

    /** Prints the current time formatted as an ISO-8601 timestamp using a GMT calendar. */
    public static void main(String[] args) {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
        format.setCalendar(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
        System.out.println(format.format(new Date()));
    }
}

利用HTTPClient實現任務提交:

package net.icsoc.cti.report.utils;

import net.icsoc.cti.report.druid.HadoopIndexClient;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.CharArrayBuffer;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/*******************************************************************************
 * 版權資訊:北京中通天鴻武漢分公司
 * @author xuchang
 * Copyright: Copyright (c) 2007北京中通天鴻武漢分公司,Inc.All Rights Reserved.
 * Description: MapReduce使用oss的工具類
 ******************************************************************************/
/**
 * Small helper for POSTing a JSON document to an HTTP endpoint.
 */
public class HttpClientUtil {
    private static final Logger logger = LoggerFactory.getLogger(HttpClientUtil.class);

    /**
     * POSTs {@code json} to {@code url} as {@code application/json} and returns the response body.
     *
     * @param url  target endpoint
     * @param json request body
     * @return the response body as text; {@code null} if an argument is null, the request
     *         fails, or the response carries no entity
     */
    public static String post(String url, String json) {
        if (url == null || json == null) {
            logger.error("[httpUtils] url or json body is null, nothing to post");
            return null;
        }
        HttpClient httpclient = new DefaultHttpClient();
        String content = null;
        try {
            HttpPost httppost = new HttpPost(url);
            // Encode the body explicitly as UTF-8: without a charset argument StringEntity
            // uses the library default (ISO-8859-1 in HttpClient 4.x), which corrupts
            // non-Latin text. Content-Encoding is deliberately not set: that header names
            // codings such as gzip, not character sets.
            StringEntity postEntity = new StringEntity(json, "UTF-8");
            postEntity.setContentType("application/json");
            httppost.setEntity(postEntity);

            HttpResponse response = httpclient.execute(httppost);
            int status = response.getStatusLine().getStatusCode();
            HttpEntity resEntity = response.getEntity();
            if (resEntity != null) {
                // UTF-8 is only the fallback when the response declares no charset.
                content = EntityUtils.toString(resEntity, "UTF-8");
            }

            if (status != HttpStatus.SC_OK) {
                logger.error("[httpUtils] post to {} returned HTTP {}: {}", url, status, content);
            } else if (StringUtils.isEmpty(content)) {
                logger.error("[httpUtils] 返回的結果型別不包含結果  返回的結果為空");
            } else {
                logger.debug("[httpUtils] load TaskInfo success");
            }
        } catch (Exception e) {
            // Pass the exception as the last argument so SLF4J logs the stack trace.
            logger.error("access api url wrong!! url=" + url, e);
        } finally {
            // Release the connection and all resources held by the client.
            httpclient.getConnectionManager().shutdown();
        }
        return content;
    }
}