1. 程式人生 > 其它 >Hudi-通過Hive查詢hudi表資料

Hudi-通過Hive查詢hudi表資料

環境準備

整合 jar 包:將 hudi-hadoop-mr-bundle-0.10.1.jar 放入 $HIVE_HOME/lib 目錄下

建外部表

-- Create the database that holds the Hive-side definition of the Hudi table.
-- IF NOT EXISTS makes this step re-runnable, consistent with the
-- IF NOT EXISTS guards already used by the other DDL statements in this script.
CREATE DATABASE IF NOT EXISTS db_hudi;

-- Make db_hudi the current database for the unqualified table name used next.
USE db_hudi;

-- External Hive table mapped onto the dataset stored under
-- /hudi-warehouse/tbl_didi_haikou. Data files are read through the Hudi
-- HoodieParquetInputFormat together with the Parquet SerDe, and the table
-- is partitioned by date_str.
CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi (
    order_id             BIGINT,
    product_id           INT,
    city_id              INT,
    district             INT,
    county               INT,
    type                 INT,
    combo_type           INT,
    traffic_type         INT,
    passenger_count      INT,
    driver_product_id    INT,
    start_dest_distance  INT,
    arrive_time          STRING,
    departure_time       STRING,
    pre_total_fee        DOUBLE,
    normal_time          STRING,
    bubble_trace_id      STRING,
    product_1level       INT,
    dest_lng             DOUBLE,
    dest_lat             DOUBLE,
    starting_lng         DOUBLE,
    starting_lat         DOUBLE,
    ts                   BIGINT,
    partitionpath        STRING
)
PARTITIONED BY (date_str STRING)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT  'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION '/hudi-warehouse/tbl_didi_haikou';

手動加入分割槽

-- Manually register the partitions 2017-5-22 through 2017-6-10.
-- Each date_str value is bound to the directory of the same name under
-- /hudi-warehouse/tbl_didi_haikou. All 20 partitions are added in one
-- ALTER TABLE statement, and IF NOT EXISTS skips any that are already registered.
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS
    PARTITION (date_str = '2017-5-22') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22'
    PARTITION (date_str = '2017-5-23') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23'
    PARTITION (date_str = '2017-5-24') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24'
    PARTITION (date_str = '2017-5-25') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25'
    PARTITION (date_str = '2017-5-26') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26'
    PARTITION (date_str = '2017-5-27') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27'
    PARTITION (date_str = '2017-5-28') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28'
    PARTITION (date_str = '2017-5-29') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29'
    PARTITION (date_str = '2017-5-30') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30'
    PARTITION (date_str = '2017-5-31') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31'
    PARTITION (date_str = '2017-6-1')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1'
    PARTITION (date_str = '2017-6-2')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2'
    PARTITION (date_str = '2017-6-3')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3'
    PARTITION (date_str = '2017-6-4')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4'
    PARTITION (date_str = '2017-6-5')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5'
    PARTITION (date_str = '2017-6-6')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6'
    PARTITION (date_str = '2017-6-7')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7'
    PARTITION (date_str = '2017-6-8')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8'
    PARTITION (date_str = '2017-6-9')  LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9'
    PARTITION (date_str = '2017-6-10') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';

檢視分割槽

-- List the partitions currently registered for db_hudi.tbl_hudi_didi.
SHOW PARTITIONS db_hudi.tbl_hudi_didi;

指標統計

-- Development/testing only: switch the execution mode to automatic local mode.
SET hive.exec.mode.local.auto=true;

-- Limits attached to automatic local mode.
-- NOTE(review): presumably the task-count, input-size (bytes) and input-file-count
-- thresholds below which local mode is chosen - confirm against the Hive
-- configuration reference for the Hive version in use.
SET hive.exec.mode.local.auto.tasks.max=10;
SET hive.exec.mode.local.auto.inputbytes.max=88801103;
SET hive.exec.mode.local.auto.input.files.max=50;

-- Run the session in non-strict mode.
SET hive.mapred.mode=nonstrict;
-- Metric 1: order type statistics.
-- Counts orders per product_id, then maps each product_id to a display label.
-- Any product_id other than 1-4 (including NULL) falls into the ELSE label.
WITH orders_by_product AS (
    SELECT
        product_id,
        COUNT(*) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY product_id
)
SELECT
    CASE
        WHEN product_id = 1 THEN '滴滴專車'
        WHEN product_id = 2 THEN '滴滴企業專車'
        WHEN product_id = 3 THEN '滴滴快車'
        WHEN product_id = 4 THEN '滴滴企業快車'
        ELSE '未知'
    END AS order_type,
    total
FROM orders_by_product;

-- Metric 2: order timeliness statistics.
-- Counts orders per type value, then labels 0 and 1 with their display names.
-- Every other value (including NULL) falls into the ELSE label.
WITH orders_by_type AS (
    SELECT
        type,
        COUNT(*) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY type
)
SELECT
    CASE
        WHEN type = 0 THEN '實時'
        WHEN type = 1 THEN '預約'
        ELSE '未知'
    END AS order_type,
    total
FROM orders_by_type;

-- Metric 3: order traffic type statistics.
-- One output row per distinct traffic_type value with its order count.
SELECT
    traffic_type,
    COUNT(*) AS total
FROM
    db_hudi.tbl_hudi_didi
GROUP BY
    traffic_type;

-- Metric 5: order price statistics.
-- Splits pre_total_fee into price ranges and counts the orders in each range
-- using SUM over a CASE WHEN flag (1 when the row is in the range, else 0).
--
-- pre_total_fee is a DOUBLE, so the ranges must be contiguous. Integer
-- BETWEEN bounds such as 0-15 and 16-30 would leave fractional fees
-- (for example 15.5 or 30.2) outside every bucket. Each bucket is therefore
-- open on its lower bound and closed on its upper bound, so whole-number fees
-- land in the same bucket as before and fractional fees are no longer dropped.
-- Rows with a NULL or negative pre_total_fee are not counted in any bucket.
SELECT
    SUM(
        CASE WHEN pre_total_fee >= 0 AND pre_total_fee <= 15 THEN 1 ELSE 0 END
    ) AS 0_15,
    SUM(
        CASE WHEN pre_total_fee > 15 AND pre_total_fee <= 30 THEN 1 ELSE 0 END
    ) AS 16_30,
    SUM(
        CASE WHEN pre_total_fee > 30 AND pre_total_fee <= 50 THEN 1 ELSE 0 END
    ) AS 31_50,
    SUM(
        CASE WHEN pre_total_fee > 50 AND pre_total_fee <= 100 THEN 1 ELSE 0 END
    ) AS 51_100,
    SUM(
        CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END
    ) AS 100_
FROM db_hudi.tbl_hudi_didi;