hive 外部表 建立示例
# Create one HDFS directory per event type. Each directory backs one Hive
# external table, so it must exist before the data file is uploaded into it.
hdfs dfs -mkdir -p /external/sr/sr_created
hdfs dfs -mkdir -p /external/sr/sr_assign
hdfs dfs -mkdir -p /external/sr/sr_cancelled
hdfs dfs -mkdir -p /external/sr/sr_handle
hdfs dfs -mkdir -p /external/sr/sr_received
# Upload each local data file into its matching directory.
hdfs dfs -put sr_created.txt /external/sr/sr_created/
hdfs dfs -put sr_assign.txt /external/sr/sr_assign/
hdfs dfs -put sr_cancelled.txt /external/sr/sr_cancelled/
hdfs dfs -put sr_handle.txt /external/sr/sr_handle/
hdfs dfs -put sr_received.txt /external/sr/sr_received/
-- External table over the tab-delimited sr_created data file.
-- LOCATION must be the HDFS directory the file was uploaded to
-- (/external/sr/sr_created); the previous '/external/sc/...' path did not
-- match the upload path.
CREATE EXTERNAL TABLE sr_created (
    ticket_id  STRING,
    phone      STRING,
    event_time STRING  -- stored as a plain string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/external/sr/sr_created';
-- External table over the tab-delimited sr_assign data file.
-- NOTE: the table name keeps the spelling `sr_assgin` because the query in
-- this file refers to the table by that name. Only LOCATION is corrected: the
-- data was uploaded to /external/sr/sr_assign, not '/external/sc/sr_assgin'.
CREATE EXTERNAL TABLE sr_assgin (
    ticket_id  STRING,
    phone      STRING,
    event_time STRING  -- stored as a plain string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/external/sr/sr_assign';
-- External table over the tab-delimited sr_handle data file.
-- LOCATION must be the HDFS directory the file was uploaded to
-- (/external/sr/sr_handle); the previous '/external/sc/...' path did not
-- match the upload path.
CREATE EXTERNAL TABLE sr_handle (
    ticket_id  STRING,
    phone      STRING,
    event_time STRING  -- stored as a plain string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/external/sr/sr_handle';
-- External table over the tab-delimited sr_cancelled data file.
-- LOCATION must be the HDFS directory the file was uploaded to
-- (/external/sr/sr_cancelled); the previous '/external/sc/...' path did not
-- match the upload path.
CREATE EXTERNAL TABLE sr_cancelled (
    ticket_id  STRING,
    phone      STRING,
    event_time STRING  -- stored as a plain string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/external/sr/sr_cancelled';
-- External table over the tab-delimited sr_received data file.
-- LOCATION must be the HDFS directory the file was uploaded to
-- (/external/sr/sr_received); the previous '/external/sc/...' path did not
-- match the upload path.
CREATE EXTERNAL TABLE sr_received (
    ticket_id  STRING,
    phone      STRING,
    event_time STRING  -- stored as a plain string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/external/sr/sr_received';
--- 情況一:map join 為預設開啟,但未開啟並行執行的情況下
(hive.exec.parallel 未設定,使用預設值 false)
-- For every ticket_id present in sr_created, return the maximum event_time
-- found in sr_created, sr_assgin and sr_handle respectively.
-- Both joins are left joins keyed on t1.ticket_id, so a ticket with no
-- matching row in sr_assgin / sr_handle yields NULL in that column.
SELECT
    t1.event_time,
    t2.event_time,
    t3.event_time
FROM (
    SELECT
        ticket_id,
        MAX(event_time) AS event_time
    FROM sr_created
    GROUP BY ticket_id
) t1
LEFT JOIN (
    SELECT
        ticket_id,
        MAX(event_time) AS event_time
    FROM sr_assgin
    GROUP BY ticket_id
) t2
    ON t1.ticket_id = t2.ticket_id
LEFT JOIN (
    SELECT
        ticket_id,
        MAX(event_time) AS event_time
    FROM sr_handle
    GROUP BY ticket_id
) t3
    ON t1.ticket_id = t3.ticket_id;
當多個 join 的條件(join key)一致時,Hive 會把這些 join 合併成一個 job 執行
Time taken: 74.025 seconds(執行時間 74.025 s;虛擬機器配置低,生產環境會有所不同)
--- 情況二:開啟並行執行,且 map join 為預設開啟的情況下
set hive.exec.parallel=true;
Launching Job 1 out of 5
Launching Job 2 out of 5
Launching Job 3 out of 5
但是開啟並行後,這些 job 是同時執行的。上面的查詢為什麼能並行執行?因為其中有 3 個互相獨立的 group by 子查詢。
Time taken: 74.492 seconds 執行時間 74.492 s;
----- 情況三:開啟並行執行,並且關閉 map join(set hive.auto.convert.join=false;)的情況下
Launching Job 1 out of 4
Launching Job 2 out of 4
Launching Job 3 out of 4
74.437 seconds