程式人生 > 利用排名函式進行Hive資料由豎到橫計算示例

利用排名函式進行Hive資料由豎到橫計算示例

1、源資料表結構、樣例資料及說明

-- Source (MySQL) table: one row per state change of a performer ("actor") in a
-- karaoke room's mic queue. Per the table comment it is sharded into 256
-- tables by room_id.
CREATE TABLE `karaoke_room_actor_snapshot_0` (
  -- primary key
  `id`             BIGINT(20)   NOT NULL AUTO_INCREMENT COMMENT '主鍵',
  -- actor id
  `actor_id`       BIGINT(20)   NOT NULL COMMENT '演員 ID',
  -- live room id
  `room_id`        INT(11)      NOT NULL COMMENT '直播間 ID',
  -- live status id
  `live_status_id` INT(11)      NOT NULL COMMENT '直播狀態 ID',
  -- uid of the performer
  `uid`            BIGINT(20)   NOT NULL COMMENT '表演者 Uid',
  -- backing-track id, -1 by default
  `music_id`       INT(11)      NOT NULL DEFAULT '-1' COMMENT '伴奏 ID',
  -- device identifier
  `identifier`     VARCHAR(190) NOT NULL DEFAULT '' COMMENT '裝置號',
  -- score, used as a sorting aid
  `score`          BIGINT(20)   NOT NULL DEFAULT '0' COMMENT '積分,輔助排序',
  -- state: 0 queueing for the mic, 1 preparing, 2 performing,
  --        -1 performance finished, -2 left voluntarily,
  --        -3 removed by the room owner, -4 removed by the system
  `state`          TINYINT(4)   NOT NULL DEFAULT '0' COMMENT '狀態:0排麥中,1準備中,2表演中,-1表演結束,-2主動下麥,-3房主強制下麥,-4系統強制下麥',
  -- creation time (microsecond precision)
  `created_time`   DATETIME(6)  NOT NULL COMMENT '建立時間',
  PRIMARY KEY (`id`),
  KEY `Index_roomId_lsId_uid` (`room_id`, `live_status_id`, `uid`),
  KEY `idx_actor_created` (`actor_id`, `created_time`),
  KEY `idx_room_live_state` (`room_id`, `live_status_id`, `state`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='麥序演員快照,按 room_id 256分表'
id	actor_id	room_id	live_status_id	uid	music_id	identifier	score	state	created_time
1	728	37194071	292	200134	62348	889134744	1531137151093	0	2018-07-09 19:52:31.000000
2	728	37194071	292	200134	62348	889134744	1531137151093	1	2018-07-09 19:52:33.000000
3	728	37194071	292	200134	62348	889134744	1531137151093	-3	2018-07-09 19:52:52.000000
4	729	37194071	292	200134	26081	889134744	1531137204230	0	2018-07-09 19:53:24.000000
5	729	37194071	292	200134	26081	889134744	1531137204230	1	2018-07-09 19:53:26.000000
6	729	37194071	292	200134	26081	889134744	1531137204230	-3	2018-07-09 19:53:44.000000
7	730	37194071	292	200134	26081	889134744	1531137395308	0	2018-07-09 19:56:35.000000
8	730	37194071	292	200134	26081	889134744	1531137395308	1	2018-07-09 19:56:38.000000
9	730	37194071	292	200134	26081	889134744	1531137395308	2	2018-07-09 19:56:45.000000
10	731	37194071	292	200119	84874	BBA67879-E1FB-4B0E-82E6-3841D8301EB2	1531137444615	0	2018-07-09 19:57:24.000000
11	732	37194071	292	330451222	114810	379085366	1531137497910	0	2018-07-09 19:58:17.000000
12	732	37194071	292	330451222	114810	379085366	1531137497910	-2	2018-07-09 19:58:38.000000
13	730	37194071	292	200134	26081	889134744	1531137395308	-3	2018-07-09 19:59:52.000000

說明:
使用者進入、等待、退出等各有一條記錄;一個使用者可以多次進入同一個房間;資料中沒有標記哪幾條記錄(進入、退出)屬於同一次,只能根據相鄰的時間來判斷;現在要求把豎狀表整理成橫向表,計算出使用者的表演時間、等待時間等。
2、問題分析及思路說明
從資料上觀察,使用者可以沒有準備中及表演中的記錄,但一定會有排麥中(state=0)及退出(state 小於 0)的記錄。因此可以先對這些記錄按時間排名來確定同一次上麥,然後根據該次的時間範圍鎖定其他活動記錄。
所以,先進行使用者資料排名,之後進行其他資料的相關計算。

3、具體sql語句
為了讓思路更清晰,主要採用多張臨時表分步處理的方式。

-- Build one row per mic session: (pt_day, room_id, uid, mic_start_time,
-- mic_over_time, mic_start_rn).
--
-- A session starts with a state = 0 (queueing) row and ends with the first
-- state < 0 (exit) row that follows it for the same (room_id, uid).
--
-- The previous version ranked start rows and exit rows independently and paired
-- them by equal rank. A single start without an exit (or an exit whose start
-- lies before the date window) shifted every later pair of that user, and the
-- extra pt_day equality in the join dropped the exit of any session crossing
-- midnight even though the ranks were computed across all days.
--
-- Here every start/exit row gets the running count of starts seen so far, so an
-- exit is always attached to the most recent preceding start.
drop table if exists xxxlv_mic_range;
create table xxxlv_mic_range as
with tab_mic_event as (
    select
        pt_day,
        room_id,
        uid,
        state,
        created_time,
        -- Running number of start rows; equals the start's row_number within
        -- (room_id, uid). On equal timestamps exits sort before starts so that
        -- an exit closes the earlier session.
        cast(
            sum(case when state = 0 then 1 else 0 end) over (
                partition by room_id, uid
                order by created_time asc, state asc
                rows between unbounded preceding and current row
            ) as int
        ) as mic_start_rn
    from oss_all_karaoke_room_actor_snapshot
    where pt_day between '2018-07-18' and '2018-07-26'
      and state <= 0
)
select
    -- Each group contains exactly one start row; pt_day is taken from it.
    max(case when state = 0 then pt_day end)       as pt_day,
    room_id,
    uid,
    max(case when state = 0 then created_time end) as mic_start_time,
    -- First exit after the start; NULL when the session has no exit row.
    min(case when state < 0 then created_time end) as mic_over_time,
    mic_start_rn
from tab_mic_event
-- mic_start_rn = 0 marks exits that precede the first start in the window.
where mic_start_rn > 0
group by room_id, uid, mic_start_rn;

-- "Preparing" events (state = 1) tagged with the mic session they fall into.
-- A snapshot row is kept when its created_time lies inside
-- [mic_start_time, mic_over_time] of a session of the same pt_day/room_id/uid;
-- sessions whose mic_over_time is NULL can never match.
-- NOTE(review): several state = 1 rows inside one session yield several
-- output rows for that session -- confirm this cannot happen in the data.
drop table if exists xxxlv_mic_ready;
create table xxxlv_mic_ready as
select
    snap.pt_day,
    snap.room_id,
    snap.uid,
    snap.state,
    snap.created_time as ready_time,
    rng.mic_start_time,
    rng.mic_over_time
from oss_all_karaoke_room_actor_snapshot snap
inner join xxxlv_mic_range rng
    on  snap.pt_day  = rng.pt_day
    and snap.room_id = rng.room_id
    and snap.uid     = rng.uid
where snap.pt_day >= '2018-07-18'
  and snap.pt_day <= '2018-07-26'
  and snap.state = 1
  and snap.created_time >= rng.mic_start_time
  and snap.created_time <= rng.mic_over_time;

-- "Performing" events (state = 2) tagged with the mic session they fall into.
-- Same shape as xxxlv_mic_ready: a snapshot row is kept when its created_time
-- lies inside [mic_start_time, mic_over_time] of a session with the same
-- pt_day/room_id/uid.
-- NOTE(review): several state = 2 rows inside one session yield several
-- output rows for that session -- confirm this cannot happen in the data.
drop table if exists xxxlv_mic_runing;
create table xxxlv_mic_runing as
select
    snap.pt_day,
    snap.room_id,
    snap.uid,
    snap.state,
    snap.created_time as runing_time,
    rng.mic_start_time,
    rng.mic_over_time
from oss_all_karaoke_room_actor_snapshot snap
inner join xxxlv_mic_range rng
    on  snap.pt_day  = rng.pt_day
    and snap.room_id = rng.room_id
    and snap.uid     = rng.uid
where snap.pt_day >= '2018-07-18'
  and snap.pt_day <= '2018-07-26'
  and snap.state = 2
  and snap.created_time >= rng.mic_start_time
  and snap.created_time <= rng.mic_over_time;

-- Pivot the vertical event rows into one horizontal row per mic session:
-- start, ready, running and over timestamps side by side.
-- ready_time / runing_time stay NULL when the session has no such event.
drop table if exists xxxlv_mic_basic;
create table xxxlv_mic_basic as
select
    rng.pt_day,
    rng.room_id,
    rng.uid,
    rng.mic_start_time,
    rdy.ready_time,
    run.runing_time,
    rng.mic_over_time
from xxxlv_mic_range rng
-- A session is identified by (pt_day, room_id, uid, start time, over time).
left join xxxlv_mic_ready rdy
    on  rng.pt_day         = rdy.pt_day
    and rng.room_id        = rdy.room_id
    and rng.uid            = rdy.uid
    and rng.mic_start_time = rdy.mic_start_time
    and rng.mic_over_time  = rdy.mic_over_time
left join xxxlv_mic_runing run
    on  rng.pt_day         = run.pt_day
    and rng.room_id        = run.room_id
    and rng.uid            = run.uid
    and rng.mic_start_time = run.mic_start_time
    and rng.mic_over_time  = run.mic_over_time
;

-- Per-session time log restricted to the rooms listed in xxxlv_user_info,
-- with the user's nickname attached.
--   act_times  : minutes from runing_time to mic_over_time
--                (0 when the session never reached the performing state)
--   wait_times : minutes from mic_start_time to runing_time
--                (to mic_over_time when the session never performed)
-- Both are NULL when mic_over_time is NULL.
--
-- xxxlv_user_info only acts as a room filter (none of its columns are
-- selected), so it is joined through a de-duplicated room list; joining the
-- raw table would multiply every session by the number of rows it holds for
-- that room_id and inflate the sums computed downstream.
drop table if exists xxxlv_mic_timelog;
create table xxxlv_mic_timelog as
select
    a1.pt_day,
    a1.uid,
    a3.nickname,
    a1.room_id,
    a1.runing_time,
    a1.mic_over_time,
    (unix_timestamp(a1.mic_over_time) - unix_timestamp(coalesce(a1.runing_time, a1.mic_over_time))) / 60 as act_times,
    (unix_timestamp(coalesce(a1.runing_time, a1.mic_over_time)) - unix_timestamp(a1.mic_start_time)) / 60 as wait_times,
    a1.mic_start_time
from xxxlv_mic_basic a1
inner join (
    select room_id
    from xxxlv_user_info
    group by room_id
) a2
    on a1.room_id = a2.room_id
-- Nicknames come from the fixed 2018-07-22 profile partition.
left join (
    select uid, nickname
    from oss_bi_all_user_profile
    where pt_day = '2018-07-22'
) a3
    on a1.uid = a3.uid;


-- Gifts received during each performance, aggregated per session.
-- A gift row counts toward a session when the receiver, room and pt_day match
-- and the gift's created_time lies in [runing_time, mic_over_time].
--   rechargeable_cnt : gift_count where source = 1
--   free_cnt         : gift_count where source = 2
--   all_cnt          : gift_count over all sources
--
-- The join is written as an inner join: the time-range predicate in WHERE
-- references the session columns, so rows without a matching session were
-- already discarded when this was spelled "left join". The range test stays in
-- WHERE rather than ON to keep the join condition equality-only.
drop table if exists xxxlv_gift_record_info21;
create table xxxlv_gift_record_info21 as
select
    a1.pt_day,
    a1.receive_uid,
    a1.room_id,
    a2.mic_start_time,
    a2.mic_over_time,
    sum(case when a1.source = 1 then a1.gift_count else 0 end) as rechargeable_cnt,
    sum(case when a1.source = 2 then a1.gift_count else 0 end) as free_cnt,
    sum(a1.gift_count) as all_cnt
from oss_all_karaoke_gift_record a1
inner join (
    -- Only sessions that actually reached the performing state.
    select pt_day, uid, room_id, runing_time, mic_over_time, mic_start_time
    from xxxlv_mic_timelog
    where runing_time is not null
) a2
    on  a1.pt_day      = a2.pt_day
    and a1.receive_uid = a2.uid
    and a1.room_id     = a2.room_id
where a1.pt_day between '2018-07-18' and '2018-07-26'
  and a1.created_time between a2.runing_time and a2.mic_over_time
group by a1.pt_day, a1.receive_uid, a1.room_id, a2.mic_start_time, a2.mic_over_time
;
-------------------------------------------------------------------------------------------
-- Result 2.1: one row per mic session with its timings and the gift counts
-- received during the performance. Gift columns are NULL for sessions that
-- have no matching row in xxxlv_gift_record_info21.
drop table if exists xxxlv_user_result21;
create table xxxlv_user_result21 as
select
    tl.pt_day,
    tl.uid,
    tl.nickname,
    tl.room_id,
    tl.runing_time,
    tl.mic_over_time,
    tl.act_times,
    tl.wait_times,
    gift.rechargeable_cnt,
    gift.free_cnt,
    gift.all_cnt
from xxxlv_mic_timelog tl
left join xxxlv_gift_record_info21 gift
    on  tl.pt_day         = gift.pt_day
    and tl.uid            = gift.receive_uid
    and tl.room_id        = gift.room_id
    and tl.mic_start_time = gift.mic_start_time
    and tl.mic_over_time  = gift.mic_over_time
;
-------------------------------------------------------------------------------------------
-- Result 2.2 is the roll-up of result 2.1: one row per (pt_day, uid, nickname,
-- room_id).
--   mic_cnt : number of mic sessions
--   act_cnt : number of sessions that have a runing_time
-- Gift sums stay NULL when none of the group's sessions has gift rows.
drop table if exists xxxlv_user_result22;
create table xxxlv_user_result22 as
select
    r.pt_day,
    r.uid,
    r.nickname,
    r.room_id,
    sum(r.act_times)  as act_times,
    sum(r.wait_times) as wait_times,
    count(*)          as mic_cnt,
    count(case when r.runing_time is not null then r.uid end) as act_cnt,
    sum(r.rechargeable_cnt) as rechargeable_cnt,
    sum(r.free_cnt)         as free_cnt,
    sum(r.all_cnt)          as all_cnt
from xxxlv_user_result21 r
group by
    r.pt_day,
    r.uid,
    r.nickname,
    r.room_id;