1. 程式人生 > 其它 >hive_面試題_行轉列&列轉行

hive_面試題_行轉列&列轉行

1.列轉行

1.說明

-- 說明 : 將 一列資料 轉換成一行資料
-- 使用函式 : collect_set : 返回分組內元素組成的陣列(對元素去重)
--            collect_list : 返回分組內元素組成的陣列(對元素不去重)
--            concat_ws('指定分隔符', arr) : 將陣列中的所有元素用指定分隔符拼接成一個字串,類似 arr.mkString("分隔符")

2.示例

-- 資料準備
-- DDL
-- DDL
-- Login records: one row per login event, so the same (user_id, login_date)
-- pair can appear several times.
create table logintab (
    `user_id`    string comment '使用者id',
    `login_date` string comment '登入日期'
)
comment '使用者登入記錄表'
row format delimited fields terminated by '\t'
lines terminated by '\n'
stored as orc;

-- DML
-- The select aliases (id, occur_date) differ from the target column names;
-- the insert maps the selected columns to the table columns by position.
insert overwrite table logintab
select '1001' as id, '2021-12-12' as occur_date union all
select '1001' as id, '2021-12-20' as occur_date union all
select '1001' as id, '2022-02-10' as occur_date union all
select '1001' as id, '2021-12-20' as occur_date union all
select '1002' as id, '2021-12-12' as occur_date union all
select '1001' as id, '2021-12-13' as occur_date union all
select '1001' as id, '2021-12-13' as occur_date union all
select '1001' as id, '2021-12-13' as occur_date union all
select '1002' as id, '2021-12-14' as occur_date union all
select '1001' as id, '2021-12-14' as occur_date union all
select '1002' as id, '2021-12-15' as occur_date union all
select '1001' as id, '2021-12-15' as occur_date union all
select '1003' as id, '2021-12-15' as occur_date union all
select '1003' as id, '2021-12-16' as occur_date union all
select '1003' as id, '2021-12-17' as occur_date union all
select '1003' as id, '2021-12-18' as occur_date union all
select '1003' as id, '2021-12-29' as occur_date union all
select '1001' as id, '2022-01-01' as occur_date union all
select '1001' as id, '2022-01-01' as occur_date union all
select '1001' as id, '2022-01-01' as occur_date union all
select '1001' as id, '2022-01-03' as occur_date union all
select '1001' as id, '2022-01-05' as occur_date union all
select '1001' as id, '2022-01-06' as occur_date union all
select '1003' as id, '2021-12-19' as occur_date
;

-- Query: collapse each user's login dates into a single row.
--   list1  : every login date of the user, duplicates kept
--   set1   : the user's distinct login dates
--   ws_set : the distinct login dates joined into one '*'-separated string
select
    user_id
    ,collect_list(login_date) as list1
    ,collect_set(login_date) as set1
    ,concat_ws('*', collect_set(login_date)) as ws_set
from logintab
group by user_id;

-- Query result (columns: user_id, list1, set1, ws_set)
-- 1001
--   list1  : ["2021-12-12","2021-12-20","2022-02-10","2021-12-20","2021-12-13","2021-12-13","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-01","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]
--   set1   : ["2021-12-12","2021-12-20","2022-02-10","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]
--   ws_set : 2021-12-12*2021-12-20*2022-02-10*2021-12-13*2021-12-14*2021-12-15*2022-01-01*2022-01-03*2022-01-05*2022-01-06
-- 1002
--   list1  : ["2021-12-12","2021-12-14","2021-12-15"]
--   set1   : ["2021-12-12","2021-12-14","2021-12-15"]
--   ws_set : 2021-12-12*2021-12-14*2021-12-15
-- 1003
--   list1  : ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"]
--   set1   : ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"]
--   ws_set : 2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19
-- Time taken: 9.349 seconds, Fetched: 3 row(s)

2.行轉列

1.說明

-- 說明 : 將一行資料 轉換成 一列資料
-- 使用函式 : explode、lateral view

2.示例

-- DDL
-- One row per user; login_dates packs all of the user's login dates into a
-- single '*'-separated string (see the rows inserted below).
create table logintab1 (
    `user_id`     string comment '使用者id',
    `login_dates` string comment '登入日期'
)
comment '使用者登入記錄表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DML
-- Replace the table contents with three users, each carrying a '*'-separated
-- list of login dates. The select aliases are cosmetic: columns are written
-- to the table in select order.
insert overwrite table logintab1
          select '1001' as id, '2021-12-12*2021-12-20*2022-02-10' as occur_date
union all select '1002' as id, '2021-12-12*2021-12-14*2021-12-15' as occur_date
union all select '1003' as id, '2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19' as occur_date
;

-- 執行sql
-- Expand each user's packed date string into one output row per date.
-- split() takes a regular expression, so the literal '*' separator is
-- escaped as '\\*'; explode() then emits one row per array element, and
-- lateral view joins those rows back to the originating user_id.
select
    user_id,
    t1.login_date
from logintab1
    lateral view explode(split(login_dates, '\\*')) t1 as login_date;

-- 查詢結果
user_id t1.login_date
1001    2021-12-12
1001    2021-12-20
1001    2022-02-10
1002    2021-12-12
1002    2021-12-14
1002    2021-12-15
1003    2021-12-15
1003    2021-12-16
1003    2021-12-17
1003    2021-12-18
1003    2021-12-29
1003    2021-12-19
Time taken: 0.032 seconds, Fetched: 12 row(s)

3.需求 : 求出uid,name, 琴_成績,棋_成績,書_成績,畫_成績, 如果沒有參加某一門考試,結果成績為0

1.資料準備

-- 資料準備
-- Student master data: one row per student.
create table userinfo (
    uid  string comment '學號',
    name string comment '姓名',
    city string comment '所在城市'
)
comment '學生基本資訊表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DDL
-- Exam results: one row per (student, course) exam taken.
-- Note that score is declared as a string, not a numeric type.
create table scores (
    uid      string comment '學號',
    courseid string comment '課程id',
    score    string comment '得分分數'
)
comment '考試分數記錄表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DML
-- Replace the table contents with the four sample students.
insert overwrite table userinfo
          select '1' as uid, '劉備' as name, '保定' as city
union all select '2' as uid, '關羽' as name, '山西' as city
union all select '3' as uid, '趙雲' as name, '常山' as city
union all select '4' as uid, '張飛' as name, '涿州' as city
;

-- Replace the table contents with the sample exam results.
-- courseid is one of the four courses 琴 / 棋 / 書 / 畫; a student who did not
-- take a course simply has no row for it.
insert overwrite table scores
select '1' as uid,'琴' as courseid,'100' as score union all
select '1' as uid,'棋' as courseid,'99' as score union all
select '1' as uid,'書' as courseid,'88' as score union all
select '1' as uid,'畫' as courseid,'77' as score union all
select '2' as uid,'琴' as courseid,'60' as score union all
select '2' as uid,'棋' as courseid,'50' as score union all
select '3' as uid,'琴' as courseid,'70' as score union all
select '4' as uid,'琴' as courseid,'99' as score union all
select '4' as uid,'棋' as courseid,'77' as score union all
select '4' as uid,'書' as courseid,'100' as score union all
select '4' as uid,'畫' as courseid,'80' as score
;

-- Data notes
-- userinfo holds the basic information of each student
-- scores holds the exam results
--        four courses are examined (琴, 棋, 書, 畫); if a student did not take
--        one of them, scores has no row for that student/course

2.執行sql 

-- Requirement 1: return uid, name and the 琴 / 棋 / 書 / 畫 scores;
--                a course the student did not take is reported as 0
-- Sample output layout (illustrates the shape only; the values are not the
-- ones inserted above)
--+------+---------+------+------+------+------+--+
--| uid  |  name   | 琴_成績  |棋_成績  | 書_成績  | 畫_成績  |
--+------+---------+------+------+------+------+--+
--| 1    | 劉備     | 95   | 60   | 95   | 70   |
--| 2    | 關羽     | 70   | 85   | 80   | 80   |

-- Query: pivot the per-course rows of scores into one row per student.
--   * userinfo is the driving table, so a student with no score rows at all
--     is still returned (every course falls into the else branch and yields 0).
--   * Each if() keeps the score only for its own course and 0 otherwise;
--     max() then picks that score within the student's group.
--   * score is stored as a string, so it is cast to int before max();
--     a string comparison would rank '99' above '100'.
select
t1.uid
,name
,max(if(t2.courseid = '琴', cast(t2.score as int), 0)) as `琴_成績`
,max(if(t2.courseid = '棋', cast(t2.score as int), 0)) as `棋_成績`
,max(if(t2.courseid = '書', cast(t2.score as int), 0)) as `書_成績`
,max(if(t2.courseid = '畫', cast(t2.score as int), 0)) as `畫_成績`

from userinfo t1
left outer join scores t2
on t1.uid = t2.uid
group by t1.uid
,name
;

--查詢結果
t1.uid  name    琴_成績 棋_成績 書_成績 畫_成績
1       劉備    100     99      88      77
2       關羽    60      50      0       0
3       趙雲    70      0       0       0
4       張飛    99      77      100     80