hive_面試題_行轉列&列轉行
阿新 • • 發佈:2022-03-30
1.列轉行
1.說明
-- 說明 : 將一列資料轉換成一行資料
-- 使用函式 :
--   collect_set  : 返回分組內元素組成的陣列(array),對元素去重
--   collect_list : 返回分組內元素組成的陣列(array),對元素不去重
--   concat_ws('指定分隔符', arr) : 返回將陣列中所有元素用指定分隔符拼接的字串,類似 Scala 的 arr.mkString("分隔符")
2.示例
-- Example: collapse many rows into one row per user with
-- collect_list / collect_set / concat_ws.

-- DDL: one row per login event; the same (user_id, login_date) may repeat.
-- NOTE(review): the delimiter clauses presumably have no effect on an
-- ORC-stored table -- confirm before relying on them.
create table logintab (
    `user_id`    string comment '使用者id',
    `login_date` string comment '登入日期'
)
comment '使用者登入記錄表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DML: seed 24 login records.
-- Fix: the separator after the third record was misspelled `unionall`,
-- which made the whole statement fail to parse.
insert overwrite table logintab
select '1001' as user_id, '2021-12-12' as login_date
union all select '1001' as user_id, '2021-12-20' as login_date
union all select '1001' as user_id, '2022-02-10' as login_date
union all select '1001' as user_id, '2021-12-20' as login_date
union all select '1002' as user_id, '2021-12-12' as login_date
union all select '1001' as user_id, '2021-12-13' as login_date
union all select '1001' as user_id, '2021-12-13' as login_date
union all select '1001' as user_id, '2021-12-13' as login_date
union all select '1002' as user_id, '2021-12-14' as login_date
union all select '1001' as user_id, '2021-12-14' as login_date
union all select '1002' as user_id, '2021-12-15' as login_date
union all select '1001' as user_id, '2021-12-15' as login_date
union all select '1003' as user_id, '2021-12-15' as login_date
union all select '1003' as user_id, '2021-12-16' as login_date
union all select '1003' as user_id, '2021-12-17' as login_date
union all select '1003' as user_id, '2021-12-18' as login_date
union all select '1003' as user_id, '2021-12-29' as login_date
union all select '1001' as user_id, '2022-01-01' as login_date
union all select '1001' as user_id, '2022-01-01' as login_date
union all select '1001' as user_id, '2022-01-01' as login_date
union all select '1001' as user_id, '2022-01-03' as login_date
union all select '1001' as user_id, '2022-01-05' as login_date
union all select '1001' as user_id, '2022-01-06' as login_date
union all select '1003' as user_id, '2021-12-19' as login_date
;

-- Query: per user, gather the login dates as
--   list1  - array keeping duplicates,
--   set1   - array with duplicates removed,
--   ws_set - the de-duplicated dates joined into one '*'-separated string.
select
    user_id
    ,collect_list(login_date) as list1
    ,collect_set(login_date) as set1
    ,concat_ws('*', collect_set(login_date)) as ws_set
from logintab
group by user_id;

-- Query result (columns: user_id, list1, set1, ws_set)
-- 1001
--   list1 : ["2021-12-12","2021-12-20","2022-02-10","2021-12-20","2021-12-13","2021-12-13","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-01","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]
--   set1  : ["2021-12-12","2021-12-20","2022-02-10","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]
--   ws_set: 2021-12-12*2021-12-20*2022-02-10*2021-12-13*2021-12-14*2021-12-15*2022-01-01*2022-01-03*2022-01-05*2022-01-06
-- 1002
--   list1 : ["2021-12-12","2021-12-14","2021-12-15"]
--   set1  : ["2021-12-12","2021-12-14","2021-12-15"]
--   ws_set: 2021-12-12*2021-12-14*2021-12-15
-- 1003
--   list1 : ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"]
--   set1  : ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"]
--   ws_set: 2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19
-- Time taken: 9.349 seconds, Fetched: 3 row(s)
2.行轉列
1.說明
-- 說明 : 將一行資料 轉換成 一列資料 -- 使用函式 : explode、lateral view
2.示例
-- Example: expand one delimited string per user into one row per login date
-- with split + explode + lateral view.

-- DDL: login_dates holds all of a user's login dates joined with '*'.
create table logintab1 (
    `user_id`     string comment '使用者id',
    `login_dates` string comment '登入日期'
)
comment '使用者登入記錄表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DML: seed one row per user.
insert overwrite table logintab1
select
    '1001' as id
    ,'2021-12-12*2021-12-20*2022-02-10' as occur_date
union all
select
    '1002' as id
    ,'2021-12-12*2021-12-14*2021-12-15' as occur_date
union all
select
    '1003' as id
    ,'2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19' as occur_date
;

-- Query: split login_dates on a literal '*' (escaped because the separator
-- is a regular expression), explode the resulting array into rows, and
-- join each row back to its user_id through the lateral view.
select
    user_id
    ,t1.login_date
from logintab1
lateral view explode(split(login_dates,'\\*')) t1 as login_date;

-- Query result
-- user_id  t1.login_date
-- 1001     2021-12-12
-- 1001     2021-12-20
-- 1001     2022-02-10
-- 1002     2021-12-12
-- 1002     2021-12-14
-- 1002     2021-12-15
-- 1003     2021-12-15
-- 1003     2021-12-16
-- 1003     2021-12-17
-- 1003     2021-12-18
-- 1003     2021-12-29
-- 1003     2021-12-19
-- Time taken: 0.032 seconds, Fetched: 12 row(s)
3.需求 : 求出uid,name, 琴_成績,棋_成績,書_成績,畫_成績, 如果沒有參加某一門考試,結果成績為0
1.資料準備
-- Data preparation for the score-pivot exercise.

-- DDL: basic student information, one row per student.
create table userinfo (
    uid  string comment '學號',
    name string comment '姓名',
    city string comment '所在城市'
)
comment '學生基本資訊表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DDL: exam scores, one row per (student, course) that was actually taken.
create table scores (
    uid      string comment '學號',
    courseid string comment '課程id',
    score    string comment '得分分數'
)
comment '考試分數記錄表'
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as orc;

-- DML: four students.
insert overwrite table userinfo
select '1' as uid, '劉備' as name, '保定' as city
union all
select '2' as uid, '關羽' as name, '山西' as city
union all
select '3' as uid, '趙雲' as name, '常山' as city
union all
select '4' as uid, '張飛' as name, '涿州' as city
;

-- DML: eleven score records; students 2 and 3 are missing some courses.
insert overwrite table scores
select '1' as uid, '琴' as courseid, '100' as score
union all
select '1' as uid, '棋' as courseid, '99' as score
union all
select '1' as uid, '書' as courseid, '88' as score
union all
select '1' as uid, '畫' as courseid, '77' as score
union all
select '2' as uid, '琴' as courseid, '60' as score
union all
select '2' as uid, '棋' as courseid, '50' as score
union all
select '3' as uid, '琴' as courseid, '70' as score
union all
select '4' as uid, '琴' as courseid, '99' as score
union all
select '4' as uid, '棋' as courseid, '77' as score
union all
select '4' as uid, '書' as courseid, '100' as score
union all
select '4' as uid, '畫' as courseid, '80' as score
;

-- Data notes
-- userinfo stores the students' basic information.
-- scores stores the exam score records.
-- There are four courses in total: 琴, 棋, 書, 畫. If a student did not sit
-- one of the exams, scores simply has no row for that (student, course).
2.執行sql
-- Requirement 1: return uid, name, 琴_成績, 棋_成績, 書_成績, 畫_成績.
-- A course the student did not sit must be reported as 0.
--
-- Expected output layout (the sample values below do not correspond to the
-- seed data; they only illustrate the shape):
-- +------+---------+------+------+------+------+--+
-- | uid | name | 琴_成績 |棋_成績 | 書_成績 | 畫_成績 |
-- +------+---------+------+------+------+------+--+
-- | 1 | 劉備 | 95 | 60 | 95 | 70 |
-- | 2 | 關羽 | 70 | 85 | 80 | 80 |

-- Query: pivot the per-course rows of scores into one row per student.
--   * userinfo is the driving table, so a student with no rows in scores at
--     all still appears, with 0 in every course column (driving from scores
--     would silently drop such a student).
--   * score is declared as string in the scores DDL; it is cast to int so
--     both branches of the case have the same type and max() compares
--     numbers rather than text.
--   * rows belonging to other courses (or the all-NULL row produced by the
--     left join) fall into `else 0`, which supplies the default of 0.
select
    u.uid as uid
    ,u.name as name
    ,max(case when s.courseid = '琴' then cast(s.score as int) else 0 end) as `琴_成績`
    ,max(case when s.courseid = '棋' then cast(s.score as int) else 0 end) as `棋_成績`
    ,max(case when s.courseid = '書' then cast(s.score as int) else 0 end) as `書_成績`
    ,max(case when s.courseid = '畫' then cast(s.score as int) else 0 end) as `畫_成績`
from userinfo u
left join scores s
    on s.uid = u.uid
group by
    u.uid
    ,u.name
;

-- Query result
-- uid  name  琴_成績  棋_成績  書_成績  畫_成績
-- 1    劉備  100      99       88       77
-- 2    關羽  60       50       0        0
-- 3    趙雲  70       0        0        0
-- 4    張飛  99       77       100      80