經典Hive-SQL面試題及答案
阿新 • • 發佈:2020-12-18
目錄
第一題 求分割槽累加值
我們有如下的使用者訪問資料
userId visitDate visitCount
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
U02 2017/1/23 6
U01 2017/2/22 4
要求使用SQL統計出每個使用者的累積訪問次數,如下表所示:
使用者id 月份 小計 累積
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
建立表並準備資料(使用 MySQL 8.0)。注意:樣本資料中的 U01、U02 與 u01、u02 只有大小寫不同;此表使用不分大小寫的定序 utf8mb4_0900_ai_ci,因此分組時它們會被視為同一個使用者。
-- Sample visit records for question 1 (MySQL 8.0).
-- visitDate is stored as text in the same 'Y/M/D' form used by the sample data.
CREATE TABLE `user_visit` (
    `userId`     varchar(255) NOT NULL,
    `visitDate`  varchar(255) NOT NULL,
    `visitCount` tinyint DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

-- Load the eight sample rows with a single multi-row INSERT.
INSERT INTO user_visit (userId, visitDate, visitCount)
VALUES
    ("u01", "2017/1/21", 5),
    ("u02", "2017/1/23", 6),
    ("u03", "2017/1/22", 8),
    ("u04", "2017/1/20", 3),
    ("u01", "2017/1/23", 6),
    ("u01", "2017/2/21", 8),
    ("U02", "2017/1/23", 6),
    ("U01", "2017/2/22", 4);
解法:
-- Per-user monthly visit subtotal plus a running total across that user's months.
-- Output columns: 使用者id, 月份 (formatted as year-month), 小計 (monthly subtotal),
-- 累積 (cumulative sum of the subtotals, in month order, per user).
WITH monthly AS (
    SELECT
        userId,
        -- visitDate is text such as '2017/1/21'; parse it explicitly instead of
        -- relying on MySQL's implicit relaxed-delimiter string-to-date coercion.
        DATE_FORMAT(STR_TO_DATE(visitDate, '%Y/%c/%e'), '%Y-%m') AS visit_month,
        SUM(visitCount) AS subtotal
    FROM user_visit
    GROUP BY userId, visit_month
)
SELECT
    m.userId      AS 使用者id,
    m.visit_month AS 月份,
    m.subtotal    AS 小計,
    SUM(m.subtotal) OVER (
        PARTITION BY m.userId
        -- userId is constant inside a partition, so only the month drives the order.
        ORDER BY m.visit_month
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS 累積
FROM monthly AS m;
第二題 UV和每個店鋪訪問量top3資訊
有50W個京東店鋪,每個顧客訪問任何一個店鋪的任何一個商品時都會產生一條訪問日誌,訪問日誌儲存的表名為Visit,訪客的使用者id為user_id,被訪問的店鋪名稱為shop,資料如下:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
請統計:
(1)每個店鋪的UV(訪客數)
(2)每個店鋪訪問次數top3的訪客資訊。輸出店鋪名稱、訪客id、訪問次數
建立表,準備資料,使用mysql8.0
-- Visit log for question 2 (MySQL 8.0): each row pairs a visitor (user_id) with the shop visited.
CREATE TABLE `Visit` (
    `user_id` varchar(255) NOT NULL,
    `shop`    varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

-- The 19 sample log rows, one tuple per line, in the same order as the problem statement.
INSERT INTO Visit (user_id, shop)
VALUES
    ("u1", "a"),
    ("u2", "b"),
    ("u1", "b"),
    ("u1", "a"),
    ("u3", "c"),
    ("u4", "b"),
    ("u1", "a"),
    ("u2", "c"),
    ("u5", "b"),
    ("u4", "b"),
    ("u6", "c"),
    ("u2", "c"),
    ("u1", "b"),
    ("u2", "a"),
    ("u2", "a"),
    ("u3", "a"),
    ("u5", "a"),
    ("u5", "a"),
    ("u5", "a");
(1)
-- Variant 1: de-duplicate with DISTINCT — count the distinct visitors of each shop.
SELECT
    shop,
    COUNT(DISTINCT user_id) AS UV
FROM Visit
GROUP BY shop;

-- Variant 2: de-duplicate with GROUP BY — collapse the log to unique (shop, user_id)
-- pairs first, then count the remaining rows per shop.
SELECT
    pairs.shop,
    COUNT(pairs.user_id) AS UV
FROM (
    SELECT shop, user_id
    FROM Visit
    GROUP BY shop, user_id
) AS pairs
GROUP BY pairs.shop;
結果(兩種寫法的輸出相同):
shop UV
a 4
b 4
c 3
(2)
-- Top 3 visitors of every shop by number of visits.
-- Output columns: shop, user_id, user_shop_count.
SELECT
    ranked.shop,
    ranked.user_id,
    ranked.user_shop_count
FROM (
    SELECT
        per_user.shop,
        per_user.user_id,
        per_user.user_shop_count,
        ROW_NUMBER() OVER (
            PARTITION BY per_user.shop
            -- user_id is a tie-breaker: without it, visitors with the same count are
            -- numbered in arbitrary order and the rows returned can differ between runs.
            ORDER BY per_user.user_shop_count DESC, per_user.user_id
        ) AS shop_top
    FROM (
        -- Visits per (shop, visitor).
        SELECT
            shop,
            user_id,
            COUNT(*) AS user_shop_count
        FROM Visit
        GROUP BY shop, user_id
    ) AS per_user
) AS ranked
WHERE ranked.shop_top <= 3;
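結果(同一店鋪內次數相同者的先後順序不固定):
shop user_id user_shop_count
a u1 3
a u5 3
a u2 2
b u1 2
b u4 2
b u2 1
c u2 2
c u3 1
c u6 1
其中 b 店鋪的 u2 與 u5 同為 1 次、並列第 3,row_number() 只會輸出其中一位(此處以 u2 為例)。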
Hive sql解答
-- Question 1 (Hive): sample table and data.
CREATE TABLE user_visit (
    userId     STRING,
    visitDate  STRING,
    visitCount INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY "\t";

-- The eight sample rows, one tuple per line.
INSERT INTO TABLE user_visit
VALUES
    ('u01', '2017/1/21', 5),
    ('u02', '2017/1/23', 6),
    ('u03', '2017/1/22', 8),
    ('u04', '2017/1/20', 3),
    ('u01', '2017/1/23', 6),
    ('u01', '2017/2/21', 8),
    ('u02', '2017/1/23', 6),
    ('u01', '2017/2/22', 4);
-- Step 1: turn the '/'-separated date text into '-'-separated text and format it as year-month.
-- The pattern is 'yyyy-MM': lowercase 'y' is the calendar year, whereas uppercase 'Y'
-- is the week-based year and yields the wrong year for some days around New Year.
-- NOTE(review): relies on Hive accepting non-zero-padded dates such as '2017-1-21' — confirm
-- on the target Hive version.
SELECT date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM')
FROM user_visit;

-- Step 2: the same month expression, alongside the user and the visit count.
SELECT
    userId,
    date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') AS visitMonth,
    visitCount
FROM user_visit;

-- Step 3: monthly subtotal per user. The month is computed in a subquery so that the
-- outer query can group by the visitMonth column.
SELECT
    userId,
    visitMonth,
    SUM(visitCount) AS subtotal
FROM (
    SELECT
        userId,
        date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') AS visitMonth,
        visitCount
    FROM user_visit
) t1
GROUP BY userId, visitMonth;
-- Final answer: per-user monthly subtotal plus a running total over that user's months.
-- Output columns: userid, visitMonth, subtotal, totals.
SELECT
    t.userId AS userid,
    t.visitMonth,
    t.subtotal,
    SUM(t.subtotal) OVER (
        PARTITION BY t.userId
        -- userId is constant inside a partition, so ordering by month alone is equivalent.
        ORDER BY t.visitMonth
    ) AS totals
FROM (
    -- Monthly subtotal per user.
    SELECT
        userId,
        visitMonth,
        SUM(visitCount) AS subtotal
    FROM (
        SELECT
            userId,
            -- 'yyyy' (calendar year), not 'YYYY' (week-based year), so that days around
            -- New Year are assigned to the correct year.
            date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') AS visitMonth,
            visitCount
        FROM user_visit
    ) t1
    GROUP BY userId, visitMonth
) t;
-- Question 2 (Hive): sample table and data.
CREATE TABLE Visit (
    user_id STRING,
    shop    STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

-- The 19 sample log rows, one tuple per line, in the same order as the problem statement.
INSERT INTO TABLE Visit
VALUES
    ('u1', 'a'),
    ('u2', 'b'),
    ('u1', 'b'),
    ('u1', 'a'),
    ('u3', 'c'),
    ('u4', 'b'),
    ('u1', 'a'),
    ('u2', 'c'),
    ('u5', 'b'),
    ('u4', 'b'),
    ('u6', 'c'),
    ('u2', 'c'),
    ('u1', 'b'),
    ('u2', 'a'),
    ('u2', 'a'),
    ('u3', 'a'),
    ('u5', 'a'),
    ('u5', 'a'),
    ('u5', 'a');
(1)
-- Variant 1: de-duplicate with DISTINCT — count the distinct visitors of each shop.
SELECT
    shop,
    COUNT(DISTINCT user_id) UV
FROM Visit
GROUP BY shop;

-- Variant 2: de-duplicate with GROUP BY — collapse the log to unique (shop, user_id)
-- pairs first, then count the remaining rows per shop.
SELECT
    pairs.shop,
    COUNT(pairs.user_id) UV
FROM (
    SELECT shop, user_id
    FROM Visit
    GROUP BY shop, user_id
) pairs
GROUP BY pairs.shop;
(2)
-- Top 3 visitors of every shop by number of visits.
-- Output columns: shop, user_id, user_shop_count.
SELECT
    ranked.shop,
    ranked.user_id,
    ranked.user_shop_count
FROM (
    SELECT
        per_user.shop,
        per_user.user_id,
        per_user.user_shop_count,
        row_number() OVER (
            PARTITION BY per_user.shop
            -- user_id is a tie-breaker: without it, visitors with the same count are
            -- numbered in arbitrary order and the rows returned can differ between runs.
            ORDER BY per_user.user_shop_count DESC, per_user.user_id
        ) shop_top
    FROM (
        -- Visits per (shop, visitor).
        SELECT
            shop,
            user_id,
            COUNT(*) user_shop_count
        FROM Visit
        GROUP BY shop, user_id
    ) per_user
) ranked
WHERE ranked.shop_top <= 3;