1. 程式人生 > 其它 > hive 使用者訪問時長分析問題

hive 使用者訪問時長分析問題

需求描述

資料如下(樣本資料見下方「資料準備」中的 insert 語句)

問題:

  1. 使用者總量,使用者平均年齡,使用者平均觀看時長
  2. 每10歲一個分段,統計每個區間的使用者總量,使用者平均觀看時長
  3. 每個使用者最喜歡的節目
  4. 觀看時長大於5min的使用者總量,只要有一個節目使用者觀看時間小於5min就不能算

資料準備

-- Fixture for the exercises below: one row per viewing event
-- (view_date is a yyyymmdd number, playtime is the watch time of that event,
-- which the exercises treat as minutes).
-- The table is dropped first so the script can be rerun. Without the drop a
-- second run fails on CREATE TABLE and would append the sample rows again.
drop table if exists temp_userlook_0305;

-- The ORC codec name is written in upper case, as the ORC table-property
-- documentation spells it.
create table temp_userlook_0305
(
    view_date bigint,
    user_id   string,
    age       int,
    programid string,
    playtime  int
) stored as orc
    tblproperties ('orc.compress' = 'SNAPPY');

-- An explicit column list keeps the load correct if the column order changes.
insert into temp_userlook_0305 (view_date, user_id, age, programid, playtime)
values (20220221, 'u1', 30, 'a', 4),
       (20220221, 'u1', 30, 'b', 10),
       (20220221, 'u1', 30, 'a', 2),
       (20220221, 'u2', 23, 'c', 1),
       (20220222, 'u3', 26, 'd', 3),
       (20220223, 'u2', 23, 'a', 2);

-- Sanity check of the loaded rows.
select view_date, user_id, age, programid, playtime
from temp_userlook_0305;
需求一、使用者總量,使用者平均年齡,使用者平均觀看時長
-- Requirement 1: user count, average age and average total watch time.
-- The inner query collapses the events to one row per (user_id, age) with the
-- summed playtime, and the outer query aggregates across those rows.
select
    count(per_user.user_id)    as user_cnt,
    avg(per_user.age)          as age_avg,
    avg(per_user.playtime_sum) as playtime_sum_avg
from (
    select
        user_id,
        age,
        sum(playtime) as playtime_sum
    from temp_userlook_0305
    group by
        user_id,
        age
) per_user;

需求二、每10歲一個分段,統計每個區間的使用者總量,使用者平均觀看時長

-- Requirement 2: user count and average total watch time per 10-year age band.
-- age_band_start is the lower bound of the band (20 means ages 20 to 29). It is
-- returned so that each result row can be tied to its interval.
-- floor(age / 10) replaces the int(...) shorthand and gives the same band for
-- non-negative ages.
select
    floor(t.age / 10) * 10 as age_band_start,
    count(t.user_id)       as user_cnt,
    avg(t.playtime_sum)    as playtime_sum_avg
from (
    select
        user_id,
        age,
        sum(playtime) as playtime_sum
    from temp_userlook_0305
    group by
        user_id,
        age
) t
group by
    floor(t.age / 10) * 10;

需求三、每個使用者最喜歡的節目

-- Requirement 3: each user's favourite program, i.e. the program with the
-- largest summed playtime for that user.
-- dense_rank gives every tied program rank 1, so a user with several
-- equally-watched top programs gets one row per program.
select
    ranked.user_id,
    ranked.programid,
    ranked.age
from (
    select
        totals.user_id,
        totals.programid,
        totals.age,
        totals.playtime_sum,
        dense_rank() over (
            partition by totals.user_id
            order by totals.playtime_sum desc
        ) as rn
    from (
        select
            user_id,
            programid,
            age,
            sum(playtime) as playtime_sum
        from temp_userlook_0305
        group by
            user_id,
            programid,
            age
    ) totals
) ranked
where ranked.rn = 1;

需求四、觀看時長大於5min的使用者總量,只要有一個節目使用者觀看時間小於5min就不能算

-- Requirement 4: number of users who have no viewing row with playtime < 5.
-- This is an anti-join: short_viewers lists every user with at least one short
-- row, and the outer filter keeps only users that found no match there.
-- The ON clause is essential. Without it the join is a Cartesian product, so
-- the count is 0 whenever any row in the table has playtime < 5, and Hive
-- strict mode rejects the query.
select count(distinct all_viewers.user_id) as user_cnt
from temp_userlook_0305 all_viewers
left join (
    select distinct user_id
    from temp_userlook_0305
    where playtime < 5
) short_viewers
    on all_viewers.user_id = short_viewers.user_id
where short_viewers.user_id is null;

-- Plain-text copy of the data-preparation statements, one statement per block.
-- The table is dropped first because a bare CREATE TABLE fails when
-- temp_userlook_0305 already exists, and re-inserting would duplicate the rows.
drop table if exists temp_userlook_0305;

-- The ORC codec name is written in upper case, as the ORC table-property
-- documentation spells it.
create table temp_userlook_0305
(
    view_date bigint,
    user_id   string,
    age       int,
    programid string,
    playtime  int
) stored as orc
    tblproperties ('orc.compress' = 'SNAPPY');

-- An explicit column list keeps the load correct if the column order changes.
insert into temp_userlook_0305 (view_date, user_id, age, programid, playtime)
values (20220221, 'u1', 30, 'a', 4),
       (20220221, 'u1', 30, 'b', 10),
       (20220221, 'u1', 30, 'a', 2),
       (20220221, 'u2', 23, 'c', 1),
       (20220222, 'u3', 26, 'd', 3),
       (20220223, 'u2', 23, 'a', 2);

-- Sanity check of the loaded rows.
select view_date, user_id, age, programid, playtime
from temp_userlook_0305;