1. 程式人生 > 實用技巧 >hive 編寫連續N天登入的總人數

hive 編寫連續N天登入的總人數

1、資料檔案:

1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-
17 1 3 2019-07-18 1

2、建表

create table if not exists login(
uid string,
dt string,
login_status int
)
row format delimited fields terminated by ' '
lines terminated by '\n'
stored as textfile
;
load data local inpath '/root/hivedata/login.txt' into table login;

3、sql操作

select
count(1)
from
(
select
uid,
dt
from ( select t1.uid uid, date_sub(t1.dt,t1.r) dt from (select uid, dt, row_number() over(distribute by uid sort by dt) r from login where login_status=1) t1 ) t2 group by uid,dt having count(1)>7 ) t3;

SELECT uid,date_sub(dt,rank) as login_group,min(dt) as start_login_time
,max(dt) as end_login_time,count
(1) as continuous_days FROM ( SELECT uid,dt,row_number() OVER(PARTITION BY uid order by dt) as rank FROM login where login_status = 1 ) a group by uid,date_sub(dt,rank);