1. 程式人生 > HiveSQL找出連續日期及連續的天數

HiveSQL找出連續日期及連續的天數

參考:https://www.cnblogs.com/Joetao/p/3842242.html

參考關鍵程式碼(SQL Server 語法,無法直接在 Hive 中執行):

-- Reference implementation (SQL Server syntax): one output row per run of
-- consecutive dates found in tmptable.
-- id1 is the day offset from a fixed anchor date and id2 is the number of rows
-- dated on or before the current row; for distinct dates their difference
-- stays the same inside a run, so it is used as the grouping key.
select min(d.rq)                   as 本期起始日期,
       max(d.rq)                   as 本期終止日期,
       max(d.id1) - min(d.id1) + 1 as 持續天數,
       -- a key of -1 is treated as the first run, which has no predecessor
       case d.id1 - d.id2
         when -1 then 0
         else max(datediff(day, d.rq2, d.rq))
       end                         as 距上一期天數
from
(
  select datediff(day, '2010-01-01', cur.rq)                as id1,
         (select count(1) from tmptable where rq <= cur.rq) as id2,
         -- latest date strictly before the current row (NULL for the earliest row)
         (select max(rq) from tmptable where rq < cur.rq)   as rq2,
         cur.*
  from tmptable cur
) d
group by d.id1 - d.id2

測試資料:

-- Test fixture: (re)create the table that holds one calendar date per row.
use xxx;
-- `if exists` keeps the script re-runnable even when
-- hive.exec.drop.ignorenonexistent has been switched off.
drop table if exists test_serialdate;
create table if not exists test_serialdate (
  rq  string comment '日期'   -- calendar date stored as a 'yyyy-MM-dd' string
) stored as rcfile
;

-- Load the sample dates: four runs of consecutive days separated by gaps.
-- insert ... values (Hive 0.14+) is used so the script does not depend on a
-- user-created `dual` helper table, which Hive does not provide out of the box
-- and which would duplicate every row if it ever held more than one record.
insert into table test_serialdate values
  ('2019-01-01'),
  ('2019-01-02'),
  ('2019-01-05'),
  ('2019-01-06'),
  ('2019-01-08'),
  ('2019-01-09'),
  ('2019-01-10'),
  ('2019-01-11'),
  ('2019-01-17'),
  ('2019-01-18');

 

程式碼

-- Find every run of consecutive dates in test_serialdate.
-- Output, one row per run:
--   gp          grouping key: days since the anchor date minus the number of
--               earlier dates, i.e. how many days are missing before the run
--   startdate   first date of the run
--   enddate     last date of the run
--   days        length of the run in days
--   missingdays day difference between this run's first date and the previous
--               run's last date; 0 for the earliest run
-- Window functions (Hive 0.11+) replace the former self-joins, which were
-- cartesian products and are rejected in Hive strict mode.
select a.gp,
       min(a.rq)                       as startdate,
       max(a.rq)                       as enddate,
       (max(a.id1) - min(a.id1) + 1)   as days,
       -- Only the first row of a run is more than one day after its predecessor,
       -- so that row alone contributes the gap. The earliest run has no such
       -- row (its predecessor is NULL) and therefore yields 0 no matter which
       -- date the data starts on.
       max(case
             when datediff(a.rq, a.rq2) > 1 then datediff(a.rq, a.rq2)
             else 0
           end)                        as missingdays
from
(
  select s.rq,
         datediff(s.rq, '2019-01-01')          as id1,  -- days since the anchor date
         s.id2,                                         -- number of earlier dates
         s.rq2,                                         -- closest earlier date
         -- id1 and id2 both grow by 1 from one consecutive day to the next, so
         -- the difference is constant inside a run. The anchor only shifts the
         -- key by a constant; it does not have to be the first date in the data.
         datediff(s.rq, '2019-01-01') - s.id2  as gp
  from
  (
    select d.rq,
           row_number() over (order by d.rq) - 1 as id2,
           lag(d.rq) over (order by d.rq)        as rq2
    from
    (
      -- de-duplicate and drop NULLs so that every calendar day counts once
      select rq
      from test_serialdate
      where rq is not null
      group by rq
    ) d
  ) s
) a
group by a.gp
order by startdate
;

後記:大牛解決這個問題的核心在於缺失天數——同一段連續日期中,每個日期之前的缺失天數(距初始日期的天數減去比它小的日期筆數)都相同,因此可以直接拿來當分組鍵。大寫的服。