1. 程式人生 > 實用技巧 >大資料實戰(三十六):電商數倉(二十九)之使用者行為資料倉庫(十五)本週迴流使用者數

大資料實戰(三十六):電商數倉(二十九)之使用者行為資料倉庫(十五)本週迴流使用者數

本週迴流=本週活躍-本週新增-上週活躍

1 DWS

使用周活明細表dws_uv_detail_wk和每日新增使用者表dws_new_mid_day作為DWS層資料

2 ADS

1)建表語句

hive (gmall)>
-- ADS result table for the weekly returning-device count.
-- The table is EXTERNAL, so dropping it removes only the metastore entry and
-- leaves the files under LOCATION in place.
-- NOTE(review): the count column is named wastage_count although its comment
-- describes returning devices. The name looks carried over from another
-- table. It is kept unchanged so existing readers keep working - confirm.
DROP TABLE IF EXISTS ads_back_count;
CREATE EXTERNAL TABLE ads_back_count
(
    `dt`            STRING COMMENT '統計日期',
    `wk_dt`         STRING COMMENT '統計日期所在周',
    `wastage_count` BIGINT COMMENT '迴流裝置數'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_back_count';

2)匯入資料

=============================主題:本週迴流使用者數=========================
本週迴流使用者:上週沒有使用應用,上週之前使用了應用,本週使用了應用
本週迴流使用者=本週周活使用者-本週新增使用者-上週周活使用者
-----------------------------需求-----------------------
-----------------------------相關表---------------------
dws_uv_detail_wk: 周活表
dws_new_mid_day: 每日新增使用者表
-----------------------------思路-----------------------

三個結果集做差: a left join b on a.x=b.x where b.x is null
with
臨時表名 as (),
臨時表名 as (),
臨時表名 as ()
select 語句
-----------------------------SQL------------------------
-- Appends one row to ads_back_count for statistics date 2020-02-14:
--   returning = active this week - new this week - active last week
-- next_day(d, 'mo') is the first Monday strictly after d, so subtracting
-- 7 and 1 days yields the Monday and Sunday of the week containing d, and
-- subtracting 14 and 8 days yields the Monday and Sunday of the week before.
WITH this_week_active AS (
    -- devices in the weekly-active table for the week containing the date
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('2020-02-14', 'mo'), 7), '-', date_sub(next_day('2020-02-14', 'mo'), 1))
),
this_week_new AS (
    -- devices first seen between this week's Monday and the statistics date
    SELECT mid_id
    FROM dws_new_mid_day
    WHERE create_date BETWEEN date_sub(next_day('2020-02-14', 'mo'), 7) AND '2020-02-14'
),
last_week_active AS (
    -- devices in the weekly-active table for the previous week
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('2020-02-14', 'mo'), 14), '-', date_sub(next_day('2020-02-14', 'mo'), 8))
)
INSERT INTO TABLE ads_back_count
SELECT
    '2020-02-14' AS dt,
    concat(date_sub(next_day('2020-02-14', 'mo'), 7), '-', date_sub(next_day('2020-02-14', 'mo'), 1)) AS wk_dt,
    count(*) AS wastage_count
FROM this_week_active AS cur
-- anti-joins: keep only devices with no match in either exclusion set
LEFT JOIN this_week_new AS fresh
    ON cur.mid_id = fresh.mid_id
LEFT JOIN last_week_active AS prev
    ON cur.mid_id = prev.mid_id
WHERE fresh.mid_id IS NULL
  AND prev.mid_id IS NULL;

4)執行指令碼

#!/bin/bash
# Appends the weekly returning-device count for one statistics date to
# gmall.ads_back_count.
#
# Usage: <script> [do_date]
#   do_date  statistics date as YYYY-MM-DD. Defaults to yesterday.
#
# returning = active this week - new this week - active last week.
# The week boundaries are computed inside Hive from next_day(do_date, 'mo').

if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d yesterday +%F)
fi

# do_date is spliced into the HiveQL text below, so reject anything that is
# not a real calendar date before it can corrupt or inject into the query.
if [[ ! $do_date =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] || ! date -d "$do_date" +%F >/dev/null 2>&1; then
    echo "invalid date: $do_date (expected YYYY-MM-DD)" >&2
    exit 1
fi

echo "===日誌日期為$do_date==="

# The three CTEs are: devices active in the week containing do_date, devices
# first seen since that week's Monday, and devices active in the week before.
# The two left joins filtered on IS NULL act as anti-joins against the
# latter two sets.
sql="

use gmall;

WITH this_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('$do_date', 'mo'), 7), '-', date_sub(next_day('$do_date', 'mo'), 1))
),
this_week_new AS (
    SELECT mid_id
    FROM dws_new_mid_day
    WHERE create_date BETWEEN date_sub(next_day('$do_date', 'mo'), 7) AND '$do_date'
),
last_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('$do_date', 'mo'), 14), '-', date_sub(next_day('$do_date', 'mo'), 8))
)
INSERT INTO TABLE ads_back_count
SELECT
    '$do_date' AS dt,
    concat(date_sub(next_day('$do_date', 'mo'), 7), '-', date_sub(next_day('$do_date', 'mo'), 1)) AS wk_dt,
    count(*) AS wastage_count
FROM this_week_active AS cur
LEFT JOIN this_week_new AS fresh
    ON cur.mid_id = fresh.mid_id
LEFT JOIN last_week_active AS prev
    ON cur.mid_id = prev.mid_id
WHERE fresh.mid_id IS NULL
  AND prev.mid_id IS NULL;

"
hive  -e "$sql"
#!/bin/bash
# Appends the weekly returning-device count for one statistics date to
# gmall.ads_back_count.
#
# Usage: <script> [do_date]
#   do_date  statistics date as YYYY-MM-DD. Defaults to yesterday.
#
# returning = active this week - new this week - active last week.
# The week boundaries are computed inside Hive from next_day(do_date, 'mo').

if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d yesterday +%F)
fi

# do_date is spliced into the HiveQL text below, so reject anything that is
# not a real calendar date before it can corrupt or inject into the query.
if [[ ! $do_date =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] || ! date -d "$do_date" +%F >/dev/null 2>&1; then
    echo "invalid date: $do_date (expected YYYY-MM-DD)" >&2
    exit 1
fi

echo "===日誌日期為$do_date==="

# The three CTEs are: devices active in the week containing do_date, devices
# first seen since that week's Monday, and devices active in the week before.
# The two left joins filtered on IS NULL act as anti-joins against the
# latter two sets.
sql="

use gmall;

WITH this_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('$do_date', 'mo'), 7), '-', date_sub(next_day('$do_date', 'mo'), 1))
),
this_week_new AS (
    SELECT mid_id
    FROM dws_new_mid_day
    WHERE create_date BETWEEN date_sub(next_day('$do_date', 'mo'), 7) AND '$do_date'
),
last_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(date_sub(next_day('$do_date', 'mo'), 14), '-', date_sub(next_day('$do_date', 'mo'), 8))
)
INSERT INTO TABLE ads_back_count
SELECT
    '$do_date' AS dt,
    concat(date_sub(next_day('$do_date', 'mo'), 7), '-', date_sub(next_day('$do_date', 'mo'), 1)) AS wk_dt,
    count(*) AS wastage_count
FROM this_week_active AS cur
LEFT JOIN this_week_new AS fresh
    ON cur.mid_id = fresh.mid_id
LEFT JOIN last_week_active AS prev
    ON cur.mid_id = prev.mid_id
WHERE fresh.mid_id IS NULL
  AND prev.mid_id IS NULL;

"
hive  -e "$sql"