1. 程式人生 > 實用技巧 >大資料實戰(七十六):電商數倉(六十)數倉之業務數倉搭建(三)DWD層

大資料實戰(七十六):電商數倉(六十)數倉之業務數倉搭建(三)DWD層

1 DWD

對ODS層資料進行判空過濾。對商品分類表進行維度退化(降維)。

1 建立訂單表

hive (gmall)>
drop table if exists dwd_order_info;
create external table dwd_order_info ( 
   `id` string COMMENT '',
   `total_amount` decimal(10,2) COMMENT '', 
   `order_status` string COMMENT ' 1  2  3  4  5', 
  `user_id` string COMMENT 'id' ,
 `payment_way` 
string COMMENT '', `out_trade_no` string COMMENT '', `create_time` string COMMENT '', `operate_time` string COMMENT '' ) PARTITIONED BY ( `dt` string) stored as parquet location '/warehouse/gmall/dwd/dwd_order_info/' ;

2建立訂單詳情

hive (gmall)>
drop table if exists dwd_order_detail;
create external table dwd_order_detail( 
   `id` 
string COMMENT '', `order_id` decimal(10,2) COMMENT '', `user_id` string COMMENT 'id' , `sku_id` string COMMENT 'id', `sku_name` string COMMENT '', `order_price` string COMMENT '', `sku_num` string COMMENT '', `create_time` string COMMENT '' ) PARTITIONED BY (`dt` string) stored as parquet location
'/warehouse/gmall/dwd/dwd_order_detail/' ;

3建立使用者

hive (gmall)>
drop table if exists dwd_user_info;
create external table dwd_user_info( 
   `id` string COMMENT 'id',
   `name`  string COMMENT '', 
  `birthday` string COMMENT '' ,
 `gender` string COMMENT '', 
 `email` string COMMENT '', 
 `user_level` string COMMENT '', 
 `create_time` string COMMENT ''
) 
PARTITIONED BY (`dt` string)
stored as  parquet
location '/warehouse/gmall/dwd/dwd_user_info/'
;

4建立支付流水

hive (gmall)>
drop table if exists `dwd_payment_info`;
create external  table  `dwd_payment_info`(
`id`  bigint COMMENT '',
`out_trade_no`   string COMMENT '',
`order_id`  string COMMENT '',
`user_id`  string COMMENT '',
`alipay_trade_no` string COMMENT '',
`total_amount`  decimal(16,2) COMMENT '',
`subject`  string COMMENT '',
`payment_type`    string COMMENT '',
`payment_time`   string COMMENT ''
 )  
PARTITIONED BY ( `dt` string)
stored as  parquet
location '/warehouse/gmall/dwd/dwd_payment_info/'
;

5建立商品(增加分類

hive (gmall)>
drop table if exists dwd_sku_info;
create external table dwd_sku_info( 
   `id` string COMMENT 'skuId',
   `spu_id` string COMMENT 'spuid', 
  `price` decimal(10,2) COMMENT '' ,
 `sku_name` string COMMENT '', 
 `sku_desc` string COMMENT '', 
 `weight` string COMMENT '', 
 `tm_id` string COMMENT 'id', 
 `category3_id` string COMMENT '1id', 
 `category2_id` string COMMENT '2id', 
 `category1_id` string COMMENT '3id', 
 `category3_name` string COMMENT '3', 
 `category2_name` string COMMENT '2', 
 `category1_name` string COMMENT '1', 
 `create_time` string COMMENT ''
) 
PARTITIONED BY ( `dt` string)
stored as  parquet
location '/warehouse/gmall/dwd/dwd_sku_info/'
;

6 DWD層資料匯入指令碼

1)在/root/bin目錄下建立指令碼dwd_db.sh

[root@hadoop102 bin]$ vim dwd_db.sh

指令碼中填寫如下內容

#!/bin/bash

# 定義變數方便修改
APP=gmall

# 如果是輸入的日期按照取輸入日期;如果沒輸入日期取當前時間的前一天
if [ -n "$1" ] ;then
    do_date=$1
else 
    do_date=`date -d "-1 day" +%F`  
fi 

sql="

set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table   "$APP".dwd_order_info partition(dt)
select * from "$APP".ods_order_info 
where dt='$do_date'  and id is not null;
 
insert overwrite table   "$APP".dwd_order_detail partition(dt)
select * from "$APP".ods_order_detail 
where dt='$do_date'   and id is not null;

insert overwrite table   "$APP".dwd_user_info partition(dt)
select * from "$APP".ods_user_info
where dt='$do_date'   and id is not null;
 
insert overwrite table   "$APP".dwd_payment_info partition(dt)
select * from "$APP".ods_payment_info
where dt='$do_date'  and id is not null;

insert overwrite table   "$APP".dwd_sku_info partition(dt)
select  
    sku.id,
    sku.spu_id, 
    sku.price,
    sku.sku_name,  
    sku.sku_desc,  
    sku.weight,  
    sku.tm_id,  
    sku.category3_id,  
    c2.id category2_id ,  
    c1.id category1_id,  
    c3.name category3_name,  
    c2.name category2_name,  
    c1.name category1_name,  
    sku.create_time,
    sku.dt
from
    "$APP".ods_sku_info sku 
join "$APP".ods_base_category3 c3 on sku.category3_id=c3.id 
    join "$APP".ods_base_category2 c2 on c3.category2_id=c2.id 
    join "$APP".ods_base_category1 c1 on c2.category1_id=c1.id 
where sku.dt='$do_date'  and c2.dt='$do_date'  
and  c3.dt='$do_date' and  c1.dt='$do_date' 
and sku.id is not null;
"
beeline -u "jdbc:hive2://hadoop102:10000/" -n hive -e "$sql"

2)增加指令碼執行許可權

[root@hadoop102 bin]$ chmod 777 dwd_db.sh

3)採用指令碼匯入資料

[root@hadoop102 bin]$ dwd_db.sh 2019-02-10