1. 程式人生 > >維度模型資料倉庫(十四) —— 雜項維度

維度模型資料倉庫(十四) —— 雜項維度

(五)進階技術
        9. 雜項維度
        本篇討論雜項維度。簡單地說,雜項維度是一種所含屬性都只有少數幾個可能取值的維度。例如銷售訂單,它可能帶有許多離散的標誌資料(yes-no這種型別的值),如:
  • verification_ind(如果訂單已經被稽核,值為yes)
  • credit_check_flag(表示此訂單的客戶信用狀態是否已經檢查)
  • new_customer_ind(如果這是新客戶的首個訂單,值為yes)
  • web_order_flag(表示此訂單是否為線上(網路)下單)
        這類資料常被用於增強銷售分析,應該用稱為雜項維度的特殊維度型別儲存。

        新增銷售訂單屬性雜項維度
        要給現有的資料倉庫新增一個銷售訂單雜項維度,需要新增一個名為sales_order_attribute_dim的維度表。圖(五)- 9-1顯示了增加雜項維度表後的資料倉庫模式(這裡只顯示了和銷售訂單屬性相關的表)。
圖(五)- 9-1
        新的維度表包括四個yes-no列:verification_ind、credit_check_flag、new_customer_ind和web_order_flag。每個列可以有兩個可能值中的一個(Y 或 N),因此sales_order_attribute_dim表最多有16(2^4)行。可以預裝載這個維度,並且只需裝載一次。
        注意:如果知道某種組合不可能出現,就不需要裝載這種組合。執行清單(五)- 9-1裡的指令碼修改資料庫模式。這個指令碼做了四項工作:建立sales_order_attribute_dim表,向表中預裝載全部16種可能的組合,給銷售訂單事實表新增雜項維度代理鍵,給源資料庫裡的sales_order表增加對應的四個屬性列。
-- Work in the data warehouse schema.
USE dw;

-- Junk dimension for sales orders: one row per combination of the four
-- Y/N order flags, plus the version / effective-date / expiry-date columns.
CREATE TABLE sales_order_attribute_dim (
    sales_order_attribute_sk INT     NOT NULL AUTO_INCREMENT,  -- surrogate key
    verification_ind         CHAR(1),
    credit_check_flag        CHAR(1),
    new_customer_ind         CHAR(1),
    web_order_flag           CHAR(1),
    version                  INT,
    effective_date           DATE,
    expiry_date              DATE,
    PRIMARY KEY (sales_order_attribute_sk)
);

-- Pre-load the junk dimension with all 16 (2^4) combinations of the four
-- Y/N flags. This is a one-time load.
--
-- The surrogate key is left out of the column list so AUTO_INCREMENT assigns
-- it; the rows are listed in a fixed order so the generated keys are stable.
--
-- effective_date is '1900-01-01': a date with a zero month/day such as
-- '1900-00-00' is not a valid calendar date and is rejected when the server
-- runs in strict SQL mode with NO_ZERO_IN_DATE enabled.
INSERT INTO sales_order_attribute_dim (
    verification_ind
  , credit_check_flag
  , new_customer_ind
  , web_order_flag
  , version
  , effective_date
  , expiry_date
) VALUES
  ('Y', 'N', 'N', 'N', 1, '1900-01-01', '2200-01-01')
, ('Y', 'Y', 'N', 'N', 1, '1900-01-01', '2200-01-01')
, ('Y', 'Y', 'Y', 'N', 1, '1900-01-01', '2200-01-01')
, ('Y', 'Y', 'Y', 'Y', 1, '1900-01-01', '2200-01-01')
, ('Y', 'N', 'Y', 'N', 1, '1900-01-01', '2200-01-01')
, ('Y', 'N', 'Y', 'Y', 1, '1900-01-01', '2200-01-01')
, ('Y', 'N', 'N', 'Y', 1, '1900-01-01', '2200-01-01')
, ('Y', 'Y', 'N', 'Y', 1, '1900-01-01', '2200-01-01')
, ('N', 'N', 'N', 'N', 1, '1900-01-01', '2200-01-01')
, ('N', 'Y', 'N', 'N', 1, '1900-01-01', '2200-01-01')
, ('N', 'Y', 'Y', 'N', 1, '1900-01-01', '2200-01-01')
, ('N', 'Y', 'Y', 'Y', 1, '1900-01-01', '2200-01-01')
, ('N', 'N', 'Y', 'N', 1, '1900-01-01', '2200-01-01')
, ('N', 'N', 'Y', 'Y', 1, '1900-01-01', '2200-01-01')
, ('N', 'N', 'N', 'Y', 1, '1900-01-01', '2200-01-01')
, ('N', 'Y', 'N', 'Y', 1, '1900-01-01', '2200-01-01');

COMMIT;

-- Add the junk-dimension surrogate key to the fact table, positioned right
-- after product_sk.
ALTER TABLE sales_order_fact
    ADD COLUMN sales_order_attribute_sk INT AFTER product_sk;

-- Foreign key to the junk dimension. Deletes and key updates on the
-- dimension cascade to the referencing fact rows.
ALTER TABLE sales_order_fact
    ADD FOREIGN KEY (sales_order_attribute_sk)
        REFERENCES sales_order_attribute_dim (sales_order_attribute_sk)
        ON DELETE CASCADE
        ON UPDATE CASCADE;

-- Add the four matching flag columns to the sales_order table of the source
-- database. They are placed, in this order, directly after product_code.
USE source;

ALTER TABLE sales_order
    ADD COLUMN verification_ind  CHAR(1) AFTER product_code,
    ADD COLUMN credit_check_flag CHAR(1) AFTER verification_ind,
    ADD COLUMN new_customer_ind  CHAR(1) AFTER credit_check_flag,
    ADD COLUMN web_order_flag    CHAR(1) AFTER new_customer_ind;
清單(五)- 9-1
        修改定期裝載指令碼
        由於有了一個新的維度,必須修改定期裝載指令碼。清單(五)- 9-2顯示修改後的指令碼。
USE dw;

-- Yesterday's date: used below both as the expiry date of superseded
-- dimension rows and as the effective date of the rows that replace them.
SET @pre_date = DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY);

-- Upper bound of the change-data-capture window for this run.
-- No WHERE clause: every row of cdc_time is updated
-- (presumably a single-row control table -- confirm).
UPDATE cdc_time
SET current_load = CURRENT_DATE;

-- Customer dimension load, step 0: refresh the staging table with a full
-- copy of the source customers. Columns are matched by position.
TRUNCATE TABLE customer_stg;

INSERT INTO customer_stg
SELECT
    src.customer_number,
    src.customer_name,
    src.customer_street_address,
    src.customer_zip_code,
    src.customer_city,
    src.customer_state,
    src.shipping_address,
    src.shipping_zip_code,
    src.shipping_city,
    src.shipping_state
FROM source.customer AS src;
/* SCD2 on every customer address column (street and shipping).               */
/* Columns are compared with the NULL-safe operator <=>, so that               */
/*   - a value changing to or from NULL counts as a change, and                */
/*   - a column that is NULL on both sides does NOT count as a change          */
/*     (a plain "a.col IS NULL" test would re-version such a customer on       */
/*     every single run).                                                      */
/* The two statements below must use the same change test: step 2 only        */
/* creates a new version for rows that step 1 has just expired.               */

/* Step 1: expire the current version of each customer whose address changed. */
UPDATE customer_dim a,
    customer_stg b
SET
    a.expiry_date = @pre_date
WHERE
    a.customer_number = b.customer_number
        AND NOT (a.customer_street_address <=> b.customer_street_address
            AND a.customer_city <=> b.customer_city
            AND a.customer_zip_code <=> b.customer_zip_code
            AND a.customer_state <=> b.customer_state
            AND a.shipping_address <=> b.shipping_address
            AND a.shipping_city <=> b.shipping_city
            AND a.shipping_zip_code <=> b.shipping_zip_code
            AND a.shipping_state <=> b.shipping_state)
        AND a.expiry_date = '2200-01-01';

/* Step 2: add the new current version, numbered one above the expired one.   */
INSERT INTO customer_dim
SELECT
  NULL
, b.customer_number
, b.customer_name
, b.customer_street_address
, b.customer_zip_code
, b.customer_city
, b.customer_state
, b.shipping_address
, b.shipping_zip_code
, b.shipping_city
, b.shipping_state
, a.version + 1
, @pre_date
, '2200-01-01'
FROM
  customer_dim a
, customer_stg b
WHERE
    a.customer_number = b.customer_number
AND NOT (    a.customer_street_address <=> b.customer_street_address
         AND a.customer_city <=> b.customer_city
         AND a.customer_zip_code <=> b.customer_zip_code
         AND a.customer_state <=> b.customer_state
         AND a.shipping_address <=> b.shipping_address
         AND a.shipping_city <=> b.shipping_city
         AND a.shipping_zip_code <=> b.shipping_zip_code
         AND a.shipping_state <=> b.shipping_state)
/* The date test deliberately references the outer alias a: it restricts a    */
/* to the version that was expired with @pre_date, so version + 1 is          */
/* computed from that row only.                                                */
AND EXISTS(
SELECT *
FROM customer_dim x
WHERE
    b.customer_number=x.customer_number
AND a.expiry_date = @pre_date )
/* Skip customers that still (or already) have a current version.             */
AND NOT EXISTS (
SELECT *
FROM customer_dim y
WHERE
    b.customer_number = y.customer_number
AND y.expiry_date = '2200-01-01') ;
/* SCD1 on customer_name: overwrite the name in place on every version of the */
/* customer whose stored name differs from the staged one.                    */
UPDATE customer_dim AS a
INNER JOIN customer_stg AS b
    ON a.customer_number = b.customer_number
SET a.customer_name = b.customer_name
WHERE a.customer_name <> b.customer_name;
/* Brand-new customers: staged customer numbers that have no row at all in    */
/* customer_dim. They start at version 1, effective from @pre_date.           */
INSERT INTO customer_dim
SELECT
    NULL,
    stg.customer_number,
    stg.customer_name,
    stg.customer_street_address,
    stg.customer_zip_code,
    stg.customer_city,
    stg.customer_state,
    stg.shipping_address,
    stg.shipping_zip_code,
    stg.shipping_city,
    stg.shipping_state,
    1,
    @pre_date,
    '2200-01-01'
FROM customer_stg AS stg
WHERE stg.customer_number NOT IN (
    /* customer numbers already present in the dimension */
    SELECT y.customer_number
    FROM customer_dim AS x
    INNER JOIN customer_stg AS y
        ON x.customer_number = y.customer_number
);

/* Rebuild the PA customer subset dimension from scratch: every customer_dim  */
/* row (all versions) whose customer_state is 'PA'.                           */
TRUNCATE TABLE pa_customer_dim;

INSERT INTO pa_customer_dim
SELECT
    cd.customer_sk,
    cd.customer_number,
    cd.customer_name,
    cd.customer_street_address,
    cd.customer_zip_code,
    cd.customer_city,
    cd.customer_state,
    cd.shipping_address,
    cd.shipping_zip_code,
    cd.shipping_city,
    cd.shipping_state,
    cd.version,
    cd.effective_date,
    cd.expiry_date
FROM customer_dim AS cd
WHERE cd.customer_state = 'PA';

/* Product dimension load, step 0: refresh the staging table with a full copy */
/* of the source products. Columns are matched by position.                   */
TRUNCATE TABLE product_stg;

INSERT INTO product_stg
SELECT
    src.product_code,
    src.product_name,
    src.product_category
FROM source.product AS src;
/* SCD2 on product_name and product_category.                                 */
/* Step 1: expire the current version of each product whose name or category  */
/* differs from the staged values.                                            */
UPDATE product_dim AS a
INNER JOIN product_stg AS b
    ON a.product_code = b.product_code
SET a.expiry_date = @pre_date
WHERE a.expiry_date = '2200-01-01'
  AND (   a.product_name <> b.product_name
       OR a.product_category <> b.product_category);
/* Step 2 of the product SCD2: add the new current version of each product    */
/* that was just expired, numbered one above the expired version.             */
INSERT INTO product_dim
SELECT
    NULL,
    b.product_code,
    b.product_name,
    b.product_category,
    a.version + 1,
    @pre_date,
    '2200-01-01'
FROM product_dim AS a
INNER JOIN product_stg AS b
    ON a.product_code = b.product_code
WHERE (   a.product_name <> b.product_name
       OR a.product_category <> b.product_category)
  /* The date test references the outer alias a: it restricts a to the        */
  /* version whose expiry_date is @pre_date.                                  */
  AND EXISTS (
        SELECT *
        FROM product_dim AS x
        WHERE x.product_code = b.product_code
          AND a.expiry_date = @pre_date)
  /* Skip products that already have a current version.                       */
  AND NOT EXISTS (
        SELECT *
        FROM product_dim AS y
        WHERE y.product_code = b.product_code
          AND y.expiry_date = '2200-01-01');
/* Brand-new products: staged product codes that have no row at all in        */
/* product_dim. They start at version 1, effective from @pre_date.            */
INSERT INTO product_dim
SELECT
    NULL,
    stg.product_code,
    stg.product_name,
    stg.product_category,
    1,
    @pre_date,
    '2200-01-01'
FROM product_stg AS stg
WHERE stg.product_code NOT IN (
    /* product codes already present in the dimension */
    SELECT y.product_code
    FROM product_dim AS x
    INNER JOIN product_stg AS y
        ON x.product_code = y.product_code
);

-- Load the fact table with the new ('N' status) orders entered inside the
-- current CDC window [last_load, current_load).
-- Values are matched to sales_order_fact columns by position. The NULL
-- placeholders are columns this statement does not populate (presumably the
-- allocate/packing/ship/receive keys and quantities set by the status
-- updates that follow -- confirm against the table definition).
INSERT INTO sales_order_fact
SELECT
    c.customer_sk,
    d.product_sk,
    g.sales_order_attribute_sk,
    e.order_date_sk,
    NULL,
    NULL,
    NULL,
    NULL,
    a.order_number,
    f.request_delivery_date_sk,
    a.order_amount,
    a.quantity,
    NULL,
    NULL,
    NULL,
    NULL
FROM source.sales_order AS a
-- customer version that was in effect on the status date
INNER JOIN customer_dim AS c
    ON  a.customer_number = c.customer_number
    AND a.status_date >= c.effective_date
    AND a.status_date <  c.expiry_date
-- product version that was in effect on the status date
INNER JOIN product_dim AS d
    ON  a.product_code = d.product_code
    AND a.status_date >= d.effective_date
    AND a.status_date <  d.expiry_date
INNER JOIN order_date_dim AS e
    ON a.status_date = e.order_date
INNER JOIN request_delivery_date_dim AS f
    ON a.request_delivery_date = f.request_delivery_date
-- junk dimension row with exactly this combination of the four flags
INNER JOIN sales_order_attribute_dim AS g
    ON  a.verification_ind  = g.verification_ind
    AND a.credit_check_flag = g.credit_check_flag
    AND a.new_customer_ind  = g.new_customer_ind
    AND a.web_order_flag    = g.web_order_flag
-- CDC window
INNER JOIN cdc_time AS h
    ON  a.entry_date >= h.last_load
    AND a.entry_date <  h.current_load
WHERE a.order_status = 'N';

/* Allocated status ('A'): for source rows entered inside the CDC window,     */
/* set the allocate date key and allocate quantity on the matching fact row.  */
UPDATE sales_order_fact AS a
INNER JOIN source.sales_order AS b
    ON b.order_number = a.order_number
INNER JOIN allocate_date_dim AS c
    ON c.allocate_date = b.status_date
INNER JOIN cdc_time AS h
    ON  b.entry_date >= h.last_load
    AND b.entry_date <  h.current_load
SET a.allocate_date_sk  = c.allocate_date_sk,
    a.allocate_quantity = b.quantity
WHERE b.order_status = 'A';

/* Packed status ('P'): for source rows entered inside the CDC window,        */
/* set the packing date key and packing quantity on the matching fact row.    */
UPDATE sales_order_fact AS a
INNER JOIN source.sales_order AS b
    ON b.order_number = a.order_number
INNER JOIN packing_date_dim AS d
    ON d.packing_date = b.status_date
INNER JOIN cdc_time AS h
    ON  b.entry_date >= h.last_load
    AND b.entry_date <  h.current_load
SET a.packing_date_sk  = d.packing_date_sk,
    a.packing_quantity = b.quantity
WHERE b.order_status = 'P';

/* Shipped status ('S'): for source rows entered inside the CDC window,       */
/* set the ship date key and ship quantity on the matching fact row.          */
UPDATE sales_order_fact AS a
INNER JOIN source.sales_order AS b
    ON b.order_number = a.order_number
INNER JOIN ship_date_dim AS e
    ON e.ship_date = b.status_date
INNER JOIN cdc_time AS h
    ON  b.entry_date >= h.last_load
    AND b.entry_date <  h.current_load
SET a.ship_date_sk  = e.ship_date_sk,
    a.ship_quantity = b.quantity
WHERE b.order_status = 'S';

/* Received status ('R'): for source rows entered inside the CDC window,      */
/* set the receive date key and receive quantity on the matching fact row.    */
UPDATE sales_order_fact AS a
INNER JOIN source.sales_order AS b
    ON b.order_number = a.order_number
INNER JOIN receive_date_dim AS f
    ON f.receive_date = b.status_date
INNER JOIN cdc_time AS h
    ON  b.entry_date >= h.last_load
    AND b.entry_date <  h.current_load
SET a.receive_date_sk  = f.receive_date_sk,
    a.receive_quantity = b.quantity
WHERE b.order_status = 'R';

-- Advance the CDC watermark: the upper bound of this run becomes the lower
-- bound of the next one. No WHERE clause: every row of cdc_time is updated.
UPDATE cdc_time
SET last_load = current_load;

COMMIT;
清單(五)- 9-2
        圖(五)- 9-2到圖(五)- 9-5顯示了對Kettle定時裝載的修改。
圖(五)- 9-2
圖(五)- 9-3
圖(五)- 9-4
圖(五)- 9-5
        測試修改後的定期裝載
        現在使用清單(五)- 9-3裡的指令碼新增五個客戶和八個銷售訂單。
-- Test data, part 1: five new customers (numbers 10-14) in the source
-- database. In every row the shipping address repeats the street address.
USE source;

INSERT INTO customer (
    customer_number,
    customer_name,
    customer_street_address,
    customer_zip_code,
    customer_city,
    customer_state,
    shipping_address,
    shipping_zip_code,
    shipping_city,
    shipping_state
)
VALUES
    (10, 'Bigger Customers',
         '7777 Ridge Rd.', '44102', 'Cleveland', 'OH',
         '7777 Ridge Rd.', '44102', 'Cleveland', 'OH'),
    (11, 'Smaller Stores',
         '8888 Jennings Fwy.', '44102', 'Cleveland', 'OH',
         '8888 Jennings Fwy.', '44102', 'Cleveland', 'OH'),
    (12, 'Small-Medium Retailers',
         '9999 Memphis Ave.', '44102', 'Cleveland', 'OH',
         '9999 Memphis Ave.', '44102', 'Cleveland', 'OH'),
    (13, 'PA Customer',
         '1111 Louise Dr.', '17050', 'Mechanicsburg', 'PA',
         '1111 Louise Dr.', '17050', 'Mechanicsburg', 'PA'),
    (14, 'OH Customer',
         '6666 Ridge Rd.', '44102', 'Cleveland', 'OH',
         '6666 Ridge Rd.', '44102', 'Cleveland', 'OH');

-- Test data, part 2: eight sales orders (numbers 54-61) with varying flag
-- combinations. Values are positional; the four Y/N values that follow the
-- third value are the flag columns added after product_code
-- (verification_ind, credit_check_flag, new_customer_ind, web_order_flag).
INSERT INTO sales_order
VALUES
    (54,  1, 1, 'Y', 'Y', 'N', 'Y', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 7500, 75),
    (55,  2, 2, 'N', 'N', 'N', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 1000, 10),
    (56,  3, 3, 'Y', 'Y', 'N', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 7500, 75),
    (57,  4, 4, 'Y', 'N', 'N', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 1000, 10),
    (58, 11, 1, 'N', 'Y', 'Y', 'Y', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 7500, 75),
    (59, 12, 2, 'N', 'Y', 'Y', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 1000, 10),
    (60, 13, 3, 'Y', 'Y', 'Y', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 7500, 75),
    (61, 14, 4, 'Y', 'N', 'Y', 'N', '2015-03-16', 'N', '2015-03-20', '2015-03-16', 1000, 10);

COMMIT;
清單(五)- 9-3
        現在把系統日期設定為2015年3月17日,然後再執行清單(五)- 9-2裡的指令碼或對應的Kettle作業。
        可以使用清單(五)- 9-4裡的分析性查詢確認裝載正確。該查詢計算新客戶的銷售訂單中,已檢查信用狀態的訂單所佔的百分比。查詢結果如圖(五)- 9-6所示。
USE dw;

-- Percentage of new-customer orders whose credit status was checked.
-- Both counts come from a single pass over the fact/junk-dimension join,
-- restricted to new_customer_ind = 'Y':
--   checked     = orders with credit_check_flag = 'Y'
--   not_checked = orders with credit_check_flag = 'N'
SELECT
    CONCAT(ROUND(checked / (checked + not_checked) * 100),' % ')
FROM
    (SELECT
        COUNT(CASE WHEN b.credit_check_flag = 'Y' THEN 1 END) AS checked,
        COUNT(CASE WHEN b.credit_check_flag = 'N' THEN 1 END) AS not_checked
    FROM sales_order_fact AS a
    INNER JOIN sales_order_attribute_dim AS b
        ON a.sales_order_attribute_sk = b.sales_order_attribute_sk
    WHERE b.new_customer_ind = 'Y') AS flag_counts;
清單(五)- 9-4 圖(五)- 9-6