程式人生 > 利用大資料智慧更正瑕疵資料的應用樣例

利用大資料智慧更正瑕疵資料的應用樣例

-- Created on 2018-09-27 by 三界
-- Anonymous PL/SQL block that repairs route codes in F_FLIGHT for a date window.
--   Step 1: for flights flown as more than one leg on the same day (return,
--           diversion, multi-leg), rebuild REALCODE by chaining the legs.
--   Step 2: per month and flight number, take the most frequent REALCODE and
--           write it to LINECODE (working assumption stated by the original
--           author: the route flown by most normal flights is the planned route).
-- Goal stated by the original author: let the system correct flawed data itself
-- and reduce manual maintenance of base data.
--
-- F_FLIGHT columns used here (meanings per the original author's notes):
--   FLIGHT_DATE : flight date
--   FLIGHTNO    : flight number
--   REALCODE    : actually flown route code
--   LINECODE    : planned route code
--   DEPSTN      : departure airport code
--   ARRSTN      : arrival airport code
--   OUT_TIME    : off-block time
-- REALCODE / LINECODE initially default to "DEPSTN-ARRSTN", e.g. HAK-HKG.
declare
  VSDATE    DATE;
  VEDATE    DATE;
  -- Anchored to the target column so the accumulated route can never be
  -- narrower than what the UPDATE below is allowed to store.
  VRealLine F_FLIGHT.REALCODE%TYPE;
  -- TRUE while every leg seen so far departs from the previous leg's arrival.
  vFlagJoin BOOLEAN;
begin
  -- Processing window.
  -- NOTE(review): BETWEEN is inclusive, so flights dated 2018-01-01 are also
  -- processed; confirm whether the window is meant to be calendar year 2017 only.
  VSDATE := TO_DATE('2017-01-01', 'YYYY-MM-DD');
  VEDATE := TO_DATE('2018-01-01', 'YYYY-MM-DD');

  -- Step 1: compute the flown route code for flights with more than one leg.
  -- (Per the original author's note, such flights are under 4% of the total.)
  -- The ORDER BY must be on the outermost query: the loop accumulates
  -- VRealLine leg by leg and therefore relies on the fetch order, which an
  -- ORDER BY inside an inline view does not guarantee.
  FOR RL IN (SELECT T.FLIGHT_DATE, T.FLIGHTNO, T.DEPSTN, T.ARRSTN,
                    T.PRIOR_ARR, T.NEXT_DEP, T.ROWNO, T.CNT
               FROM (SELECT F.FLIGHT_DATE, F.FLIGHTNO, F.DEPSTN, F.ARRSTN,
                            LAG(F.ARRSTN, 1, NULL) OVER(PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS PRIOR_ARR,
                            LEAD(F.DEPSTN, 1, NULL) OVER(PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS NEXT_DEP,
                            ROW_NUMBER() OVER(PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS ROWNO,
                            COUNT(*) OVER(PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO) AS CNT
                       FROM F_FLIGHT F
                      WHERE F.FLIGHT_DATE BETWEEN VSDATE AND VEDATE) T
              WHERE T.CNT > 1 -- only flights that have more than one leg
              ORDER BY T.FLIGHT_DATE, T.FLIGHTNO, T.ROWNO) LOOP
    -- Build the route as DEP(1)-DEP(2)-...-DEP(n)-ARR(n) and, in parallel,
    -- check that consecutive legs connect (data-integrity check).
    if RL.ROWNO = 1 then
      -- First leg: start a new route and reset the connectivity flag.
      VRealLine := RL.DEPSTN;
      vFlagJoin := (RL.ARRSTN = RL.NEXT_DEP);
    elsif RL.ROWNO = RL.CNT then
      -- Last leg: append its departure and the final arrival.
      VRealLine := VRealLine || '-' || RL.DEPSTN || '-' || RL.ARRSTN;
      vFlagJoin := (RL.DEPSTN = RL.PRIOR_ARR) and vFlagJoin;
    else
      -- Intermediate leg: append its departure only.
      VRealLine := VRealLine || '-' || RL.DEPSTN;
      vFlagJoin := (RL.DEPSTN = RL.PRIOR_ARR) and vFlagJoin;
    end if;

    -- On the last leg, persist the route only if all legs connect.
    -- A NULL flag (a station code was NULL) is treated as "not connected".
    IF (RL.ROWNO = RL.CNT) AND vFlagJoin then
      -- The cursor exposes FLIGHT_DATE / FLIGHTNO; those are the keys that
      -- identify every leg of this flight.
      -- Rows whose REALCODE is NULL are corrected too; a plain "<>" would
      -- silently skip them.
      UPDATE F_FLIGHT
         SET REALCODE = VRealLine
       WHERE FLIGHT_DATE = RL.FLIGHT_DATE
         AND FLIGHTNO = RL.FLIGHTNO
         AND (REALCODE IS NULL OR REALCODE <> VRealLine);
    END IF;
  END LOOP;
  COMMIT;

  -- Step 2: derive the planned route code from the bulk of the data.
  -- Per (month, first 6 characters of FLIGHTNO) keep the most frequent
  -- REALCODE, but only when the group shows more than one distinct REALCODE
  -- (ROWCNT > 1) and the winner occurs more than 4 times (CNT > 4).
  -- REALCODE is added as a tie-breaker so the winner is deterministic when
  -- two route codes occur equally often.
  For LN in (SELECT TT.FLTID, TT.REALCODE, TT.STARTDATE, TT.LASTDATE
               FROM (SELECT T.YYYYMM, T.FLTID, T.REALCODE, T.CNT, T.STARTDATE, T.LASTDATE,
                            ROW_NUMBER() OVER(PARTITION BY T.YYYYMM, T.FLTID ORDER BY T.CNT DESC, T.REALCODE) AS ROWNO,
                            COUNT(*) OVER(PARTITION BY T.YYYYMM, T.FLTID) AS ROWCNT
                       FROM (SELECT TO_Char(F.FLIGHT_DATE, 'yyyyMM') AS YYYYMM,
                                    SUBSTR(F.FLIGHTNO, 1, 6) AS FLTID,
                                    F.REALCODE,
                                    COUNT(*) AS CNT,
                                    MIN(F.FLIGHT_DATE) AS STARTDATE,
                                    MAX(F.FLIGHT_DATE) AS LASTDATE
                               FROM F_FLIGHT F
                              WHERE F.FLIGHT_DATE BETWEEN VSDATE AND VEDATE
                              GROUP BY TO_Char(F.FLIGHT_DATE, 'yyyyMM'), SUBSTR(F.FLIGHTNO, 1, 6), F.REALCODE) T) TT
              WHERE TT.ROWCNT > 1
                AND TT.CNT > 4
                AND TT.ROWNO = 1
                AND TT.REALCODE IS NOT NULL -- a NULL winner carries no route to propagate
            ) loop
    -- Match flights with the same key expression that built FLTID; a
    -- substring-anywhere match (INSTR) would also hit unrelated, longer
    -- flight numbers that merely contain FLTID.
    -- STARTDATE and LASTDATE both lie in the winner's month, so the date
    -- range alone already restricts the update to that month.
    UPDATE F_FLIGHT
       SET LINECODE = LN.REALCODE
     WHERE SUBSTR(FLIGHTNO, 1, 6) = LN.FLTID
       AND FLIGHT_DATE BETWEEN LN.STARTDATE AND LN.LASTDATE
       AND (LINECODE IS NULL OR LINECODE <> LN.REALCODE);
  end loop;
  COMMIT;
end;