-- 利用大資料智慧更正瑕疵資料的應用樣例
-- Created on 2018-09-27 by 三界
DECLARE
    -- Purpose (per the original author's notes): using the large volume of
    -- existing flight data, compute each flight's actual flown route and then
    -- derive the flight's planned route code from it, so that less base data
    -- has to be maintained by hand.
    --
    -- Columns of F_FLIGHT used here (per the original author's notes):
    --   FLIGHT_DATE : flight date
    --   FLIGHTNO    : flight number
    --   REALCODE    : actual flown route code
    --   LINECODE    : planned route code
    --   DEPSTN      : departure airport code
    --   ARRSTN      : arrival airport code
    --   OUT_TIME    : off-block time
    -- REALCODE / LINECODE are initialised elsewhere to
    -- "<departure>-<arrival>", e.g. HAK-HKG.

    -- Processing window on FLIGHT_DATE.
    VSDATE    DATE;
    VEDATE    DATE;
    -- Route code being assembled for the current (FLIGHT_DATE, FLIGHTNO) group.
    -- Deliberately wide: legs are concatenated before the continuity check, so
    -- a group with duplicated/dirty legs must not overflow the buffer and abort
    -- the whole run with ORA-06502.
    VRealLine VARCHAR2(4000);
    -- TRUE while every leg seen so far departs from where the previous leg
    -- arrived; FALSE or NULL (a station is NULL) means the chain is broken.
    vFlagJoin BOOLEAN;
BEGIN
    VSDATE := TO_DATE('2017-01-01', 'YYYY-MM-DD');
    VEDATE := TO_DATE('2018-01-01', 'YYYY-MM-DD');
    -- NOTE(review): BETWEEN is inclusive on both ends, so rows whose
    -- FLIGHT_DATE is exactly VEDATE are processed too -- confirm this is
    -- intended rather than a half-open [VSDATE, VEDATE) window.

    -- Step 1: compute the actual route code for flights that have more than one
    -- leg on the same day (return, diversion, multi-leg flights; per the
    -- original notes these are under 4% of flights in the current network).
    FOR RL IN (
        SELECT T.FLIGHT_DATE,
               T.FLIGHTNO,
               T.DEPSTN,
               T.ARRSTN,
               T.PRIOR_ARR,
               T.NEXT_DEP,
               T.ROWNO,
               T.CNT
          FROM (SELECT F.FLIGHT_DATE,
                       F.FLIGHTNO,
                       F.DEPSTN,
                       F.ARRSTN,
                       LAG(F.ARRSTN, 1, NULL) OVER (PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS PRIOR_ARR,
                       LEAD(F.DEPSTN, 1, NULL) OVER (PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS NEXT_DEP,
                       ROW_NUMBER() OVER (PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO ORDER BY F.OUT_TIME ASC) AS ROWNO,
                       COUNT(*) OVER (PARTITION BY F.FLIGHT_DATE, F.FLIGHTNO) AS CNT
                  FROM F_FLIGHT F
                 WHERE F.FLIGHT_DATE BETWEEN VSDATE AND VEDATE) T
         WHERE T.CNT > 1 -- only groups with more than one leg
         -- The loop body accumulates state across consecutive rows, so the
         -- ordering must be stated on the outermost query; an ORDER BY inside
         -- an inline view is not guaranteed to survive.
         ORDER BY T.FLIGHT_DATE, T.FLIGHTNO, T.ROWNO
    ) LOOP
        -- Build the route code leg by leg and track whether the legs connect.
        IF RL.ROWNO = 1 THEN
            -- First leg: start a new route and check it connects to the next leg.
            VRealLine := RL.DEPSTN;
            vFlagJoin := (RL.ARRSTN = RL.NEXT_DEP);
        ELSIF RL.ROWNO = RL.CNT THEN
            -- Last leg: append both its departure and its final arrival.
            VRealLine := VRealLine || '-' || RL.DEPSTN || '-' || RL.ARRSTN;
            vFlagJoin := (RL.DEPSTN = RL.PRIOR_ARR) AND vFlagJoin;
        ELSE
            -- Intermediate leg: append its departure station only.
            VRealLine := VRealLine || '-' || RL.DEPSTN;
            vFlagJoin := (RL.DEPSTN = RL.PRIOR_ARR) AND vFlagJoin;
        END IF;

        -- On the last leg, persist the route only if the whole chain connected.
        IF (RL.ROWNO = RL.CNT) AND vFlagJoin THEN
            UPDATE F_FLIGHT
               SET REALCODE = VRealLine
             -- The cursor exposes FLIGHT_DATE / FLIGHTNO (there are no DATOP /
             -- FLTID fields on RL).
             WHERE FLIGHT_DATE = RL.FLIGHT_DATE
               AND FLIGHTNO = RL.FLIGHTNO
               -- "<>" alone is never true for NULL, which would leave rows
               -- with an unset REALCODE uncorrected.
               AND (REALCODE IS NULL OR REALCODE <> VRealLine);
        END IF;
    END LOOP;
    COMMIT;

    -- Step 2: infer the planned route code from the data. Rationale (per the
    -- original notes): the actual route flown by the bulk of normal flights is
    -- the planned route. For each month and 6-character flight-number prefix,
    -- take the most frequent REALCODE, provided there is more than one distinct
    -- REALCODE in that group and the winner occurs more than 4 times.
    FOR LN IN (
        SELECT TT.FLTID,
               TT.REALCODE,
               TT.STARTDATE,
               TT.LASTDATE
          FROM (SELECT T.YYYYMM,
                       T.FLTID,
                       T.REALCODE,
                       T.CNT,
                       T.STARTDATE,
                       T.LASTDATE,
                       -- REALCODE as tie-breaker keeps the winner deterministic
                       -- when two routes are equally frequent.
                       ROW_NUMBER() OVER (PARTITION BY T.YYYYMM, T.FLTID ORDER BY T.CNT DESC, T.REALCODE) AS ROWNO,
                       COUNT(*) OVER (PARTITION BY T.YYYYMM, T.FLTID) AS ROWCNT
                  FROM (SELECT TO_CHAR(F.FLIGHT_DATE, 'yyyyMM') AS YYYYMM,
                               SUBSTR(F.FLIGHTNO, 1, 6) AS FLTID,
                               F.REALCODE,
                               COUNT(*) AS CNT,
                               MIN(F.FLIGHT_DATE) AS STARTDATE,
                               MAX(F.FLIGHT_DATE) AS LASTDATE
                          FROM F_FLIGHT F
                         WHERE F.FLIGHT_DATE BETWEEN VSDATE AND VEDATE
                         GROUP BY TO_CHAR(F.FLIGHT_DATE, 'yyyyMM'),
                                  SUBSTR(F.FLIGHTNO, 1, 6),
                                  F.REALCODE) T) TT
         WHERE TT.ROWCNT > 1
           AND TT.CNT > 4
           AND TT.ROWNO = 1
           -- A NULL winner carries no route to propagate; skipping it keeps
           -- the update below a no-op for that group.
           AND TT.REALCODE IS NOT NULL
    ) LOOP
        UPDATE F_FLIGHT
           SET LINECODE = LN.REALCODE
         -- Match on the same 6-character prefix that defined FLTID. A substring
         -- search (INSTR) would let a short flight number such as 'HU70'
         -- overwrite unrelated flights like 'HU7001'.
         WHERE SUBSTR(FLIGHTNO, 1, 6) = LN.FLTID
           -- STARTDATE and LASTDATE both fall inside the group's month, so this
           -- range already restricts the update to that month.
           AND FLIGHT_DATE BETWEEN LN.STARTDATE AND LN.LASTDATE
           AND (LINECODE IS NULL OR LINECODE <> LN.REALCODE);
    END LOOP;
    COMMIT;
END;