1. 程式人生 > >impala sql 脫敏 資料大約1億1千萬 + 800多萬

impala sql 脫敏 資料大約1億1千萬 + 800多萬

impala sql 脫敏   資料大約1億1千萬 

強大的 Impala 幾分鐘就搞定了。之前一直很擔心會把叢集跑崩掉,結果是想多了。儲存架構為 Impala + Kudu。

背景:修改生產環境的資料;使用者表約 800 萬筆資料,訂單表約 1 億 1 千萬筆資料。

在 Impala SQL 中發現,length() 是以位元組計算長度:一個漢字的長度通常是 3(不固定,有些漢字的長度更長)。

直接對全表資料進行 UPDATE 操作;已經脫敏過的資料則直接跳過。

手機號脫敏
-- Mask the mobile column of bdp_ods.user.
--   longer than 7      : keep positions 1-3 and everything from position 8,
--                        e.g. 13812345678 -> 138****5678
--   length 7 or shorter: keep positions 1-2 and everything from position 6
-- Rows that are NULL, equal to the literal '_NUll' (presumably a
-- null-placeholder string; confirm against the data), or already contain '*'
-- are excluded by the WHERE clause, so they are not rewritten at all and the
-- statement can be re-run safely.
-- For a single-row trial run, append `AND id = 10014` to the WHERE clause.
UPDATE bdp_ods.user
SET mobile = CASE
                 WHEN length(mobile) > 7
                     THEN concat(substr(mobile, 1, 3), '****', substr(mobile, 8))
                 ELSE concat(substr(mobile, 1, 2), '****', substr(mobile, 6))
             END
WHERE mobile != '_NUll'
  AND mobile NOT LIKE '%*%';

身份證脫敏
-- Mask the certificate_no (ID number) column of bdp_ods.user.
--   length 4 or shorter: replace the whole value with '****'
--   longer than 4      : keep positions 1-2, insert a fixed run of asterisks,
--                        then keep everything from position 17 (for an
--                        18-character number that is the last two characters)
-- The WHERE clause limits the write to rows that actually change: short
-- values not yet equal to '****', and longer values that are neither the
-- literal '_NUll' (presumably a null-placeholder string; confirm) nor
-- already contain '*'. NULL values match neither arm and are left alone.
-- For a single-row trial run, append `AND id = 10553` after the closing
-- parenthesis of the WHERE predicate.
UPDATE bdp_ods.user
SET certificate_no = CASE
                         WHEN length(certificate_no) <= 4 THEN '****'
                         ELSE concat(substr(certificate_no, 1, 2), '**************', substr(certificate_no, 17))
                     END
WHERE (
          (length(certificate_no) <= 4 AND certificate_no != '****')
       OR (length(certificate_no) > 4
           AND certificate_no != '_NUll'
           AND certificate_no NOT LIKE '%*%')
      );

姓名脫敏
-- Mask the user_name column of bdp_ods.user.
--
-- NOTE(review): the offsets below assume length()/substr() count bytes and
-- that one CJK character occupies 3 bytes (UTF-8). If the session runs with
-- UTF8_MODE enabled the functions count characters instead and these offsets
-- must be revisited. Confirm before running.
--
--   length 1 or 3   : whole value becomes '*'           (one ASCII / one CJK char)
--   length 2        : first byte + '*'
--   length 4 to 8   : first 3 bytes + '*'
--   length 9 or 10  : first 3 bytes + '*'  + bytes from position 7
--   length 11 or 12 : first 3 bytes + '**' + bytes from position 10
--   longer than 12  : first 3 bytes + '**' + last 3 bytes
--
-- Changes from the earlier version of this statement:
--   * the ">12" branch used length(user_name) - 3 as the start position, which
--     returns the last FOUR bytes and cuts into the preceding 3-byte
--     character; it now starts at length(user_name) - 2 (last three bytes),
--     matching what the 12-byte branch keeps;
--   * the 11-byte branch kept only 2 leading bytes while every neighbouring
--     branch keeps 3; it now keeps 3 so a leading CJK character is not split;
--   * the skip conditions (empty, the literal '_NUll', already contains '*')
--     moved from every WHEN into the WHERE clause, so untouched rows are not
--     rewritten. NULL names fail the predicate and are left alone.
-- For a single-row trial run, append `AND id = 10553` to the WHERE clause.
UPDATE bdp_ods.user
SET user_name = CASE
                    WHEN length(user_name) IN (1, 3)
                        THEN '*'
                    WHEN length(user_name) = 2
                        THEN concat(substr(user_name, 1, 1), '*')
                    WHEN length(user_name) BETWEEN 4 AND 8
                        THEN concat(substr(user_name, 1, 3), '*')
                    WHEN length(user_name) IN (9, 10)
                        THEN concat(substr(user_name, 1, 3), '*', substr(user_name, 7))
                    WHEN length(user_name) IN (11, 12)
                        THEN concat(substr(user_name, 1, 3), '**', substr(user_name, 10))
                    ELSE concat(substr(user_name, 1, 3), '**', substr(user_name, length(user_name) - 2))
                END
WHERE length(user_name) > 0
  AND user_name != '_NUll'
  AND user_name NOT LIKE '%*%';