Linux backup: deleting files outside a specified date range
阿新 • Published: 2018-12-03
#!/usr/bin/env bash
source /etc/profile
echo " *************** start filter *************** " # get befor six month last day #m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d) #echo ${m0} #m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1}
#m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)
#echo ${m2}
#m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m3}
#m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m4}
#m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5}
#m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m6}
# Get the last day of the current month; the array length is available as ${#m[*]} or ${#m[@]}
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo m0 : ${m[0]} ' month : ' ${#m[@]}
for n in $(seq 0 11); do
    m[$n+1]=$(date -d "$(date -d ${m[$n]} +%Y%m01) -1 day" +%Y%m%d)
    echo m$[$n+1] : ${m[$n+1]} ' month : ' ${#m[*]}
done
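# For reference, a sketch of what the loop above yields (assuming GNU date and a
# run date of 2018-12-03): the last day of the current month and of each of the
# twelve months before it.
#   echo ${m[*]}    # -> 20181231 20181130 20181031 ... 20180131 20171231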
echo " ****** time : " $(date ' +%Y-%m-%d %H:%M:%S ' ) " ****** "
max_date=0
# get the latest filter directory and copy it to HDFS
cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
for dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
    if [[ -d $dir && $dir == *\_* ]]; then
        f_d=$(echo $dir | cut -d \_ -f 3 | cut -d \. -f 1)
        if [[ $max_date < $f_d ]]; then
            max_date=$f_d
            max_filter=$dir
        fi
    fi
done
echo " max date is : " $max_date
echo " max filter is : " $max_filter
pwd

# copy the most recent filter directory to HDFS
hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter
if [[ $? == 0 ]]; then
    echo " filter already exists : " $max_filter
else
    echo " start hdfs copy "
    echo " ****** start time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "
    hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters
    echo " ****** end time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "
fi
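# The cut pipeline above assumes the date sits in the third underscore-separated
# field of each directory name. A hypothetical example name, for illustration only:
#   echo run_filter_20181130.dir | cut -d \_ -f 3 | cut -d \. -f 1    # -> 20181130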
remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo " cutoff date for deleting local serialized files: " $remove_week
remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo " cutoff date for deleting local files and Hadoop filters: " $remove_date
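# Worked example of the cutoff arithmetic (assuming GNU date and max_date=20181203):
#   date -d "20181203 7 days ago" +%Y%m%d     # -> 20181126 (remove_week)
#   date -d "20181203 30 days ago" +%Y%m%d    # -> 20181103 (remove_date)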
echo " *************** start remove filter *************** " for r_dir in $( ls -l ./ | awk ' /^d/{print $NF} ' ) do if [[ -d $r_dir && $r_dir == *\_* ]] ; then r_d = $( echo $r_dir | cut -d \_ -f 3 | cut -d \. -f 1 ) if [[ $r_d < $remove_date ]] ; then if [[ ${m[ * ]} == * $r_d * ]] ; then cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/ $r_dir pwd for f_dir in $( ls *) do if [[ " $f_dir " == "mau_device_all.FILTER.SER" ]] ; then echo " ------ keep mau_filter is: " $f_dir ; else echo " remove file is: " $f_dir ; rm -r $f_dir fi done cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter pwd else echo " remove filter_dir is: " $r_dir rm -r $r_dir fi elif [[ $r_d < $remove_week ]] ; then if [[ $r_d == $m0 || $r_d == $m1 || $r_d == $m2 ]] ; then cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/ $r_dir pwd for f_dir in $( ls *) do if [[ " $f_dir " == "mau_device_all.FILTER.SER" ]] ; then echo " ------ week keep mau_filter is: " $f_dir ; else if [[ " $f_dir " == *.FILTER.SER ]] ; then echo " - last day of month - week remove file is: " $f_dir ; rm -r $f_dir fi fi done cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter pwd else echo " week remove filter is: " $r_dir rm -r $r_dir /*.FILTER.SER fi fi fi done
echo " =============== start remove hdfs filter =============== " # 刪除hdfs上指定日期外的tdid for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk ' {print $8} ' ) do if [[ $h_filter == *\_* ]] ; then h_date = $( echo $h_filter | cut -d \/ -f 6 | cut -d \_ -f 3 | cut -d \. -f 1 ) # echo " hdfs date : "$h_date # echo " hdfs filter : "$h_filter if [[ ${m[ * ]} == * $h_date * ]] ; then echo " remain hdfs filter is : " $h_filter elif [[ $h_date < $remove_date ]] ; then echo " not remain date is : " $h_date echo " remove hdfs filter is : " $h_filter hadoop fs -rmr $h_filter fi fi done
echo " -------------- start tdid --------------- " # 刪除小於30天的tdid cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo for tdid in $( ls *) do if [[ $tdid == *\_* ]] ; then t_d = $( echo $tdid | cut -d \_ -f 2 | cut -d \. -f 1 ) if [[ $t_d == $max_date || $t_d > $max_date ]] ; then echo " need copy date : " $t_d echo " need copy tdid : " $tdid # 檢查tdid是否存在 # hadoop fs -test -e jiaojiao/tdid/$tdid # if [[ $? == 0 ]]; then # echo " tdid is already exist,remove it first " # hadoop fs -rm jiaojiao/tdid/$tdid # hadoop fs -put $tdid jiaojiao/tdid # else # echo " start copy " # hadoop fs -put $tdid jiaojiao/tdid # fi elif [[ $t_d < $remove_date ]] ; then echo " remove tdid : " $tdid rm $tdid fi fi done
#echo " =============== start remove hdfs tdid =============== " #for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}') #do # if [[ $h_tdid == *\_* ]]; then # h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d \. -f 1) # echo $h_date # echo $h_tdid # fi #done