常用的shell日誌統計指令碼
阿新 • • 發佈:2019-01-05
egrep "2017:15:" access.log | awk '{print $6}'| sort | uniq -c | sort -rn | head
sort -u 去重,保證唯一性;uniq 只去除相鄰(連續)的重複行,所以要先 sort 再 uniq
sort + uniq -c 是黃金搭檔
sort -n 按照數值大小排序(而非字典序),非常重要!
參考 http://man.linuxde.net/uniq
參考 http://man.linuxde.net/sort
一段時間內域名訪問總流量,或粗略的掉量分析;精準的掉量分析可參考另一篇用資料庫的方式
egrep "2017:14:" access-9011.log | awk '{print $7, $11}' | awk '{a[$1]+=$2;} END{for(i in a)print i,a[i];}'
egrep "2017:15:" access.log | awk '($6 == "112.64.68.252") {print $6, $11}' | awk '{a[$1] += $2;} END{ for(i in a) print i,a[i];}' | sort -k2nr | head -20
a[] 類似一種 map 的容器;sort -k2 按照第二列排序
參考 https://www.cnblogs.com/51linux/archive/2012/05/23/2515299.html
檢視某個域名在一定時間內的訪問次數
cat access.log | awk '$1 >= 1445429880 && $1 <= 1445430000 && $7 ~ /\/\/dup.baidustatic.com/ {print $0}' | wc -l
~ 代表匹配正則表示式,例:awk '$0 ~ /.*/ {print}' test.txt
檢視日誌錯誤的狀態碼
tail -f access.log | awk '{if($3 ~/(4|5)../) print $0}'
具體域名的請求時間
cat access.log | awk '{if($7 ~/\/\/img.baidu.com/) print $2}'|sort | uniq -c | sort -nr
具體域名的狀態碼數量
cat access.log | awk '$7~/img.baidu.com/ {a[$3]++} END{for(i in a) printf("%s %d\n", i, a[i])}' | sort
抓包過濾分析
egrep -v "(ali|dl|download|cname|taobao|tmall|ssl|https|api|login|denglu|logout|push|upload|https|ntp|timezone|pass|xunlei|pay|\:|update|akadns.net|money|ptlogin|(2[0-4][0-9]|25[0-5]|1[0-9][0-9]|[1-9]?[0-9])(\.(2[0-4][0-9]|25[0-5]|1[0-9][0-9]|[1-9]?[0-9])){3}|register|account|weibo|log|search|weather|reg|conf)" top20.txt | egrep "(img|static|pic|image)"> aaaaaaaaaa
統計域名訪問量
cat access.log | awk '$1>= 1511107202 && $1<1511181423 && $11~"TCP_MISS" {print $7}' | awk -F "/" '{print $3}' | sort | uniq -c | sort -k1nr | head -10
zcat access.log_HBYD-ICS-6.20180201.16h44m33s-20180201.21h26m12s.old.gz|awk '$1>= 1517394600 && $1 <= 1517396400 {print $0}'|awk -F'"' '{split($2,s,"/"); print s[3],$3 }' | awk '{a[$1]+=$2;} END{for(i in a) print i,a[i];}'|sort -k2rn|head -30
date -d "20180131 18:30:00" +%s
統計ip
zcat access.log_HBYD-ICS-14.20180131.16h32m09s-20180131.19h10m05s.old.gz | awk '$1>= 1517389200 && $1 <= 1517392800 {print $0}'|awk '{split($5,s,"."); print s[1]"."s[2]"."s[3]}' | awk '{a[$1]+=1;} END{for(i in a) print i,a[i];}'| sort -k2rn|head -30
awk '{$2=""; print $0}' hb_ip.txt 刪除第二列
cat xx.txt | sed -e '/^$/d' 去除空行
看總行數 第二種更妥當一些
cat ip_file | awk '{line_num++;} END{printf("the sum of line num = %d\n",line_num);}'
cat ip_file | awk 'BEGIN {line_num=0;} {line_num++;} END{printf("the sum of line num = %d\n",line_num);}'
awk條件語句計算總大小
ls -l | awk 'BEGIN{sum_size=0;} {if($5!=4096) sum_size+=$5;} END{printf("sum of = %dM\n",sum_size/1024/1024);}'
awk陣列操作 此例必在一個{}內
awk 'BEGIN{info="it is a test";lens=split(info,tA," ");print length(tA),lens;}'
awk 'BEGIN{str="it is a test"; lens=split(str,tA," "); print tA[3]}'
流量激增
cat access.log | awk '{if ($8 == "GET" && $1 >= 1511107202 && $1<= 1511169624) print $9, $11 }' | awk '{split($1, s, "/")} {a[s[5]]+=$2;} END{for(i in a)print i, a[i];}'
流量激增統計域名
zcat access.old.gz | awk '$1 >= 1515327480 && $1 <= 1515327540 {print}' | awk '{print $7}' | awk -F/ '{print$3}' | sort | uniq -c | sort -k1nr | head
統計哪種資源最多 eg 1.mp4?wd=linux&length=1024
awk '{split($7, arr_uri, "?"); num = split(arr_uri[1],suffix,"."); print suffix[num];}' icr_access.log | sort | uniq -c | sort -nr | head -20
icr分析方法:
某段ip在一段時間內的攔截次數
cd /var/log/icrskice/
zcat icr_access.log.gz | grep '\[20171129.1437' | grep 203.187.160.131 -c
zcat icr_access.log.gz | grep '\[20171129.1450' | grep 203.187.160.131 | grep iqiyi -c
某段ip的訪問總流量
egrep "10\.17\." access-9011.log | awk '{print $7, $11}' | awk '{a[$1]+=$2;} END{for(i in a)print i,a[i], sum_size+=a[i];}'
zcat access-9011.log.gz | awk '$1>=1511830800&&$1<=1511859600{print}' | awk '$6~/^10\.17\./{print}' | awk '{sum+=$11} END {print"Sum = ",sum}'