1. 程式人生 > >flume採集方案nginx日誌到hdfs上

Flume 採集 nginx 日誌到 HDFS 的方案

#########################################################

## Purpose: follow the nginx access log and write the collected events to HDFS.
#########################################################
# Components of agent a1: one source, one sink, one channel.
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source r1: exec source that follows the nginx access log with `tail -F`.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /home/centos/logs/nginx/access.log

# Sink k1: HDFS sink writing plain-text files into a year/month/day directory.
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://bdha/input/data-clean/nginx/%Y/%m/%d
a1.sinks.k1.hdfs.filePrefix = nginx
a1.sinks.k1.hdfs.fileSuffix = .log
# Files still being written carry this suffix until they are rolled.
a1.sinks.k1.hdfs.inUseSuffix = .tmp
# roundValue/roundUnit are not set here, so the sink's defaults apply.
a1.sinks.k1.hdfs.round = true
# Size-based and time-based rolling are disabled (0); roll every 100 events.
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollCount = 100
# The serializer key is read directly under the sink, without the "hdfs."
# prefix; TEXT is also the sink's default.
a1.sinks.k1.serializer = TEXT
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.minBlockReplicas = 1
# Resolve %Y/%m/%d from the agent's local clock; the exec source does not add
# a timestamp header to events.
a1.sinks.k1.hdfs.useLocalTimeStamp = true

# Channel c1: in-memory buffer between source and sink.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 1000

# Wire source r1 and sink k1 together through channel c1.
# Note: a source takes "channels" (plural), a sink takes "channel" (singular).
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

清洗完的指令碼定時追加到MySQL

#!/bin/sh
###############
# Import yesterday's rows of t_user from MySQL into a year/month directory
# on HDFS, appending to whatever is already there.
# Relies on GNU date for the `-d "1 day ago"` syntax.
###############
SQOOP_BIN=/home/centos/sqoop/bin/sqoop

# Half-open date window [START_DATE, END_DATE): yesterday up to, not including, today.
START_DATE=$(date -d "1 day ago" +%Y-%m-%d)
echo "START_DATE=${START_DATE}"
END_DATE=$(date +%Y-%m-%d)
echo "END_DATE=${END_DATE}"

# Year and month of yesterday, used to build the HDFS target directory.
YEAR=$(date -d "1 day ago" +%Y)
echo "YEAR=${YEAR}"
MONTH=$(date -d "1 day ago" +%m)
echo "MONTH=${MONTH}"

# NOTE(review): the password is passed on the command line and is visible in
# the process list; consider --password-file or -P.
# - Date values are single-quoted so MySQL compares them as dates instead of
#   evaluating 2024-01-02 as integer subtraction.
# - \$CONDITIONS is escaped so the shell leaves the literal token for Sqoop.
# - A free-form --query import needs --split-by or a single mapper; one mapper
#   is used here because no suitable split column is known.
"${SQOOP_BIN}" import \
    --connect jdbc:mysql://192.168.2.101:3306/spoop \
    --username root \
    --password root \
    --query "SELECT id, name, date
FROM t_user WHERE date >= '${START_DATE}' AND date < '${END_DATE}' AND \$CONDITIONS" \
    --num-mappers 1 \
    --target-dir "hdfs://bdha/input/data-clean/t_user/${YEAR}/${MONTH}" \
    --append