Flume 採集方案:將 Nginx 日誌採集到 HDFS 上
#########################################################
## Flume agent "a1": tail the nginx access log and ship
## newly appended lines to HDFS, partitioned by date.
#########################################################

a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source: exec-tail the nginx access log; -F survives log rotation
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /home/centos/logs/nginx/access.log

# Sink: write events to HDFS under a %Y/%m/%d date path
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://bdha/input/data-clean/nginx/%Y/%m/%d
a1.sinks.k1.hdfs.filePrefix = nginx
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.inUseSuffix = .tmp
a1.sinks.k1.hdfs.round = true
# Roll a new file every 100 events; size- and time-based rolling disabled (0)
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollCount = 100
a1.sinks.k1.hdfs.serializer = TEXT
a1.sinks.k1.hdfs.fileType = DataStream
# minBlockReplicas = 1 prevents spurious file rolls when the HDFS
# replication pipeline temporarily shrinks
a1.sinks.k1.hdfs.minBlockReplicas = 1
# Resolve the %Y/%m/%d escapes from the agent's local clock
# (no timestamp header interceptor required)
a1.sinks.k1.hdfs.useLocalTimeStamp = true

# Channel: in-memory buffer between source and sink
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 1000

# Wire source r1 and sink k1 together through channel c1
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
定時指令碼:透過 Sqoop 將清洗完的資料從 MySQL 增量匯入(追加)到 HDFS
#!/bin/sh
###############
# Daily Sqoop job: pull yesterday's cleaned rows from MySQL and
# append them to HDFS under a YEAR/MONTH partition directory.
# Intended to be scheduled (e.g. from cron) once per day.
###############

SQOOP_BIN=/home/centos/sqoop/bin/sqoop

# Date window: [yesterday 00:00, today 00:00).
# NOTE(review): END_DATE was echoed but never defined in the original;
# "today" is the natural upper bound for a daily job — TODO confirm.
START_DATE=$(date -d "1 day ago" +%Y-%m-%d)
END_DATE=$(date +%Y-%m-%d)
echo "START_DATE=${START_DATE} END_DATE=${END_DATE}"

# Partition path components, taken from yesterday's date.
YEAR=$(date -d "1 day ago" +%Y)
MONTH=$(date -d "1 day ago" +%m)
echo "YEAR=${YEAR} MONTH=${MONTH}"

# Notes:
#  - $CONDITIONS must reach Sqoop literally (Sqoop substitutes its split
#    predicates there), hence the backslash escape inside double quotes.
#  - Date bounds are spliced in as quoted SQL string literals.
#  - Table name inferred from the target directory — TODO confirm.
#  - Passing --password on the command line leaks it via `ps`; prefer
#    --password-file in production.
"${SQOOP_BIN}" import \
  --connect jdbc:mysql://192.168.2.101:3306/spoop \
  --username root \
  --password root \
  --query "SELECT id, name, date FROM t_user WHERE date >= '${START_DATE}' AND date < '${END_DATE}' AND \$CONDITIONS" \
  --target-dir "hdfs://bdha/input/data-clean/t_user/${YEAR}/${MONTH}" \
  --append