Storm+Hbase廣告實時統計
阿新 • • 發佈:2019-06-24
本文主要講述使用Kafka+Storm+Hbase搭建的一套廣告實時計算系統。其中伺服器顯示使用的是SpringBoot+Vue+ElementUI+ECharts.
主要內容:
- 1.需求
- 2.日誌格式
- 3.Hbase表格設計
- 4.編寫Storm程式
- 5.Kafka接收訊息
- 6.Hbase資料查詢
- 7.參考
1.需求
- 1、某個廣告在某個省的當前投放量
- 2、某個廣告在某個市的當前投放量
- 3、某個廣告在某個使用者客戶端上的當前投放量
- 4、某個廣告在累加一段時間內的某個省的歷史投放趨勢
- 5、某個廣告在累加一段時間內的某個市的歷史投放趨勢
- 6、某個廣告在累加一段時間內的某個客戶端歷史投放趨勢
- 7、某個廣告的當前的點選量
- 8、某個廣告在累加一段時間內的點選趨勢
2.日誌格式
2014-01-13\t19:11:55\t{"adid":"31789","uid":"9871","action":"view"}\t63.237.239.3\t北京\t北京
- 日期:2014-01-13
- 時間:19:11:55
- Json:方便擴充套件
  - adid:廣告ID
  - uid:使用者ID
  - action:使用者行為click、view
- IP:63.237.239.3
- 省:北京
- 市:北京
3.Hbase建表
表名 | realtime_ad_stat |
---|---|
行鍵 | ADID_Province_20181212 ADID_City_20181212 ADID_UID_20181212 |
列簇 | stat |
列 | view_cnt、click_cnt |
# Create the table with a single column family 'stat' (VERSIONS is set to 2147483647)
create 'realtime_ad_stat',{NAME => 'stat',VERSIONS => 2147483647}
# List the tables
list
# Remove all data from the table
truncate 'realtime_ad_stat'
# Delete the table: disable it first, then drop it
disable 'realtime_ad_stat'
drop 'realtime_ad_stat'
4.編寫Storm程式
4.1.AdTopology
/**
 * Entry point that wires the Kafka spout and the two bolts into one Storm topology.
 *
 * <p>Data flow: {@code KafkaSpout} (topic {@code AD}) -> {@code LogToModelBolt} -> {@code ToHbaseBolt}.
 */
public class AdTopology {

    /**
     * Builds the topology and submits it.
     *
     * @param args when non-empty, {@code args[0]} is the topology name and the topology is
     *             submitted through {@code StormSubmitter}; otherwise it is run in a
     *             {@code LocalCluster} under the name {@code AdTopology}
     * @throws Exception if building or submitting the topology fails
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder topologyBuilder = new TopologyBuilder();

        // Consume topic AD as consumer group STORM_AD_GROUP, using the LATEST first-poll strategy.
        KafkaSpoutConfig<String, String> kafkaSpoutConfig =
                KafkaSpoutConfig.builder("hadoop1:9092,hadoop2:9092,hadoop3:9092", "AD")
                        .setProp(ConsumerConfig.GROUP_ID_CONFIG, "STORM_AD_GROUP")
                        .setFirstPollOffsetStrategy(KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST)
                        .build();

        // Parameterized spout instead of the raw type, so the key/value types stay checked.
        topologyBuilder.setSpout("KafkaSpout", new KafkaSpout<>(kafkaSpoutConfig), 2);
        topologyBuilder.setBolt("me.jinkun.ad.storm.LogToModelBolt", new LogToModelBolt(), 2)
                .localOrShuffleGrouping("KafkaSpout");
        topologyBuilder.setBolt("me.jinkun.ad.storm.ToHbaseBolt", new ToHbaseBolt(), 4)
                .localOrShuffleGrouping("me.jinkun.ad.storm.LogToModelBolt");

        // Build once and reuse the same instance for both submission modes.
        StormTopology topology = topologyBuilder.createTopology();

        Config config = new Config();
        config.setDebug(false);

        if (args != null && args.length > 0) {
            // Cluster mode
            config.setNumWorkers(4);
            StormSubmitter.submitTopology(args[0], config, topology);
        } else {
            // Local mode
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("AdTopology", config, topology);
        }
    }
}
從Kafka裡讀取Topic為AD的最新的日誌訊息並傳送給LogToModelBolt
4.2.LogToModelBolt
public class LogToModelBolt extends BaseBasicBolt {
private static final Logger LOG = LoggerFactory.getLogger(LogToModelBolt.class);
public void execute(Tuple input, BasicOutputCollector collector) {
// 2014-01-13 19:11:55 {"adid":"31789","uid":"9871","action":"view"} 63.237.239.3 北京 北京
String line = input.getStringByField("value");
if (LOG.isInfoEnabled()) {
LOG.info("line:[{}]", line);
}
String[] arr = line.split("\t", -1);
if (arr.length == 6) {
String date = arr[0].trim().replace("-", "");
String time = arr[1].trim();
String json = arr[2].trim();
String ip = arr[3].trim();
String province = arr[4].trim();
String city = arr[5].trim();
if (StringUtils.isNotEmpty(json)) {
Ad ad = new Gson().fromJson(json, Ad.class);
if (null != ad && StringUtils.isNotEmpty(ad.getAdid())) {
// 省
if (StringUtils.isNotEmpty(province)) {
String rowkey = ad.getAdid() + "_" + province + "_" + date;
collector.emit(new Values(ad.getAction(), rowkey, 1L));
}
// 市
if (StringUtils.isNotEmpty(city)) {
String rowkey = ad.getAdid() + "_" + city + "_" + date;
collector.emit(new Values(ad.getAction(), rowkey, 1L));
}
// 客戶端
if (StringUtils.isNotEmpty(province)) {
String rowkey = ad.getAdid() + "_" + ad.getUid() + "_" + date;
collector.emit(new Values(ad.getAction(), rowkey, 1L));
}
}
}
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("action", "rowkey", "cnt"));
}
}
解析Log並轉化為Model,傳送給ToHbaseBolt
4.3.ToHbaseBolt
/**
 * Terminal bolt that adds each incoming count to the HBase table {@code realtime_ad_stat}.
 *
 * <p>Input tuples carry the fields {@code action}, {@code rowkey} and {@code cnt}.
 * {@code view} increments {@code stat:view_cnt}, {@code click} increments
 * {@code stat:click_cnt}; any other action is ignored. The bolt emits nothing.
 */
public class ToHbaseBolt extends BaseBasicBolt {
    private static final Logger LOG = LoggerFactory.getLogger(ToHbaseBolt.class);

    private static final String TABLE_NAME = "realtime_ad_stat";
    private static final byte[] FAMILY = Bytes.toBytes("stat");
    private static final byte[] VIEW_CNT = Bytes.toBytes("view_cnt");
    private static final byte[] CLICK_CNT = Bytes.toBytes("click_cnt");

    // Created in prepare() and released in cleanup(); transient so they are never serialized
    // with the bolt instance.
    private transient Connection connection;
    private transient Table table;

    /**
     * Opens the HBase connection and table handle.
     *
     * @throws IllegalStateException if the connection or table cannot be opened; failing here
     *         replaces the original behaviour of printing the error and then hitting a
     *         NullPointerException on every tuple
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        try {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
            connection = ConnectionFactory.createConnection(conf);
            table = connection.getTable(TableName.valueOf(TABLE_NAME));
        } catch (IOException e) {
            closeResources();
            throw new IllegalStateException("Failed to open HBase table " + TABLE_NAME, e);
        }
    }

    /**
     * Increments the counter column matching the tuple's action by the tuple's count.
     * An I/O failure is logged and the tuple is dropped (best effort, as in the original).
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String action = input.getStringByField("action");
        String rowkey = input.getStringByField("rowkey");
        Long pv = input.getLongByField("cnt");

        byte[] qualifier;
        if ("view".equals(action)) {
            qualifier = VIEW_CNT;
        } else if ("click".equals(action)) {
            qualifier = CLICK_CNT;
        } else {
            // Unknown action: nothing to count.
            return;
        }

        try {
            table.incrementColumnValue(Bytes.toBytes(rowkey), FAMILY, qualifier, pv);
        } catch (IOException e) {
            LOG.error("Failed to increment action [{}] for rowkey [{}]", action, rowkey, e);
        }
    }

    /** Releases the table handle and the HBase connection. */
    @Override
    public void cleanup() {
        closeResources();
    }

    /** This bolt is a sink and declares no output fields. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    /** Closes table and connection if they were opened, logging (not propagating) close errors. */
    private void closeResources() {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                LOG.warn("Failed to close HBase table {}", TABLE_NAME, e);
            }
            table = null;
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (IOException e) {
                LOG.warn("Failed to close HBase connection", e);
            }
            connection = null;
        }
    }
}
ToHbaseBolt 將處理後的資料寫入到Hbase表裡
5.Kafka
5.1.建立名為AD的Topic
# Describe topics (no --topic is given) using the /kafka ZooKeeper path
kafka-topics.sh --describe \
--zookeeper hadoop1:2181,hadoop2:2181,hadoop3:2181/kafka
# Create topic AD with 3 partitions and replication factor 3
kafka-topics.sh --create \
--zookeeper hadoop1:2181,hadoop2:2181,hadoop3:2181/kafka \
--topic AD \
--partitions 3 \
--replication-factor 3
# Console consumer for topic AD, reading from the beginning
kafka-console-consumer.sh \
--zookeeper hadoop1:2181,hadoop2:2181,hadoop3:2181/kafka \
--topic AD \
--from-beginning
# Delete topic AD
kafka-topics.sh --delete \
--zookeeper hadoop1:2181,hadoop2:2181,hadoop3:2181/kafka \
--topic AD
5.2.模擬傳送訊息
/**
 * Test client that publishes simulated ad log lines to the Kafka topic {@code AD}.
 *
 * <p>Each message has six tab-separated columns:
 * {@code date \t time \t json \t ip \t province \t city}, with a fixed date and time and a
 * random ad id, uid, action, IP address and province/city pair.
 */
public class ProducerClient {
    private static final Logger LOG = LoggerFactory.getLogger(ProducerClient.class);

    /** Number of simulated messages sent per run. */
    private static final int MESSAGE_COUNT = 2000;

    /** Province and city, already joined by the tab that separates the two log columns. */
    private static final String[] PROVINCES_CITIES = new String[]{
            "山東\t濟南",
            "河北\t石家莊",
            "吉林\t長春",
            "黑龍江\t哈爾濱",
            "遼寧\t瀋陽",
            "內蒙古\t呼和浩特",
            "新疆\t烏魯木齊",
            "甘肅\t蘭州",
            "寧夏\t銀川",
            "山西\t太原",
            "陝西\t西安",
            "河南\t鄭州",
            "安徽\t合肥",
            "江蘇\t南京",
            "浙江\t杭州",
            "福建\t福州",
            "廣東\t廣州",
            "江西\t南昌",
            "海南\t海口",
            "廣西\t南寧",
            "貴州\t貴陽",
            "湖南\t長沙",
            "湖北\t武漢",
            "四川\t成都",
            "雲南\t昆明",
            "西藏\t拉薩",
            "青海\t西寧",
            "天津\t天津",
            "上海\t上海",
            "重慶\t重慶",
            "北京\t北京",
            "臺灣\t臺北",
            "香港\t香港",
            "澳門\t澳門"
    };
    private static final String[] ACTIONS = new String[]{
            "view", "click"
    };
    private static final String[] ADIDS = new String[]{
            "1", "2", "3", "4", "5"
    };

    /**
     * Sends {@link #MESSAGE_COUNT} simulated log lines to topic {@code AD} and waits until
     * they are delivered.
     *
     * @param args unused
     * @throws Exception if the producer cannot be created or sending fails
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop1:9092,hadoop2:9092,hadoop3:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // try-with-resources closes the producer even when sending throws.
        try (org.apache.kafka.clients.producer.KafkaProducer<String, String> kafkaProducer =
                     new org.apache.kafka.clients.producer.KafkaProducer<>(props)) {
            // One generator for the whole run instead of a new Random per field.
            Random random = new Random();
            for (int i = 0; i < MESSAGE_COUNT; i++) {
                kafkaProducer.send(new ProducerRecord<>("AD", buildLogLine(random)));
            }
            // flush() blocks until the buffered records have been sent, so no sleep is needed.
            kafkaProducer.flush();
            LOG.info("{}", "傳送訊息完成");
        }
    }

    /**
     * Builds one simulated log line, e.g.
     * {@code 2018-08-10 12:00:00 {"adid":"3","uid":"42","action":"view"} 10.1.2.3 北京 北京}
     * (columns separated by tabs).
     */
    private static String buildLogLine(Random random) {
        StringBuilder sb = new StringBuilder();
        sb.append("2018-08-10");
        sb.append("\t");
        sb.append("12:00:00");
        sb.append("\t");
        sb.append("{\"adid\":\"").append(ADIDS[random.nextInt(ADIDS.length)])
                .append("\",\"uid\":\"").append(random.nextInt(200))
                .append("\",\"action\":\"").append(ACTIONS[random.nextInt(ACTIONS.length)])
                .append("\"}");
        sb.append("\t");
        // nextInt(256) covers the full octet range 0..255 (nextInt(255) never yields 255).
        sb.append(random.nextInt(256)).append('.')
                .append(random.nextInt(256)).append('.')
                .append(random.nextInt(256)).append('.')
                .append(random.nextInt(256));
        sb.append("\t");
        sb.append(PROVINCES_CITIES[random.nextInt(PROVINCES_CITIES.length)]);
        return sb.toString();
    }
}
部分日誌截圖
6.Hbase資料查詢
/**
 * Reads the view and click counters of one ad for one dimension value and one day.
 *
 * <p>The row key is {@code adid + "_" + province + "_" + date}, with any {@code -} removed
 * from the date first. A counter that is not present in the row is reported as {@code 0}.
 *
 * @param table    HBase table to read from
 * @param adid     ad id
 * @param date     day; dashes are stripped when the value is non-empty
 * @param province dimension part of the row key (the key format is the same for
 *                 adid_province_date and adid_city_date)
 * @return a map with the keys {@code adid}, {@code date}, {@code province}, {@code view}
 *         and {@code click}
 * @throws ServiceException if the HBase read fails; the underlying IOException is printed
 *         but not attached to the thrown exception
 */
public Map<String, Object> get(Table table, String adid, String date, String province) {
    try {
        final String day = StringUtils.isNotEmpty(date) ? date.replace("-", "") : date;

        Map<String, Object> stat = Maps.newHashMapWithExpectedSize(5);
        stat.put("adid", adid);
        stat.put("date", day);
        stat.put("province", province);

        // adid_province_date or adid_city_date
        byte[] key = Bytes.toBytes(adid + "_" + province + "_" + day);
        Result row = table.get(new Get(key));
        final byte[] family = Bytes.toBytes("stat");

        // stat:view_cnt
        byte[] rawViews = row.getValue(family, Bytes.toBytes("view_cnt"));
        stat.put("view", rawViews == null ? 0L : Bytes.toLong(rawViews));

        // stat:click_cnt
        byte[] rawClicks = row.getValue(family, Bytes.toBytes("click_cnt"));
        stat.put("click", rawClicks == null ? 0L : Bytes.toLong(rawClicks));

        return stat;
    } catch (IOException e) {
        e.printStackTrace();
        throw new ServiceException("查詢列表失敗");
    }
}
使用Hbase客戶端將realtime_ad_stat表裡的資料封裝成Map物件並轉為Json給前端展示
{
"data":[
{
"date":"20180810",
"view":6,
"adid":"1",
"province":"山東",
"click":4
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"河北",
"click":8
},
{
"date":"20180810",
"view":2,
"adid":"1",
"province":"吉林",
"click":4
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"黑龍江",
"click":2
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"遼寧",
"click":7
},
{
"date":"20180810",
"view":6,
"adid":"1",
"province":"內蒙古",
"click":5
},
{
"date":"20180810",
"view":10,
"adid":"1",
"province":"新疆",
"click":6
},
{
"date":"20180810",
"view":12,
"adid":"1",
"province":"甘肅",
"click":5
},
{
"date":"20180810",
"view":11,
"adid":"1",
"province":"寧夏",
"click":5
},
{
"date":"20180810",
"view":5,
"adid":"1",
"province":"山西",
"click":5
},
{
"date":"20180810",
"view":7,
"adid":"1",
"province":"陝西",
"click":5
},
{
"date":"20180810",
"view":3,
"adid":"1",
"province":"河南",
"click":6
},
{
"date":"20180810",
"view":1,
"adid":"1",
"province":"安徽",
"click":8
},
{
"date":"20180810",
"view":6,
"adid":"1",
"province":"江蘇",
"click":10
},
{
"date":"20180810",
"view":12,
"adid":"1",
"province":"浙江",
"click":5
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"福建",
"click":2
},
{
"date":"20180810",
"view":5,
"adid":"1",
"province":"廣東",
"click":13
},
{
"date":"20180810",
"view":8,
"adid":"1",
"province":"江西",
"click":6
},
{
"date":"20180810",
"view":5,
"adid":"1",
"province":"海南",
"click":1
},
{
"date":"20180810",
"view":6,
"adid":"1",
"province":"廣西",
"click":7
},
{
"date":"20180810",
"view":5,
"adid":"1",
"province":"貴州",
"click":11
},
{
"date":"20180810",
"view":8,
"adid":"1",
"province":"湖南",
"click":8
},
{
"date":"20180810",
"view":9,
"adid":"1",
"province":"湖北",
"click":4
},
{
"date":"20180810",
"view":6,
"adid":"1",
"province":"四川",
"click":8
},
{
"date":"20180810",
"view":2,
"adid":"1",
"province":"雲南",
"click":7
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"西藏",
"click":4
},
{
"date":"20180810",
"view":4,
"adid":"1",
"province":"青海",
"click":3
},
{
"date":"20180810",
"view":16,
"adid":"1",
"province":"天津",
"click":4
},
{
"date":"20180810",
"view":12,
"adid":"1",
"province":"上海",
"click":12
},
{
"date":"20180810",
"view":10,
"adid":"1",
"province":"重慶",
"click":16
},
{
"date":"20180810",
"view":10,
"adid":"1",
"province":"北京",
"click":14
},
{
"date":"20180810",
"view":5,
"adid":"1",
"province":"臺灣",
"click":4
},
{
"date":"20180810",
"view":18,
"adid":"1",
"province":"香港",
"click":10
},
{
"date":"20180810",
"view":8,
"adid":"1",
"province":"澳門",
"click":12
}
],
"message":"操作成功!",
"resultCode":"00000"
}
7.參考:
ECharts
HBase企業應用開發實戰 第8章
Hadoop叢集環境搭建(三臺)
Zookeeper叢集安裝
Storm之WordCount
Hbase之環境搭建
Kafka之叢集安裝