Summary of Setting Up a Kafka Environment on Windows 7

1. Install ZooKeeper

Download link: http://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.4.14/

Installation steps:

1) Extract the zookeeper-3.4.14.tar.gz archive

2) Go into the conf directory and rename "zoo_sample.cfg" to "zoo.cfg"

Note: ZooKeeper's default client port is 2181; to change it, edit zoo.cfg, as in the excerpt below.
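
For reference, the relevant lines in zoo.cfg look roughly like this (defaults taken from zoo_sample.cfg; clientPort is the one to change):

tickTime=2000
dataDir=/tmp/zookeeper
clientPort=2181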

3) Go into the bin directory and double-click zkServer.cmd; if ZooKeeper starts and keeps running without errors, the installation succeeded.

2. Install Kafka

Download link: http://kafka.apache.org/downloads

Installation steps:

1) Extract the kafka_2.11-1.0.2.tgz archive

2) After extracting, go into the resulting kafka_2.11-1.0.2 directory

3) Open a cmd window in this directory and run the following command:

.\bin\windows\kafka-server-start.bat .\config\server.properties

When the log output ends with a line saying the Kafka server started, the startup succeeded.

Note: before the next start, delete the kafka-logs folder (e.g. with the command below), or startup fails with: ERROR Error while loading log dir E:\tmp\kafka-logs (kafka.log.LogManager)
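
For example, assuming the default log.dirs path shown in the error message above, the folder can be removed before restarting with:

rmdir /s /q E:\tmp\kafka-logs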

3. Test the Connection with Python

Producer and consumer

Open PyCharm, create a directory named kafka, and inside it create test_producer.py and test_consumer.py. The examples below use the kafka-python package; install it with pip install kafka-python if you don't have it.

test_producer.py:

# coding=utf-8
# Producer
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
msg = "HelloWorld".encode('utf-8')
print(msg)
# Send the message to partition 0 of the "demo" topic
producer.send('demo', msg, partition=0)
# close() flushes any buffered messages before shutting down
producer.close()
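
Note: this send assumes the demo topic already exists, or that the broker keeps its default auto.create.topics.enable=true. To create the topic explicitly (a sketch for Kafka 1.0.x, where the topic script still talks to ZooKeeper), run this from the Kafka directory:

.\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic demo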

test_consumer.py:

# coding=utf-8
# Consumer
from kafka import KafkaConsumer

consumer = KafkaConsumer('demo', bootstrap_servers=['localhost:9092'])
# Iterate forever, blocking until messages arrive
for msg in consumer:
    info = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
    print(info)
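
By default kafka-python starts reading from the latest offset, so messages sent before the consumer connects are skipped. To re-read older messages, a group id and a reset policy can be passed in (a sketch; demo-group is a made-up name):

consumer = KafkaConsumer('demo',
                         bootstrap_servers=['localhost:9092'],
                         group_id='demo-group',         # hypothetical group name
                         auto_offset_reset='earliest')  # start from the oldest retained message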

Run test_consumer.py first, then test_producer.py; the consumer terminal then prints the message sent by the producer.

4. Test the Connection with Java

Method 1:

Producer code:

package kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;

public class ProducerTest {
    public static void main(String[] args) {
        Properties properties = new Properties();
        // Broker address list; give at least two in production so one can go down
        properties.put("bootstrap.servers", "localhost:9092");
        /*
        acks: how many partition replicas must receive a message before the producer
        considers the write successful; this governs the chance of losing data.
        acks=0:   the producer does not wait for any response from the server, so you
                  cannot tell whether a send succeeded, but you can send as fast as
                  the network allows, for very high throughput.
        acks=1:   the producer gets a success response as soon as the partition
                  leader receives the message.
        acks=all: the producer gets a success response only after all in-sync
                  replicas have received the message; this is the safest mode.
        */
        properties.put("acks", "all");
        // retries: how many times the producer retries a send that failed with a transient error
        properties.put("retries", 0);
        // batch.size: memory available to one batch, measured in bytes (not message count)
        properties.put("batch.size", 16384);
        // linger.ms: how long the producer waits for more messages to join a batch
        // before sending it; adds latency but improves throughput
        properties.put("linger.ms", 1);
        // buffer.memory: size of the producer's buffer for messages waiting to be sent
        properties.put("buffer.memory", 33554432);
        // compression.type: compression codec (snappy, gzip, or lz4); snappy is a good
        // balance, gzip costs more CPU but compresses better
        // Serializers for message keys and values
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        /*
        Other producer settings:
        client.id: an arbitrary string the server uses to identify the source of requests
        max.in.flight.requests.per.connection: how many messages the producer may send
            before receiving responses; larger values use more memory but raise throughput
        timeout.ms: how long the broker waits for in-sync replicas to acknowledge a message
        request.timeout.ms: how long the producer waits for a response after sending data
        metadata.fetch.timeout.ms: how long the producer waits for metadata, such as
            which broker leads the target partition
        max.block.ms: how long send() or partitionsFor() may block while fetching metadata
        max.request.size: caps the size of requests the producer sends
        receive.buffer.bytes and send.buffer.bytes: TCP socket receive and send buffer
            sizes; the default of -1 uses the operating system defaults
        */
        Producer<String, String> producer = null;
        // Timestamp format appended to each message
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            producer = new KafkaProducer<>(properties);
            for (int i = 0; i < 10; i++) {
                String msg = "test" + i + "\t" + df.format(new Date()) + "\t";
                producer.send(new ProducerRecord<String, String>("kafka_test", msg));
                Thread.sleep(500);
                System.out.println("Sent:" + msg);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Guard against the constructor having failed before close()
            if (producer != null) {
                producer.close();
            }
        }
    }
}

Consumer code:

package kafka;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

public class ConsumerTest {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "localhost:9092");
        properties.put("group.id", "group-2");
        // session.timeout.ms: how long a consumer may be out of contact with the
        // brokers before it is considered dead
        properties.put("session.timeout.ms", "30000");
        // enable.auto.commit: whether the consumer commits offsets automatically
        // (default true); setting it to false and committing manually helps avoid
        // duplicated and lost records
        properties.put("enable.auto.commit", "false");
        properties.put("auto.commit.interval.ms", "1000");
        // auto.offset.reset: what to do when the partition has no committed offset,
        // or the committed offset is invalid
        //   earliest: start reading from the beginning of the partition
        //   latest:   start reading from the newest records
        properties.put("auto.offset.reset", "earliest");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        /*
        Other consumer settings:
        max.partition.fetch.bytes: maximum bytes the server returns per partition
        fetch.max.wait.ms: how long the broker waits for data before answering a fetch; default 500
        fetch.min.bytes: minimum bytes the consumer will accept from the server per fetch
        client.id: an arbitrary string the server uses to identify the source of requests
        max.poll.records: caps the number of records a single call to poll() can return
        receive.buffer.bytes and send.buffer.bytes: TCP socket receive and send buffer
            sizes; the default of -1 uses the operating system defaults
        */

        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
        kafkaConsumer.subscribe(Arrays.asList("kafka_test"));
        while (true) {
            // poll() blocks for up to 100 ms waiting for records
            ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("offset = %d,value = %s", record.offset(), record.value());
                System.out.println("=====================>");
            }
        }
    }
}
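
Because enable.auto.commit is set to false above and the code never commits, the group has no stored offsets, and with auto.offset.reset=earliest it re-reads everything on each restart. If you want the group to resume where it left off, one option is a manual commit after each poll; a sketch of the modified loop:

while (true) {
    ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
    for (ConsumerRecord<String, String> record : records) {
        System.out.printf("offset = %d,value = %s%n", record.offset(), record.value());
    }
    // commitSync() synchronously commits the offsets returned by the last poll()
    kafkaConsumer.commitSync();
}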

This time run the producer first, then the consumer; because auto.offset.reset is earliest, the consumer still picks up the messages sent before it started, and both terminals show the traffic.
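
As a quick sanity check without writing any client code, the console consumer that ships with Kafka can read the same topic (run from the Kafka directory; --from-beginning replays the whole topic):

.\bin\windows\kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic kafka_test --from-beginning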

Method 2:

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.junit.Test;

import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class KafkaDemo {
    // Broker address
    private static final String SERVERS = "localhost:9092";
    // Topic to produce to and consume from
    private static final String TOPIC = "test-kafka";
    // Consumer group id
    private static final String CONSUMER_GROUP = "test-consumer";

    @Test
    public void testProducer() throws Exception {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, SERVERS);
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);

        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
        for (int i = 0; i <= 5; i++) {
            String msg = "hello kafka" + i;
            ProducerRecord<String, String> record = new ProducerRecord<>(TOPIC, msg);

            // send() is asynchronous; block on the returned Future to get the metadata
            Future<RecordMetadata> future = kafkaProducer.send(record);
            RecordMetadata recordMetadata = future.get(1, TimeUnit.SECONDS);
            System.out.println(recordMetadata.offset());
        }

        kafkaProducer.close();
    }

    @Test
    public void testConsumer() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, SERVERS);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, CONSUMER_GROUP);

        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
        kafkaConsumer.subscribe(Collections.singletonList(TOPIC));
        while (true) {
            ConsumerRecords<String, String> records = kafkaConsumer.poll(1000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.value());
            }
        }
    }
}
The code above needs the following Maven dependencies:
<dependencies>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.11.0.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
        <version>0.11.0.0</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
    </dependency>
</dependencies>
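
With these dependencies in the pom, the tests can be run from the IDE, or from the command line assuming a reasonably recent Surefire setup, e.g.:

mvn test -Dtest=KafkaDemo#testProducer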

This completes the Kafka environment setup on Windows.