Kafka Source Code Analysis (6)
6. Miscellaneous
1. Producer
Let's start with the KafkaProducer class in org.apache.kafka.clients.producer. Everything under the org.apache.kafka packages is implemented in Java and provides the client-side API for talking to Kafka. The relevant parts of the class look like this:
public class KafkaProducer<K,V> implements Producer<K,V> {
    ………………………………
    private KafkaProducer(ProducerConfig config, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
        log.trace("Starting the Kafka producer");
        this.producerConfig = config;
        this.time = new SystemTime();
        MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
                                                      .timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG),
                                                                  TimeUnit.MILLISECONDS);
        String clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
        if (clientId.length() <= 0)
            clientId = "producer-" + producerAutoId.getAndIncrement();
        String jmxPrefix = "kafka.producer";
        List<MetricsReporter> reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG, MetricsReporter.class);
        reporters.add(new JmxReporter(jmxPrefix));
        this.metrics = new Metrics(metricConfig, reporters, time);
        this.partitioner = new Partitioner();
        long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
        this.metadataFetchTimeoutMs = config.getLong(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG);
        this.metadata = new Metadata(retryBackoffMs, config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG));
        this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
        this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
        this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
        Map<String, String> metricTags = new LinkedHashMap<String, String>();
        metricTags.put("client-id", clientId);
        this.accumulator = new RecordAccumulator(config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
                                                 this.totalMemorySize,
                                                 config.getLong(ProducerConfig.LINGER_MS_CONFIG),
                                                 retryBackoffMs,
                                                 config.getBoolean(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG),
                                                 metrics,
                                                 time,
                                                 metricTags);
        List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG));
        this.metadata.update(Cluster.bootstrap(addresses), time.milliseconds());
        NetworkClient client = new NetworkClient(new Selector(this.metrics, time, "producer", metricTags),
                                                 this.metadata,
                                                 clientId,
                                                 config.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION),
                                                 config.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG),
                                                 config.getInt(ProducerConfig.SEND_BUFFER_CONFIG),
                                                 config.getInt(ProducerConfig.RECEIVE_BUFFER_CONFIG));
        this.sender = new Sender(client,
                                 this.metadata,
                                 this.accumulator,
                                 config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG),
                                 (short) parseAcks(config.getString(ProducerConfig.ACKS_CONFIG)),
                                 config.getInt(ProducerConfig.RETRIES_CONFIG),
                                 config.getInt(ProducerConfig.TIMEOUT_CONFIG),
                                 this.metrics,
                                 new SystemTime(),
                                 clientId);
        String ioThreadName = "kafka-producer-network-thread" + (clientId.length() > 0 ? " | " + clientId : "");
        this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
        this.ioThread.start();
        this.errors = this.metrics.sensor("errors");
        if (keySerializer == null) {
            this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, Serializer.class);
            this.keySerializer.configure(config.originals(), true);
        } else
            this.keySerializer = keySerializer;
        if (valueSerializer == null) {
            this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, Serializer.class);
            this.valueSerializer.configure(config.originals(), false);
        } else
            this.valueSerializer = valueSerializer;
        config.logUnused();
        log.debug("Kafka producer started");
    }
    ………………………………
    @Override
    public Future<RecordMetadata> send(ProducerRecord<K,V> record, Callback callback) {
        try {
            // first make sure the metadata for the topic is available
            waitOnMetadata(record.topic(), this.metadataFetchTimeoutMs);
            byte[] serializedKey;
            try {
                serializedKey = keySerializer.serialize(record.topic(), record.key());
            } catch (ClassCastException cce) {
                throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
                        " to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
                        " specified in key.serializer");
            }
            byte[] serializedValue;
            try {
                serializedValue = valueSerializer.serialize(record.topic(), record.value());
            } catch (ClassCastException cce) {
                throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
                        " to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
                        " specified in value.serializer");
            }
            ProducerRecord<byte[], byte[]> serializedRecord = new ProducerRecord<byte[], byte[]>(record.topic(), record.partition(), serializedKey, serializedValue);
            int partition = partitioner.partition(serializedRecord, metadata.fetch());
            int serializedSize = Records.LOG_OVERHEAD + Record.recordSize(serializedKey, serializedValue);
            ensureValidRecordSize(serializedSize);
            TopicPartition tp = new TopicPartition(record.topic(), partition);
            log.trace("Sending record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition);
            RecordAccumulator.RecordAppendResult result = accumulator.append(tp, serializedKey, serializedValue, compressionType, callback);
            if (result.batchIsFull || result.newBatchCreated) {
                log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
                this.sender.wakeup();
            }
            return result.future;
            // Handling exceptions and record the errors;
            // For API exceptions return them in the future,
            // for other exceptions throw directly
        } catch (ApiException e) {
            log.debug("Exception occurred during message send:", e);
            if (callback != null)
                callback.onCompletion(null, e);
            this.errors.record();
            return new FutureFailure(e);
        } catch (InterruptedException e) {
            this.errors.record();
            throw new KafkaException(e);
        } catch (KafkaException e) {
            this.errors.record();
            throw e;
        }
    }
    ………………………………
}
All of the class's public constructors ultimately delegate to the private constructor above, which mainly reads the configuration, establishes the network connection, and starts the sender thread. The other important method is send, which delivers one record at a time to the given topic and partition. Note that this class only supports asynchronous sending; a synchronous-style send lives in another class, MockProducer, but the synchronous path is still incomplete in Kafka 0.8 and is not recommended.
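To see how this is used in practice, here is a minimal sketch of sending with the new Java producer; the broker address, topic name, and String serializers are placeholder assumptions, not values taken from the source:

import java.util.Properties;
import java.util.concurrent.Future;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class NewProducerExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // bootstrap.servers, key.serializer and value.serializer are required;
        // the host/port and topic below are placeholders.
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);

        // send() is asynchronous: it appends the record to the RecordAccumulator and
        // returns a Future; the sender I/O thread delivers the batch in the background.
        Future<RecordMetadata> future = producer.send(
                new ProducerRecord<String, String>("my-topic", "key-1", "value-1"),
                new Callback() {
                    @Override
                    public void onCompletion(RecordMetadata metadata, Exception e) {
                        if (e != null)
                            e.printStackTrace();
                        else
                            System.out.println("wrote to partition " + metadata.partition()
                                    + " at offset " + metadata.offset());
                    }
                });

        // Blocking on the Future is how callers approximate a synchronous send.
        future.get();
        producer.close();
    }
}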
The core Kafka project itself has two Producer classes, one under kafka.javaapi.producer and one under kafka.producer, the former being a wrapper around the latter. Under the hood this class does support both synchronous and asynchronous sending, they simply go through different paths:
/**
 * Sends the data, partitioned by key to the topic using either the
 * synchronous or the asynchronous producer
 * @param messages the producer data object that encapsulates the topic, key and message data
 */
def send(messages: KeyedMessage[K,V]*) {
  lock synchronized {
    if (hasShutdown.get)
      throw new ProducerClosedException
    recordStats(messages)
    sync match {
      case true => eventHandler.handle(messages)
      case false => asyncSend(messages)
    }
  }
}
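For comparison, a minimal sketch of driving the old producer through its kafka.javaapi wrapper; the broker address, topic, and encoder are placeholder assumptions. Setting producer.type to "sync" or "async" is what selects between the two branches of the send method shown above:

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class OldProducerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        // metadata.broker.list and serializer.class are required by the old producer;
        // the broker address and topic are placeholders.
        props.put("metadata.broker.list", "localhost:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        // "sync" routes send() through eventHandler.handle() directly,
        // "async" goes through asyncSend() and a background queue.
        props.put("producer.type", "sync");

        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        producer.send(new KeyedMessage<String, String>("my-topic", "key-1", "value-1"));
        producer.close();
    }
}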
2. Consumer
Let's start with the KafkaConsumer class in org.apache.kafka.clients.consumer. The class-level comments in this file spend more than 300 lines explaining how the class is meant to be used, and subscribe, unsubscribe, and commit each come in two variants, one taking an offset and one taking a partition. However... in this release the poll method always returns null, so this version of the class is not yet usable. The official statement is:
We are in the process of rewriting the JVM clients for Kafka. As of 0.8.2 Kafka includes a newly rewritten Java producer. The next release will include an equivalent Java consumer.
In any case, it does not work in kafka-clients-0.8.2.2.jar, so don't fall into this trap. A usable consumer in this version still has to go through the ConsumerConnector interface in the kafka.javaapi.consumer package of the main Kafka project, which exposes the high-level consumer API (offsets can only be consumed in increasing order); a usage sketch follows the interface listing below:
public interface ConsumerConnector {
    /**
     * Create a list of MessageStreams of type T for each topic.
     *
     * @param topicCountMap a map of (topic, #streams) pair
     * @param keyDecoder a decoder that decodes the message key
     * @param valueDecoder a decoder that decodes the message itself
     * @return a map of (topic, list of KafkaStream) pairs.
     *         The number of items in the list is #streams. Each stream supports
     *         an iterator over message/metadata pairs.
     */
    public <K,V> Map<String, List<KafkaStream<K,V>>>
        createMessageStreams(Map<String, Integer> topicCountMap, Decoder<K> keyDecoder, Decoder<V> valueDecoder);

    public Map<String, List<KafkaStream<byte[], byte[]>>> createMessageStreams(Map<String, Integer> topicCountMap);

    /**
     * Create a list of MessageAndTopicStreams containing messages of type T.
     *
     * @param topicFilter a TopicFilter that specifies which topics to
     *        subscribe to (encapsulates a whitelist or a blacklist).
     * @param numStreams the number of message streams to return.
     * @param keyDecoder a decoder that decodes the message key
     * @param valueDecoder a decoder that decodes the message itself
     * @return a list of KafkaStream. Each stream supports an
     *         iterator over its MessageAndMetadata elements.
     */
    public <K,V> List<KafkaStream<K,V>>
        createMessageStreamsByFilter(TopicFilter topicFilter, int numStreams, Decoder<K> keyDecoder, Decoder<V> valueDecoder);

    public List<KafkaStream<byte[], byte[]>> createMessageStreamsByFilter(TopicFilter topicFilter, int numStreams);

    public List<KafkaStream<byte[], byte[]>> createMessageStreamsByFilter(TopicFilter topicFilter);

    /**
     * Commit the offsets of all broker partitions connected by this connector.
     */
    public void commitOffsets();

    public void commitOffsets(boolean retryOnFailure);

    /**
     * Shut down the connector
     */
    public void shutdown();
}
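Here is a minimal usage sketch of the high-level consumer, assuming a local ZooKeeper and a hypothetical group id and topic name; the connector instance is obtained via kafka.consumer.Consumer.createJavaConsumerConnector rather than by implementing the interface yourself:

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class HighLevelConsumerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        // zookeeper.connect and group.id are required by the high-level consumer;
        // the address, group id and topic name below are placeholders.
        props.put("zookeeper.connect", "localhost:2181");
        props.put("group.id", "demo-group");
        props.put("auto.commit.enable", "true");

        ConsumerConnector connector =
                kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        // Ask for one stream for "my-topic"; the connector balances partitions across streams.
        Map<String, List<KafkaStream<byte[], byte[]>>> streams =
                connector.createMessageStreams(Collections.singletonMap("my-topic", 1));

        ConsumerIterator<byte[], byte[]> it = streams.get("my-topic").get(0).iterator();
        while (it.hasNext()) {
            // Offsets only move forward; commitOffsets() (or auto commit) records progress in ZooKeeper.
            System.out.println(new String(it.next().message()));
        }
        connector.shutdown();
    }
}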
The same package also contains a SimpleConsumer class (a wrapper around kafka.consumer.SimpleConsumer), which exposes the low-level consumer API and therefore allows re-consuming from an arbitrary offset; a usage sketch follows the class listing below:
/**
 * A consumer of kafka messages
 */
@threadsafe
class SimpleConsumer(val host: String,
                     val port: Int,
                     val soTimeout: Int,
                     val bufferSize: Int,
                     val clientId: String) {

  private val underlying = new kafka.consumer.SimpleConsumer(host, port, soTimeout, bufferSize, clientId)

  /**
   * Fetch a set of messages from a topic. This version of the fetch method
   * takes the Scala version of a fetch request (i.e.,
   * [[kafka.api.FetchRequest]]) and is intended for use with the
   * [[kafka.api.FetchRequestBuilder]].
   *
   * @param request specifies the topic name, topic partition, starting byte offset, maximum bytes to be fetched.
   * @return a set of fetched messages
   */
  def fetch(request: kafka.api.FetchRequest): FetchResponse = {
    import kafka.javaapi.Implicits._
    underlying.fetch(request)
  }

  /**
   * Fetch a set of messages from a topic.
   *
   * @param request specifies the topic name, topic partition, starting byte offset, maximum bytes to be fetched.
   * @return a set of fetched messages
   */
  def fetch(request: kafka.javaapi.FetchRequest): FetchResponse = {
    fetch(request.underlying)
  }

  /**
   * Fetch metadata for a sequence of topics.
   *
   * @param request specifies the versionId, clientId, sequence of topics.
   * @return metadata for each topic in the request.
   */
  def send(request: kafka.javaapi.TopicMetadataRequest): kafka.javaapi.TopicMetadataResponse = {
    import kafka.javaapi.Implicits._
    underlying.send(request.underlying)
  }

  /**
   * Get a list of valid offsets (up to maxSize) before the given time.
   *
   * @param request a [[kafka.javaapi.OffsetRequest]] object.
   * @return a [[kafka.javaapi.OffsetResponse]] object.
   */
  def getOffsetsBefore(request: OffsetRequest): kafka.javaapi.OffsetResponse = {
    import kafka.javaapi.Implicits._
    underlying.getOffsetsBefore(request.underlying)
  }

  /**
   * Commit offsets for a topic to Zookeeper
   * @param request a [[kafka.javaapi.OffsetCommitRequest]] object.
   * @return a [[kafka.javaapi.OffsetCommitResponse]] object.
   */
  def commitOffsets(request: kafka.javaapi.OffsetCommitRequest): kafka.javaapi.OffsetCommitResponse = {
    import kafka.javaapi.Implicits._
    underlying.commitOffsets(request.underlying)
  }

  /**
   * Fetch offsets for a topic from Zookeeper
   * @param request a [[kafka.javaapi.OffsetFetchRequest]] object.
   * @return a [[kafka.javaapi.OffsetFetchResponse]] object.
   */
  def fetchOffsets(request: kafka.javaapi.OffsetFetchRequest): kafka.javaapi.OffsetFetchResponse = {
    import kafka.javaapi.Implicits._
    underlying.fetchOffsets(request.underlying)
  }

  def close() {
    underlying.close
  }
}
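Finally, a minimal sketch of the low-level API, re-reading a partition starting from a chosen offset; the broker address, topic, partition number, and offset are placeholder assumptions, and a real client would first locate the partition leader with a TopicMetadataRequest:

import java.nio.ByteBuffer;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;

public class LowLevelConsumerExample {
    public static void main(String[] args) {
        // Broker address, topic, partition and starting offset are placeholders;
        // the consumer must be pointed at the leader broker for the partition.
        SimpleConsumer consumer =
                new SimpleConsumer("localhost", 9092, 100000, 64 * 1024, "demo-client");

        FetchRequest request = new FetchRequestBuilder()
                .clientId("demo-client")
                .addFetch("my-topic", 0, 42L, 100000)  // re-read partition 0 starting at offset 42
                .build();

        FetchResponse response = consumer.fetch(request);
        ByteBufferMessageSet messageSet = response.messageSet("my-topic", 0);
        for (MessageAndOffset messageAndOffset : messageSet) {
            ByteBuffer payload = messageAndOffset.message().payload();
            byte[] bytes = new byte[payload.limit()];
            payload.get(bytes);
            System.out.println(messageAndOffset.offset() + ": " + new String(bytes));
        }
        consumer.close();
    }
}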