一次性拉取多條數據,消費後再手動提交 ACK。因為這些數據要保存到數據庫,保存過程如果失敗,就需要重新消費這些數據,
所以配置時必須關閉 Kafka 的自動提交。
批量消費數據
1.設置ENABLE_AUTO_COMMIT_CONFIG=false,禁止自動提交
2.設置AckMode=MANUAL_IMMEDIATE
3.監聽方法加入Acknowledgment ack 參數
package com.zenlayer.ad.kafuka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.config.KafkaListenerContainerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaProducerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.core.ProducerFactory;
import org.springframework.kafka.listener.AbstractMessageListenerContainer;

import java.util.HashMap;
import java.util.Map;

/**
 * Kafka producer and consumer wiring for batch consumption with manual offset commits.
 *
 * <p>The consumer side is set up so that a listener receives a whole poll result as a list
 * and commits offsets itself through an {@code Acknowledgment} parameter
 * ({@code AckMode.MANUAL_IMMEDIATE}). For this to work,
 * {@code spring.kafka.consumer.enable-auto-commit} must be {@code false}.
 *
 * @author zhff
 * @version 2019/9/1 04:07 PM
 */
@Configuration
@EnableKafka
public class KafkaConfiguration {

    @Value("${spring.kafka.bootstrap-servers}")
    private String bootstrapServers;

    @Value("${spring.kafka.consumer.enable-auto-commit}")
    private Boolean autoCommit;

    @Value("${spring.kafka.consumer.auto-commit-interval}")
    private Integer autoCommitInterval;

    @Value("${spring.kafka.consumer.group-id}")
    private String groupId;

    @Value("${spring.kafka.consumer.max-poll-records}")
    private Integer maxPollRecords;

    @Value("${spring.kafka.consumer.auto-offset-reset}")
    private String autoOffsetReset;

    @Value("${spring.kafka.producer.retries}")
    private Integer retries;

    @Value("${spring.kafka.producer.batch-size}")
    private Integer batchSize;

    @Value("${spring.kafka.producer.buffer-memory}")
    private Integer bufferMemory;

    /**
     * Producer configuration.
     *
     * @return the property map handed to the Kafka producer
     */
    @Bean
    public Map<String, Object> producerConfigs() {
        Map<String, Object> props = new HashMap<>();
        // Kafka's documented default is "1". "all" (alias "-1") waits until the in-sync replicas
        // have the record, which protects against data loss. With "0" the producer does not wait
        // for any acknowledgement and the retries setting below would never take effect.
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        props.put(ProducerConfig.RETRIES_CONFIG, retries);
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
        props.put(ProducerConfig.LINGER_MS_CONFIG, 1);
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, bufferMemory);
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        return props;
    }

    /**
     * Producer factory.
     *
     * @return a factory built from {@link #producerConfigs()}
     */
    @Bean
    public ProducerFactory<String, String> producerFactory() {
        return new DefaultKafkaProducerFactory<>(producerConfigs());
    }

    /**
     * Producer template.
     *
     * @return a template backed by {@link #producerFactory()}
     */
    @Bean
    public KafkaTemplate<String, String> kafkaTemplate() {
        return new KafkaTemplate<>(producerFactory());
    }

    /**
     * Consumer configuration.
     *
     * @return the property map handed to the Kafka consumer
     */
    @Bean
    public Map<String, Object> consumerConfigs() {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, autoOffsetReset);
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
        // Must be false for manual commits.
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, autoCommit);
        // Only used by the Kafka client when auto-commit is enabled; passed through so the
        // configured value is not silently dropped.
        props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, autoCommitInterval);
        props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 120000);
        props.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, 180000);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        return props;
    }

    /**
     * Listener container factory for batch consumption with manual acknowledgement.
     *
     * @return a batch-enabled factory using {@link #consumerConfigs()}
     */
    @Bean
    public KafkaListenerContainerFactory<?> batchFactory() {
        // Key and value types match the StringDeserializer configured in consumerConfigs().
        ConcurrentKafkaListenerContainerFactory<String, String> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(consumerConfigs()));
        // Enable batch listening; the batch size is bounded by ConsumerConfig.MAX_POLL_RECORDS_CONFIG.
        factory.setBatchListener(true);
        factory.setConcurrency(4);
        factory.getContainerProperties()
                .setAckMode(AbstractMessageListenerContainer.AckMode.MANUAL_IMMEDIATE);
        factory.getContainerProperties().setPollTimeout(30000);
        return factory;
    }
}
如果使用的是 Spring Boot 自動配置的監聽容器工廠,也可以在配置文件的 spring.kafka.listener 下這樣設置手動提交(自定義的 containerFactory 仍需在代碼中調用 setAckMode):
ack-mode: MANUAL_IMMEDIATE
spring:
  kafka:
    # Kafka expects a comma-separated list of host:port pairs.
    bootstrap-servers: 192.168.1.125:9092,192.168.1.126:9092,192.168.1.127:9092
    producer:
      # Number of retries
      retries: 3
      # Maximum size of one producer batch, in bytes (Kafka batch.size), not a message count
      batch-size: 16384
      # 32 MB send buffer
      buffer-memory: 33554432
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
    consumer:
      # Default consumer group
      group-id: 0
      # Start from the earliest available offset when the group has no committed offset
      auto-offset-reset: earliest
      # Maximum number of records returned by a single poll
      max-poll-records: 3000
      # Auto-commit interval; auto-commit commits right after polling and can easily lose data
      auto-commit-interval: 2000
      # Disable auto-commit
      enable-auto-commit: false
      # Maximum interval between polls; must be larger than the time needed to process one
      # batch, otherwise records may be consumed more than once.
      # NOTE(review): KafkaConfiguration does not read this key through any @Value field, so
      # it appears to have no effect on the batchFactory consumers - confirm.
      max.poll.interval.ms: 5000

# NOTE(review): nesting level reconstructed from a flattened listing; assumed top-level - confirm.
topicName:
  topic2: topic_collect1
  topic5: topic_collect111
消費的方法如下, 方法比較簡單
/**
 * Batch listener: receives one poll result as a list, processes it, and commits the offsets
 * manually only after processing succeeded.
 *
 * @param records the records returned by one poll
 * @param ack     handle used to commit the offsets of this batch
 */
@KafkaListener(id = "0", topics = "topic_collect", containerFactory = "batchFactory")
public void listen100(List<ConsumerRecord<String, String>> records, Acknowledgment ack) {
    System.out.println(records.size() + "條數被消費");
    try {
        batchConsumer(records);
        // Commit only after the batch has been handled successfully.
        ack.acknowledge();
    } catch (Exception ex) {
        // Pass the exception itself so the logger prints the message and full stack trace;
        // passing ex.getStackTrace() would be treated as unused format arguments.
        // NOTE(review): the exception is swallowed here, so the container keeps polling the
        // following records; skipping the ack does not by itself cause this batch to be
        // redelivered. Confirm whether a seek/nack or rethrow is needed for re-consumption.
        logger.error("消費數據出錯 ", ex);
    }
}