python-kafka实现producer与consumer

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python-kafka实现produce与consumer相关的知识,希望对你有一定的参考价值。

1.kafka-python(Python的Kafka客户端库):

api送上:https://kafka-python.readthedocs.io/en/latest/apidoc/KafkaConsumer.html

2.实现一个broker、topic可配置的生产者与消费者:

 

#coding=utf-8

import time
import logging
import sys
import json
import etc.config as conf
sys.path.append('***********/kafka-python-1.3.3')
from kafka import KafkaProducer
from kafka import KafkaConsumer
from kafka.errors import KafkaError
from kafka import TopicPartition


def log_name():
    """Build the path of today's log file.

    The value of ``conf.kafka_logDir`` is used as a plain string prefix; the
    current local date formatted as ``YYYYMMDD`` followed by ``.log`` is
    appended to it.

    Returns:
        The prefix concatenated with ``<YYYYMMDD>.log``.
    """
    base_name = conf.kafka_logDir
    # strftime() without an explicit time tuple formats the current local time.
    date = time.strftime("%Y%m%d") + ".log"
    return base_name + date

# Root logger: DEBUG and above are appended to today's log file.
logging.basicConfig(level=logging.DEBUG,
        format="%(asctime)-15s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        filename=log_name(),
        filemode="a"
        )
# Additionally mirror INFO and above to the console (stderr by default).
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logging.getLogger('').addHandler(console)


class kfkProducer(object):
    """Small wrapper around ``kafka.KafkaProducer`` for one broker address.

    Args:
        broker: Host name or IP of the Kafka broker.
        kafkaPort: Broker port (int or str).
        kafkaTopic: Topic name kept for display purposes only; the topic that
            is actually written to is passed to ``produceMsg``.
    """

    def __init__(self, broker, kafkaPort, kafkaTopic=''):
        self._broker = broker
        self._kafkaPort = kafkaPort
        self._kafkaTopic = kafkaTopic
        # Created lazily by registerKfkProducer() and reused afterwards so
        # that every message does not open (and leak) a new connection.
        self._producer = None

    def _bootstrap_server(self):
        """Return the ``host:port`` string handed to the Kafka client."""
        return '{kafka_host}:{kafka_port}'.format(
            kafka_host=self._broker,
            kafka_port=self._kafkaPort
        )

    def __str__(self):
        """Log the producer parameters and return them as text.

        The parameters are still written to the log (existing callers invoke
        ``__str__()`` purely for that side effect), but a string is now
        returned as well so that ``str(obj)`` no longer raises ``TypeError``.
        """
        lines = [
            "--------------------------------",
            "kafka-producer params ...",
            "[KAFKA-BROKER]:%s" % self._broker,
            "[KAFKA-PORT]:%s" % self._kafkaPort,
            "[KAFKA-TOPIC]:%s" % self._kafkaTopic,
            "--------------------------------",
        ]
        for line in lines:
            logging.info(line)
        return "\n".join(lines)

    def registerKfkProducer(self):
        """Return the ``KafkaProducer`` for this broker, creating it on first use.

        Raises:
            KafkaError: if the producer cannot be created. The error is logged
                and re-raised instead of falling through to a ``return`` of an
                unbound local variable.
        """
        if self._producer is None:
            try:
                self._producer = KafkaProducer(
                    bootstrap_servers=self._bootstrap_server())
            except KafkaError as e:
                logging.error("failed to create kafka producer for %s: %s",
                              self._bootstrap_server(), e)
                raise
        return self._producer

    def produceMsg(self, topic, msg, partition=0):
        """Send one message to ``topic``/``partition`` and flush it.

        Args:
            topic: Target topic; an empty string or None is rejected with an
                error log entry and nothing is sent.
            msg: Message payload. No value serializer is configured on the
                producer, so the caller must pass already-encoded bytes.
            partition: Target partition number.

        Kafka errors are logged, not raised (best-effort send).
        """
        if topic in ('', None):
            logging.error("topic is None, plz check!")
            return
        try:
            producer = self.registerKfkProducer()
            producer.send(topic, value=msg, partition=partition)
            # Block until the buffered record has actually been transmitted.
            producer.flush()
        except KafkaError as e:
            logging.error(e)



class kfkConsumer(object):
    """Small wrapper around ``kafka.KafkaConsumer`` for one broker address.

    Args:
        broker: Host name or IP of the Kafka broker.
        kafkaPort: Broker port (int or str).
        kafkaTopic: Topic name kept for display purposes only; the topic that
            is actually read is passed to ``consumerMsg``.
    """

    def __init__(self, broker, kafkaPort, kafkaTopic=''):
        self._broker = broker
        self._kafkaPort = kafkaPort
        self._kafkaTopic = kafkaTopic

    def _bootstrap_server(self):
        """Return the ``host:port`` string handed to the Kafka client.

        ``format`` is used instead of ``+`` so an integer port works too.
        """
        return '{kafka_host}:{kafka_port}'.format(
            kafka_host=self._broker,
            kafka_port=self._kafkaPort
        )

    def __str__(self):
        """Log the consumer parameters and return them as text.

        The parameters are still written to the log, but a string is now
        returned as well so that ``str(obj)`` no longer raises ``TypeError``.
        """
        lines = [
            "--------------------------------",
            "kafka-consumer params ...",
            "[KAFKA-BROKER]:%s" % self._broker,
            "[KAFKA-PORT]:%s" % self._kafkaPort,
            "[KAFKA-TOPIC]:%s" % self._kafkaTopic,
            "--------------------------------",
        ]
        for line in lines:
            logging.info(line)
        return "\n".join(lines)

    def registerConsumer(self):
        """Create a ``KafkaConsumer`` that starts from the earliest offset.

        Raises:
            KafkaError: if the consumer cannot be created. The error is logged
                and re-raised instead of falling through to a ``return`` of an
                unbound local variable.
        """
        try:
            consumer = KafkaConsumer(
                bootstrap_servers=[self._bootstrap_server()],
                auto_offset_reset="earliest")
        except KafkaError as e:
            logging.error("failed to create kafka consumer for %s: %s",
                          self._bootstrap_server(), e)
            raise
        return consumer

    def consumerMsg(self, topic, partition=0):
        """Read messages from ``topic``/``partition`` and log each one.

        Args:
            topic: Topic to read; an empty string or None is rejected with an
                error log entry and nothing is consumed.
            partition: Partition number that is assigned to the consumer.

        Kafka errors are logged, not raised. The consumer is closed when the
        loop ends for any reason.
        """
        if topic in ('', None):
            logging.error("topic is None, plz check!")
            return
        v_consumer = None
        try:
            v_consumer = self.registerConsumer()
            # Manual assignment of one partition (no consumer-group subscribe).
            v_consumer.assign([TopicPartition(topic, partition)])
            for message in v_consumer:
                # Message values are raw bytes; a record may carry no value.
                value = message.value
                if value is not None:
                    value = value.decode("utf-8")
                logging.info("%s:%d:%d: msg=%s", message.topic,
                             message.partition, message.offset, value)
        except KafkaError as e:
            logging.error(e)
        finally:
            if v_consumer is not None:
                v_consumer.close()


3.实现命令行输入topic和partition,即可生产消息:

#coding=utf-8

import os 
import sys
import json
import etc.config as conf
from PykafkaMgr import kfkProducer

# Load the messages to produce from a JSON file.
def getMsgFromJsonfile(filePath):
    """Parse the JSON file at ``filePath`` and return its content.

    When ``filePath`` is not an existing regular file, a notice is printed
    and None is returned.
    """
    if os.path.isfile(filePath):
        with open(filePath) as handle:
            parsed = json.load(handle)
        return parsed
    print(u"[%s] 输入的json文件路径有误,请检查..." %filePath)
    return None

def except4v():
    """Print usage hints based on the first command-line argument.

    With no arguments a reminder is printed. ``--version`` / ``--Version``
    prints the version banner and ``--help`` prints the usage text. Any
    other first argument prints nothing.
    """
    argv = sys.argv
    if len(argv) <= 1:
        print(u"未输入topic和partition!\n你可以--help查看具体使用方法...")
        return

    first = argv[1]
    if not first.startswith("--"):
        return

    # Drop the leading "--" to get the bare option name.
    option = first[2:]
    if option == "help":
        print(u"produceMsg.py 接收两个参数, 第一个是topic, 第二个是partition \neg:python produceMsg.py test 0 \n向topic名为test第0分区生产消息")
    elif option in ("version", "Version"):
        print("Version 1.0 \nPython 2.7.3 (default, Nov  6 2015, 14:11:14)                     \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-4)] on linux2")

def calcMsg(jsonMsg):
    """Print a per-message and overall summary of the produced messages.

    For every entry of ``jsonMsg`` the length of its ``"MSGBODY"`` value is
    printed, followed by the number of messages and the sum of those lengths.
    """
    total_msgs = len(jsonMsg)
    separator = "------------------------------------------"
    total_accts = 0

    print(separator)
    for position, entry in enumerate(jsonMsg, 1):
        accounts = len(entry["MSGBODY"])
        print(u"第[%d]条消息,包含ACCT_ID账户数:[%d]个"%(position, accounts))
        total_accts += accounts
    print(u"本次生产消息总共[%d]条, 总共账户数:[%d]个"%(total_msgs, total_accts))
    print(separator)

# Script entry point: python produceMsg.py <topic> <partition>
if __name__ == "__main__":

    # Prints the --help / --version text or the "no arguments" reminder.
    except4v()

    if len(sys.argv) == 3:
        topic = sys.argv[1]
        try:
            partition = int(sys.argv[2])
        except ValueError:
            print("partition must be an integer, got: %s" % sys.argv[2])
            sys.exit(1)
        produce = kfkProducer(conf.kafka_mgr["broker"], conf.kafka_mgr["port"], topic)
        # Called for its side effect: it logs the producer parameters.
        produce.__str__()
        jsonMsg = getMsgFromJsonfile(conf.kafka_produce)
        if jsonMsg is None:
            # getMsgFromJsonfile already reported the bad path; nothing to send.
            sys.exit(1)
        for msg in jsonMsg:
            # Serialize with json.dumps so the payload is real JSON; "%s" % msg
            # would send the Python repr of the object (single quotes, u'' ...).
            produce.produceMsg(topic, json.dumps(msg).encode("utf-8"), partition)
        calcMsg(jsonMsg)

4.设置两个配置文件:

第一个是config.py

#coding=utf-8

# Alternative broker configuration: kafka_mgr = {"broker": 'ip1:port,ip2:port,...,ipn:port'},
# i.e. a Kafka cluster; the code then needs a small adjustment (change the
# parameter list). Supporting both forms behind a switch is also possible.
kafka_mgr = {
    "broker" : "10.***.***.***",
    "port" : 6667,
}

# Prefix of the log file path; the date and ".log" are appended by the logger setup.
kafka_logDir = r"/*******/log/****"

# JSON file that provides the producer's input messages.
kafka_produce = r"/**********/data/input/produceMsg.json"
第二个是生产者输入的json文件:
produceMsg.json
json文件附上说明,具体可以按照说明配置

hi, welcome here~

produceMsg.json
=================================
输入json格式数据,作为生产者消息的输入。
1.支持多条json数据输入。格式如下:
    [
    json1,
    json2,
    ...,
    jsonN
    ]
总体结构是:[  ,  ]

2.此json文件不能加注释,因为会破坏json文件格式,导致无法解析
3.输入只要是json格式,不需要关注是不是一行或多行,多换行、空格等都不影响解析

 

消费者也是利用以上两个配置文件去实现即可。此处代码略



以上是关于python-kafka实现produce与consumer的主要内容,如果未能解决你的问题,请参考以下文章

rabbitmq学习笔记

程序实现kafka 生产和消费

幂等性与事务性Producer区别(Kafka)

Kafka Producer 实现源码分析

Kafka源码分析-序列2 -Producer -Metadata的数据结构与读取更新策略

Java中的并发和可扩展数据结构来处理任务?