序列化之protobuf与avro对比(Java)

Posted upupgo

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了序列化之protobuf与avro对比(Java)相关的知识,希望对你有一定的参考价值。

  最近在做socket通信时遇到了序列化工具选型的问题,调研初期倾向于使用protobuf:它可以省去手写编解码的过程,能够实现快速开发,且只需要维护一份协议文件即可。

  但是在调研过程中也发现了protobuf的一些弊端,比如需要为协议生成相应的Java类,和业务绑定太紧密;而在了解了AVRO之后,发现它完美解决了这个问题。

  下面记录下对这两种序列化工具的入门与测评。

一、protobuf基本操作

protobuf简介:

Protocol Buffers (a.k.a., protobuf) are Google's language-neutral, platform-neutral, extensible mechanism for serializing structured data. 

protobuf是google提供的一种跨语言、跨平台、可扩展的序列化工具。

1.1定义协议文件(部分字段)(TCPLog.proto):

// Declares proto2 syntax, which the "optional" field labels below rely on.
syntax = "proto2";
// Definition of the TCPLog message. Only fields 1-3 are listed in this excerpt;
// the generated Java class shown later declares accessors for fields 1 through 27.
message TCPLog{
         optional int32   total_byteps = 1;
         optional int64 flow_start_time =2;
         optional int64 date =3;
}

1.2生成对应的Java类:

生成过程可以使用Eclipse的插件,或者直接在控制台中使用命令生成。

命令行中生成规则如下:

protoc.exe -I=proto的输入目录 --java_out=java类输出目录 proto文件路径(即proto的输入目录加上proto文件名)

  生成java类如下:

// Generated by the protocol buffer compiler.  DO NOT EDIT!
// source: TCPLog.proto

public final class TCPLogOuterClass {
  // Private and empty: code outside this class cannot create instances of it.
  private TCPLogOuterClass() {}
  /**
   * No-op: the body is empty, so nothing is added to {@code registry}.
   *
   * @param registry the lite extension registry (not used by this method)
   */
  public static void registerAllExtensions(
      com.google.protobuf.ExtensionRegistryLite registry) {
  }

  /**
   * Overload for a full {@code ExtensionRegistry}; forwards to the
   * {@code ExtensionRegistryLite} overload.
   *
   * @param registry the registry passed on to the lite overload
   */
  public static void registerAllExtensions(
      com.google.protobuf.ExtensionRegistry registry) {
    // Typing the argument as the Lite type selects the other overload
    // instead of calling this method again.
    final com.google.protobuf.ExtensionRegistryLite liteRegistry = registry;
    registerAllExtensions(liteRegistry);
  }
  /**
   * Read-only accessor view of a {@code TCPLog} message.
   *
   * <p>Declares a {@code hasXxx()}/{@code getXxx()} pair for each of the 27
   * optional fields, plus a {@code getXxxBytes()} accessor for each of the four
   * string fields ({@code id}, {@code server_ip_addr}, {@code direction_mask},
   * {@code client_ip_addr}).
   */
  public interface TCPLogOrBuilder extends
      // @@protoc_insertion_point(interface_extends:TCPLog)
      com.google.protobuf.MessageOrBuilder {

    /**
     * <code>optional int32 total_byteps = 1;</code>
     * @return whether {@code total_byteps} is set
     */
    boolean hasTotalByteps();
    /**
     * <code>optional int32 total_byteps = 1;</code>
     * @return the value of {@code total_byteps}
     */
    int getTotalByteps();

    /**
     * <code>optional int64 flow_start_time = 2;</code>
     * @return whether {@code flow_start_time} is set
     */
    boolean hasFlowStartTime();
    /**
     * <code>optional int64 flow_start_time = 2;</code>
     * @return the value of {@code flow_start_time}
     */
    long getFlowStartTime();

    /**
     * <code>optional int64 date = 3;</code>
     * @return whether {@code date} is set
     */
    boolean hasDate();
    /**
     * <code>optional int64 date = 3;</code>
     * @return the value of {@code date}
     */
    long getDate();

    /**
     * <code>optional int64 server_total_packet = 4;</code>
     * @return whether {@code server_total_packet} is set
     */
    boolean hasServerTotalPacket();
    /**
     * <code>optional int64 server_total_packet = 4;</code>
     * @return the value of {@code server_total_packet}
     */
    long getServerTotalPacket();

    /**
     * <code>optional int64 client_total_byte = 5;</code>
     * @return whether {@code client_total_byte} is set
     */
    boolean hasClientTotalByte();
    /**
     * <code>optional int64 client_total_byte = 5;</code>
     * @return the value of {@code client_total_byte}
     */
    long getClientTotalByte();

    /**
     * <code>optional int32 link_id = 6;</code>
     * @return whether {@code link_id} is set
     */
    boolean hasLinkId();
    /**
     * <code>optional int32 link_id = 6;</code>
     * @return the value of {@code link_id}
     */
    int getLinkId();

    /**
     * <code>optional int64 total_byte = 7;</code>
     * @return whether {@code total_byte} is set
     */
    boolean hasTotalByte();
    /**
     * <code>optional int64 total_byte = 7;</code>
     * @return the value of {@code total_byte}
     */
    long getTotalByte();

    /**
     * <code>optional int64 flow_end_time = 8;</code>
     * @return whether {@code flow_end_time} is set
     */
    boolean hasFlowEndTime();
    /**
     * <code>optional int64 flow_end_time = 8;</code>
     * @return the value of {@code flow_end_time}
     */
    long getFlowEndTime();

    /**
     * <code>optional int32 client_port = 9;</code>
     * @return whether {@code client_port} is set
     */
    boolean hasClientPort();
    /**
     * <code>optional int32 client_port = 9;</code>
     * @return the value of {@code client_port}
     */
    int getClientPort();

    /**
     * <code>optional int32 protocol = 10;</code>
     * @return whether {@code protocol} is set
     */
    boolean hasProtocol();
    /**
     * <code>optional int32 protocol = 10;</code>
     * @return the value of {@code protocol}
     */
    int getProtocol();

    /**
     * <code>optional int64 total_packet = 11;</code>
     * @return whether {@code total_packet} is set
     */
    boolean hasTotalPacket();
    /**
     * <code>optional int64 total_packet = 11;</code>
     * @return the value of {@code total_packet}
     */
    long getTotalPacket();

    /**
     * <code>optional int64 flow_duration = 12;</code>
     * @return whether {@code flow_duration} is set
     */
    boolean hasFlowDuration();
    /**
     * <code>optional int64 flow_duration = 12;</code>
     * @return the value of {@code flow_duration}
     */
    long getFlowDuration();

    /**
     * <code>optional string id = 13;</code>
     * @return whether {@code id} is set
     */
    boolean hasId();
    /**
     * <code>optional string id = 13;</code>
     * @return the value of {@code id} as a {@code String}
     */
    java.lang.String getId();
    /**
     * <code>optional string id = 13;</code>
     * @return the value of {@code id} as a {@code ByteString}
     */
    com.google.protobuf.ByteString
        getIdBytes();

    /**
     * <code>optional string server_ip_addr = 14;</code>
     * @return whether {@code server_ip_addr} is set
     */
    boolean hasServerIpAddr();
    /**
     * <code>optional string server_ip_addr = 14;</code>
     * @return the value of {@code server_ip_addr} as a {@code String}
     */
    java.lang.String getServerIpAddr();
    /**
     * <code>optional string server_ip_addr = 14;</code>
     * @return the value of {@code server_ip_addr} as a {@code ByteString}
     */
    com.google.protobuf.ByteString
        getServerIpAddrBytes();

    /**
     * <code>optional string direction_mask = 15;</code>
     * @return whether {@code direction_mask} is set
     */
    boolean hasDirectionMask();
    /**
     * <code>optional string direction_mask = 15;</code>
     * @return the value of {@code direction_mask} as a {@code String}
     */
    java.lang.String getDirectionMask();
    /**
     * <code>optional string direction_mask = 15;</code>
     * @return the value of {@code direction_mask} as a {@code ByteString}
     */
    com.google.protobuf.ByteString
        getDirectionMaskBytes();

    /**
     * <code>optional int32 app = 16;</code>
     * @return whether {@code app} is set
     */
    boolean hasApp();
    /**
     * <code>optional int32 app = 16;</code>
     * @return the value of {@code app}
     */
    int getApp();

    /**
     * <code>optional int32 client_country_id = 17;</code>
     * @return whether {@code client_country_id} is set
     */
    boolean hasClientCountryId();
    /**
     * <code>optional int32 client_country_id = 17;</code>
     * @return the value of {@code client_country_id}
     */
    int getClientCountryId();

    /**
     * <code>optional int32 client_netsegment_id = 18;</code>
     * @return whether {@code client_netsegment_id} is set
     */
    boolean hasClientNetsegmentId();
    /**
     * <code>optional int32 client_netsegment_id = 18;</code>
     * @return the value of {@code client_netsegment_id}
     */
    int getClientNetsegmentId();

    /**
     * <code>optional int64 client_total_packet = 19;</code>
     * @return whether {@code client_total_packet} is set
     */
    boolean hasClientTotalPacket();
    /**
     * <code>optional int64 client_total_packet = 19;</code>
     * @return the value of {@code client_total_packet}
     */
    long getClientTotalPacket();

    /**
     * <code>optional string client_ip_addr = 20;</code>
     * @return whether {@code client_ip_addr} is set
     */
    boolean hasClientIpAddr();
    /**
     * <code>optional string client_ip_addr = 20;</code>
     * @return the value of {@code client_ip_addr} as a {@code String}
     */
    java.lang.String getClientIpAddr();
    /**
     * <code>optional string client_ip_addr = 20;</code>
     * @return the value of {@code client_ip_addr} as a {@code ByteString}
     */
    com.google.protobuf.ByteString
        getClientIpAddrBytes();

    /**
     * <code>optional int32 tcp_status = 21;</code>
     * @return whether {@code tcp_status} is set
     */
    boolean hasTcpStatus();
    /**
     * <code>optional int32 tcp_status = 21;</code>
     * @return the value of {@code tcp_status}
     */
    int getTcpStatus();

    /**
     * <code>optional int32 server_country_id = 22;</code>
     * @return whether {@code server_country_id} is set
     */
    boolean hasServerCountryId();
    /**
     * <code>optional int32 server_country_id = 22;</code>
     * @return the value of {@code server_country_id}
     */
    int getServerCountryId();

    /**
     * <code>optional int32 server_netsegment_id = 23;</code>
     * @return whether {@code server_netsegment_id} is set
     */
    boolean hasServerNetsegmentId();
    /**
     * <code>optional int32 server_netsegment_id = 23;</code>
     * @return the value of {@code server_netsegment_id}
     */
    int getServerNetsegmentId();

    /**
     * <code>optional int64 avg_pkt_size = 24;</code>
     * @return whether {@code avg_pkt_size} is set
     */
    boolean hasAvgPktSize();
    /**
     * <code>optional int64 avg_pkt_size = 24;</code>
     * @return the value of {@code avg_pkt_size}
     */
    long getAvgPktSize();

    /**
     * <code>optional int32 server_port = 25;</code>
     * @return whether {@code server_port} is set
     */
    boolean hasServerPort();
    /**
     * <code>optional int32 server_port = 25;</code>
     * @return the value of {@code server_port}
     */
    int getServerPort();

    /**
     * <code>optional int64 server_total_byte = 26;</code>
     * @return whether {@code server_total_byte} is set
     */
    boolean hasServerTotalByte();
    /**
     * <code>optional int64 server_total_byte = 26;</code>
     * @return the value of {@code server_total_byte}
     */
    long getServerTotalByte();

    /**
     * <code>optional int32 total_packetps = 27;</code>
     * @return whether {@code total_packetps} is set
     */
    boolean hasTotalPacketps();
    /**
     * <code>optional int32 total_packetps = 27;</code>
     * @return the value of {@code total_packetps}
     */
    int getTotalPacketps();
  }
  /**
   * Protobuf type {@code TCPLog}
   */
  public  static final class TCPLog extends
      com.google.protobuf.GeneratedMessageV3 implements
      // @@protoc_insertion_point(message_implements:TCPLog)
      TCPLogOrBuilder {
    // Use TCPLog.newBuilder() to construct.
    // Private constructor that only forwards the given builder to the
    // GeneratedMessageV3 superclass constructor.
    private TCPLog(com.google.protobuf.GeneratedMessageV3.Builder<?> builder) {
      super(builder);
    }
    /**
     * Creates a message with every field at its initial value: numeric fields
     * are zero and string fields are the empty string.
     */
    private TCPLog() {
      // Fields assigned the int literal 0.
      totalByteps_ = 0;
      linkId_ = 0;
      clientPort_ = 0;
      protocol_ = 0;
      app_ = 0;
      clientCountryId_ = 0;
      clientNetsegmentId_ = 0;
      tcpStatus_ = 0;
      serverCountryId_ = 0;
      serverNetsegmentId_ = 0;
      serverPort_ = 0;
      totalPacketps_ = 0;

      // Fields assigned the long literal 0L.
      flowStartTime_ = 0L;
      date_ = 0L;
      serverTotalPacket_ = 0L;
      clientTotalByte_ = 0L;
      totalByte_ = 0L;
      flowEndTime_ = 0L;
      totalPacket_ = 0L;
      flowDuration_ = 0L;
      clientTotalPacket_ = 0L;
      avgPktSize_ = 0L;
      serverTotalByte_ = 0L;

      // String-valued fields start out as the empty string.
      id_ = "";
      serverIpAddr_ = "";
      directionMask_ = "";
      clientIpAddr_ = "";
    }

    /**
     * Returns the unknown-field set held by this message; the parsing
     * constructor assigns it from the fields it could not match to a known tag.
     */
    @java.lang.Override
    public final com.google.protobuf.UnknownFieldSet getUnknownFields() {
      return unknownFields;
    }
    /**
     * Parsing constructor: starts from the default field values and then reads
     * tags from {@code input} until tag {@code 0} is read or
     * {@code parseUnknownField} returns {@code false}.
     *
     * <p>Each recognized tag stores the decoded value in the matching field and
     * sets that field's presence bit in {@code bitField0_}. Unrecognized tags
     * are handed to {@code parseUnknownField}. The unknown-field set is always
     * built and stored, even when parsing fails.
     *
     * @param input the stream to decode from
     * @param extensionRegistry registry passed through to {@code parseUnknownField}
     * @throws com.google.protobuf.InvalidProtocolBufferException if decoding
     *     fails; an {@code IOException} is wrapped, and in both cases this
     *     partially-populated message is attached via {@code setUnfinishedMessage}
     */
    private TCPLog(
        com.google.protobuf.CodedInputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      this();
      // Named differently from the unknownFields field it is assigned to below.
      com.google.protobuf.UnknownFieldSet.Builder unknownFieldsBuilder =
          com.google.protobuf.UnknownFieldSet.newBuilder();
      try {
        boolean done = false;
        while (!done) {
          // Case labels are wire tags, e.g. 8 for field 1 and 106 for field 13
          // (see the protobuf encoding documentation).
          int tag = input.readTag();
          switch (tag) {
            case 0:
              done = true;
              break;
            case 8: {
              bitField0_ |= 0x00000001;
              totalByteps_ = input.readInt32();
              break;
            }
            case 16: {
              bitField0_ |= 0x00000002;
              flowStartTime_ = input.readInt64();
              break;
            }
            case 24: {
              bitField0_ |= 0x00000004;
              date_ = input.readInt64();
              break;
            }
            case 32: {
              bitField0_ |= 0x00000008;
              serverTotalPacket_ = input.readInt64();
              break;
            }
            case 40: {
              bitField0_ |= 0x00000010;
              clientTotalByte_ = input.readInt64();
              break;
            }
            case 48: {
              bitField0_ |= 0x00000020;
              linkId_ = input.readInt32();
              break;
            }
            case 56: {
              bitField0_ |= 0x00000040;
              totalByte_ = input.readInt64();
              break;
            }
            case 64: {
              bitField0_ |= 0x00000080;
              flowEndTime_ = input.readInt64();
              break;
            }
            case 72: {
              bitField0_ |= 0x00000100;
              clientPort_ = input.readInt32();
              break;
            }
            case 80: {
              bitField0_ |= 0x00000200;
              protocol_ = input.readInt32();
              break;
            }
            case 88: {
              bitField0_ |= 0x00000400;
              totalPacket_ = input.readInt64();
              break;
            }
            case 96: {
              bitField0_ |= 0x00000800;
              flowDuration_ = input.readInt64();
              break;
            }
            case 106: {
              // String fields keep the raw ByteString; getId() shows the
              // conversion to String happening later, on access.
              com.google.protobuf.ByteString bs = input.readBytes();
              bitField0_ |= 0x00001000;
              id_ = bs;
              break;
            }
            case 114: {
              com.google.protobuf.ByteString bs = input.readBytes();
              bitField0_ |= 0x00002000;
              serverIpAddr_ = bs;
              break;
            }
            case 122: {
              com.google.protobuf.ByteString bs = input.readBytes();
              bitField0_ |= 0x00004000;
              directionMask_ = bs;
              break;
            }
            case 128: {
              bitField0_ |= 0x00008000;
              app_ = input.readInt32();
              break;
            }
            case 136: {
              bitField0_ |= 0x00010000;
              clientCountryId_ = input.readInt32();
              break;
            }
            case 144: {
              bitField0_ |= 0x00020000;
              clientNetsegmentId_ = input.readInt32();
              break;
            }
            case 152: {
              bitField0_ |= 0x00040000;
              clientTotalPacket_ = input.readInt64();
              break;
            }
            case 162: {
              com.google.protobuf.ByteString bs = input.readBytes();
              bitField0_ |= 0x00080000;
              clientIpAddr_ = bs;
              break;
            }
            case 168: {
              bitField0_ |= 0x00100000;
              tcpStatus_ = input.readInt32();
              break;
            }
            case 176: {
              bitField0_ |= 0x00200000;
              serverCountryId_ = input.readInt32();
              break;
            }
            case 184: {
              bitField0_ |= 0x00400000;
              serverNetsegmentId_ = input.readInt32();
              break;
            }
            case 192: {
              bitField0_ |= 0x00800000;
              avgPktSize_ = input.readInt64();
              break;
            }
            case 200: {
              bitField0_ |= 0x01000000;
              serverPort_ = input.readInt32();
              break;
            }
            case 208: {
              bitField0_ |= 0x02000000;
              serverTotalByte_ = input.readInt64();
              break;
            }
            case 216: {
              bitField0_ |= 0x04000000;
              totalPacketps_ = input.readInt32();
              break;
            }
            default: {
              // Any tag not listed above: stop when parseUnknownField
              // returns false.
              if (!parseUnknownField(input, unknownFieldsBuilder,
                                     extensionRegistry, tag)) {
                done = true;
              }
              break;
            }
          }
        }
      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
        throw e.setUnfinishedMessage(this);
      } catch (java.io.IOException e) {
        throw new com.google.protobuf.InvalidProtocolBufferException(
            e).setUnfinishedMessage(this);
      } finally {
        // Runs on success and on failure alike.
        this.unknownFields = unknownFieldsBuilder.build();
        makeExtensionsImmutable();
      }
    }
    /**
     * Returns the descriptor held in
     * {@code TCPLogOuterClass.internal_static_TCPLog_descriptor}.
     */
    public static final com.google.protobuf.Descriptors.Descriptor
        getDescriptor() {
      return TCPLogOuterClass.internal_static_TCPLog_descriptor;
    }

    /**
     * Returns the outer class's field accessor table for {@code TCPLog}, after
     * calling {@code ensureFieldAccessorsInitialized} on it with the message
     * class and its builder class.
     */
    protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
        internalGetFieldAccessorTable() {
      return TCPLogOuterClass.internal_static_TCPLog_fieldAccessorTable
          .ensureFieldAccessorsInitialized(
              TCPLogOuterClass.TCPLog.class, TCPLogOuterClass.TCPLog.Builder.class);
    }

    private int bitField0_;
    public static final int TOTAL_BYTEPS_FIELD_NUMBER = 1;
    private int totalByteps_;
    /**
     * <code>optional int32 total_byteps = 1;</code>
     * @return {@code true} if bit {@code 0x00000001} of {@code bitField0_} is set
     */
    public boolean hasTotalByteps() {
      return (bitField0_ & 0x00000001) != 0;
    }
    /**
     * <code>optional int32 total_byteps = 1;</code>
     * @return the value of {@code totalByteps_}; the no-arg constructor
     *     initializes it to {@code 0}
     */
    public int getTotalByteps() {
      return totalByteps_;
    }

    public static final int FLOW_START_TIME_FIELD_NUMBER = 2;
    private long flowStartTime_;
    /**
     * <code>optional int64 flow_start_time = 2;</code>
     * @return {@code true} if bit {@code 0x00000002} of {@code bitField0_} is set
     */
    public boolean hasFlowStartTime() {
      return (bitField0_ & 0x00000002) != 0;
    }
    /**
     * <code>optional int64 flow_start_time = 2;</code>
     * @return the value of {@code flowStartTime_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getFlowStartTime() {
      return flowStartTime_;
    }

    public static final int DATE_FIELD_NUMBER = 3;
    private long date_;
    /**
     * <code>optional int64 date = 3;</code>
     * @return {@code true} if bit {@code 0x00000004} of {@code bitField0_} is set
     */
    public boolean hasDate() {
      return (bitField0_ & 0x00000004) != 0;
    }
    /**
     * <code>optional int64 date = 3;</code>
     * @return the value of {@code date_}; the no-arg constructor initializes
     *     it to {@code 0L}
     */
    public long getDate() {
      return date_;
    }

    public static final int SERVER_TOTAL_PACKET_FIELD_NUMBER = 4;
    private long serverTotalPacket_;
    /**
     * <code>optional int64 server_total_packet = 4;</code>
     * @return {@code true} if bit {@code 0x00000008} of {@code bitField0_} is set
     */
    public boolean hasServerTotalPacket() {
      return (bitField0_ & 0x00000008) != 0;
    }
    /**
     * <code>optional int64 server_total_packet = 4;</code>
     * @return the value of {@code serverTotalPacket_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getServerTotalPacket() {
      return serverTotalPacket_;
    }

    public static final int CLIENT_TOTAL_BYTE_FIELD_NUMBER = 5;
    private long clientTotalByte_;
    /**
     * <code>optional int64 client_total_byte = 5;</code>
     * @return {@code true} if bit {@code 0x00000010} of {@code bitField0_} is set
     */
    public boolean hasClientTotalByte() {
      return (bitField0_ & 0x00000010) != 0;
    }
    /**
     * <code>optional int64 client_total_byte = 5;</code>
     * @return the value of {@code clientTotalByte_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getClientTotalByte() {
      return clientTotalByte_;
    }

    public static final int LINK_ID_FIELD_NUMBER = 6;
    private int linkId_;
    /**
     * <code>optional int32 link_id = 6;</code>
     * @return {@code true} if bit {@code 0x00000020} of {@code bitField0_} is set
     */
    public boolean hasLinkId() {
      return (bitField0_ & 0x00000020) != 0;
    }
    /**
     * <code>optional int32 link_id = 6;</code>
     * @return the value of {@code linkId_}; the no-arg constructor initializes
     *     it to {@code 0}
     */
    public int getLinkId() {
      return linkId_;
    }

    public static final int TOTAL_BYTE_FIELD_NUMBER = 7;
    private long totalByte_;
    /**
     * <code>optional int64 total_byte = 7;</code>
     * @return {@code true} if bit {@code 0x00000040} of {@code bitField0_} is set
     */
    public boolean hasTotalByte() {
      return (bitField0_ & 0x00000040) != 0;
    }
    /**
     * <code>optional int64 total_byte = 7;</code>
     * @return the value of {@code totalByte_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getTotalByte() {
      return totalByte_;
    }

    public static final int FLOW_END_TIME_FIELD_NUMBER = 8;
    private long flowEndTime_;
    /**
     * <code>optional int64 flow_end_time = 8;</code>
     * @return {@code true} if bit {@code 0x00000080} of {@code bitField0_} is set
     */
    public boolean hasFlowEndTime() {
      return (bitField0_ & 0x00000080) != 0;
    }
    /**
     * <code>optional int64 flow_end_time = 8;</code>
     * @return the value of {@code flowEndTime_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getFlowEndTime() {
      return flowEndTime_;
    }

    public static final int CLIENT_PORT_FIELD_NUMBER = 9;
    private int clientPort_;
    /**
     * <code>optional int32 client_port = 9;</code>
     * @return {@code true} if bit {@code 0x00000100} of {@code bitField0_} is set
     */
    public boolean hasClientPort() {
      return (bitField0_ & 0x00000100) != 0;
    }
    /**
     * <code>optional int32 client_port = 9;</code>
     * @return the value of {@code clientPort_}; the no-arg constructor
     *     initializes it to {@code 0}
     */
    public int getClientPort() {
      return clientPort_;
    }

    public static final int PROTOCOL_FIELD_NUMBER = 10;
    private int protocol_;
    /**
     * <code>optional int32 protocol = 10;</code>
     * @return {@code true} if bit {@code 0x00000200} of {@code bitField0_} is set
     */
    public boolean hasProtocol() {
      return (bitField0_ & 0x00000200) != 0;
    }
    /**
     * <code>optional int32 protocol = 10;</code>
     * @return the value of {@code protocol_}; the no-arg constructor
     *     initializes it to {@code 0}
     */
    public int getProtocol() {
      return protocol_;
    }

    public static final int TOTAL_PACKET_FIELD_NUMBER = 11;
    private long totalPacket_;
    /**
     * <code>optional int64 total_packet = 11;</code>
     * @return {@code true} if bit {@code 0x00000400} of {@code bitField0_} is set
     */
    public boolean hasTotalPacket() {
      return (bitField0_ & 0x00000400) != 0;
    }
    /**
     * <code>optional int64 total_packet = 11;</code>
     * @return the value of {@code totalPacket_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getTotalPacket() {
      return totalPacket_;
    }

    public static final int FLOW_DURATION_FIELD_NUMBER = 12;
    private long flowDuration_;
    /**
     * <code>optional int64 flow_duration = 12;</code>
     * @return {@code true} if bit {@code 0x00000800} of {@code bitField0_} is set
     */
    public boolean hasFlowDuration() {
      return (bitField0_ & 0x00000800) != 0;
    }
    /**
     * <code>optional int64 flow_duration = 12;</code>
     * @return the value of {@code flowDuration_}; the no-arg constructor
     *     initializes it to {@code 0L}
     */
    public long getFlowDuration() {
      return flowDuration_;
    }

    public static final int ID_FIELD_NUMBER = 13;
    private volatile java.lang.Object id_;
    /**
     * <code>optional string id = 13;</code>
     * @return {@code true} if bit {@code 0x00001000} of {@code bitField0_} is set
     */
    public boolean hasId() {
      return (bitField0_ & 0x00001000) != 0;
    }
    /**
     * <code>optional string id = 13;</code>
     *
     * <p>{@code id_} holds either a {@code String} or a {@code ByteString}.
     * A {@code String} is returned as is. A {@code ByteString} is decoded with
     * {@code toStringUtf8()}, and the decoded value replaces {@code id_} only
     * when the bytes are valid UTF-8.
     *
     * @return the {@code id} value as a {@code String}
     */
    public java.lang.String getId() {
      // Read the volatile field once and work on that snapshot.
      final java.lang.Object current = id_;
      if (current instanceof java.lang.String) {
        return (java.lang.String) current;
      }
      final com.google.protobuf.ByteString rawBytes =
          (com.google.protobuf.ByteString) current;
      final java.lang.String decoded = rawBytes.toStringUtf8();
      if (rawBytes.isValidUtf8()) {
        id_ = decoded;
      }
      return decoded;
    }
    /**
     * <code>optional string id = 13;</code>
     */

以上是关于序列化之protobuf与avro对比(Java)的主要内容,如果未能解决你的问题,请参考以下文章

Avro 与 Protobuf 的性能指标

深入对比Java与Hadoop大数据序列化机制Avro

Netty_05_六种序列化方式(JavaIO序列化 XML序列化 Hessian序列化 JSON序列化 Protobuf序列化 AVRO序列化)(实践类)

Netty_05_六种序列化方式(JavaIO序列化 XML序列化 Hessian序列化 JSON序列化 Protobuf序列化 AVRO序列化)(实践类)

序列化系列protobuf

Hadoop之AVRO