DPDK flow_classify 源码阅读
Posted zcplayground
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了DPDK flow_classify 源码阅读相关的知识,希望对你有一定的参考价值。
todo
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2017 Intel Corporation
*/
#include <stdint.h>
#include <inttypes.h>
#include <getopt.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_flow.h>
#include <rte_flow_classify.h>
#include <rte_table_acl.h>
/* Ring size handed to rte_eth_rx_queue_setup() in port_init(). */
#define RX_RING_SIZE 1024
/* Ring size handed to rte_eth_tx_queue_setup() in port_init(). */
#define TX_RING_SIZE 1024
/* NOTE(review): presumably mbuf pool sizing; the pool creation is not visible in this excerpt. */
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
/* Capacity of the per-iteration mbuf array and the maximum requested from rte_eth_rx_burst(). */
#define BURST_SIZE 32
/* Capacity of the rules[] array and bound of the per-burst query loop in lcore_main(). */
#define MAX_NUM_CLASSIFY 30
/* NOTE(review): not referenced in the visible code; presumably the classifier table capacity - confirm. */
#define FLOW_CLASSIFY_MAX_RULE_NUM 91
/* Largest rule priority accepted by parse_ipv4_5tuple_rule(). */
#define FLOW_CLASSIFY_MAX_PRIORITY 8
/* NOTE(review): not referenced in the visible code; presumably the classifier name buffer size - confirm. */
#define FLOW_CLASSIFIER_NAME_SIZE 64
/* A line whose first character is this one is treated as a comment by is_bypass_line(). */
#define COMMENT_LEAD_CHAR ('#')
/* NOTE(review): presumably the command-line option naming the IPv4 rules file; the option parser is not visible here. */
#define OPTION_RULE_IPV4 "rule_ipv4"
/* Map the application's FLOW_CLASSIFY log type onto RTE_LOGTYPE_USER3. */
#define RTE_LOGTYPE_FLOW_CLASSIFY RTE_LOGTYPE_USER3
/* printf-style logging helper: forwards to RTE_LOG at ERR level with the FLOW_CLASSIFY log type. */
#define flow_classify_log(format, ...) RTE_LOG(ERR, FLOW_CLASSIFY, format, ##__VA_ARGS__)
/*
 * Split a 32-bit value into its four octets, most significant octet first.
 *
 * ip      - value to split; evaluated exactly once.
 * a..d    - pointers to the destination bytes (a receives bits 31..24,
 *           d receives bits 7..0).
 *
 * Every argument is parenthesized so that compound expressions such as
 * "x | y" or "base + 1" can be passed safely.
 */
#define uint32_t_to_char(ip, a, b, c, d) do { \
	const uint32_t u32tc_ip_ = (uint32_t)(ip); \
	*(a) = (unsigned char)((u32tc_ip_ >> 24) & 0xff); \
	*(b) = (unsigned char)((u32tc_ip_ >> 16) & 0xff); \
	*(c) = (unsigned char)((u32tc_ip_ >> 8) & 0xff); \
	*(d) = (unsigned char)(u32tc_ip_ & 0xff); \
} while (0)
/*
 * Positions of the whitespace-separated tokens of one rule line, in the
 * order parse_ipv4_5tuple_rule() expects them.
 */
enum {
	CB_FLD_SRC_ADDR = 0,	/* source address/mask length */
	CB_FLD_DST_ADDR,	/* destination address/mask length */
	CB_FLD_SRC_PORT,	/* source port value */
	CB_FLD_SRC_PORT_DLM,	/* ":" between source port and its mask */
	CB_FLD_SRC_PORT_MASK,	/* source port mask */
	CB_FLD_DST_PORT,	/* destination port value */
	CB_FLD_DST_PORT_DLM,	/* ":" between destination port and its mask */
	CB_FLD_DST_PORT_MASK,	/* destination port mask */
	CB_FLD_PROTO,		/* protocol/protocol mask */
	CB_FLD_PRIORITY,	/* rule priority */
	CB_FLD_NUM		/* number of tokens per rule line */
};
/* Application parameters. */
static struct{
/* NOTE(review): presumably the path of the IPv4 rules file; where it is assigned is not visible in this excerpt. */
const char *rule_ipv4_name;
} parm_config;
/* Token that must stand between a port value and its mask in a rule line; compared in parse_ipv4_5tuple_rule(). */
const char cb_port_delim[] = ":";
/* Baseline Ethernet port configuration; port_init() starts from a copy of it. */
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
/* Maximum accepted RX packet length is set to ETHER_MAX_LEN. */
.max_rx_pkt_len = ETHER_MAX_LEN,
.ignore_offload_bitfield = 1,
},
};
/* Application-side wrapper around the library's flow classifier handle. */
struct flow_classifier {
/* Handle passed to the rte_flow_classify_* / rte_flow_classifier_* calls in lcore_main(). */
struct rte_flow_classifier *cls;
};
/* Cache-aligned container embedding a struct flow_classifier. */
struct flow_classifier_acl {
struct flow_classifier cls;
} __rte_cache_aligned;
/*
 * Field indices of the IPv4 5-tuple ACL rule; each one is used as the
 * .field_index of the matching ipv4_defs[] entry.
 */
enum {
	PROTO_FIELD_IPV4 = 0,	/* next-protocol byte */
	SRC_FIELD_IPV4 = 1,	/* source address */
	DST_FIELD_IPV4 = 2,	/* destination address */
	SRCP_FIELD_IPV4 = 3,	/* source port */
	DSTP_FIELD_IPV4 = 4,	/* destination port */
	NUM_FIELDS_IPV4 = 5	/* number of fields, sizes ipv4_defs[] */
};
/*
 * Input indices referenced by the .input_index members of ipv4_defs[].
 * The two port fields share the last one.
 */
enum {
	PROTO_INPUT_IPV4 = 0,
	SRC_INPUT_IPV4 = 1,
	DST_INPUT_IPV4 = 2,
	SRCP_DESTP_INPUT_IPV4 = 3
};
/* Data structure rte_acl_field_def: definition of one field of an ACL (access control list) table.
Every field of an ACL rule has an associated definition made of five parts: type, size, field index (which field it is), input index (0-N) and offset.
In this table every offset is computed from the start of the Ethernet header.
*/
static struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
/* first input field - always one byte long. */
{
.type = RTE_ACL_FIELD_TYPE_BITMASK,
.size = sizeof(uint8_t),
.field_index = PROTO_FIELD_IPV4,
.input_index = PROTO_INPUT_IPV4,
.offset = sizeof(struct ether_hdr) +
offsetof(struct ipv4_hdr, next_proto_id),
},
/* next input field (IPv4 source address) - 4 consecutive bytes. */
{
/* rte_flow uses a bit mask for IPv4 addresses */
.type = RTE_ACL_FIELD_TYPE_BITMASK,
.size = sizeof(uint32_t),
.field_index = SRC_FIELD_IPV4,
.input_index = SRC_INPUT_IPV4,
.offset = sizeof(struct ether_hdr) +
offsetof(struct ipv4_hdr, src_addr),
},
/* next input field (IPv4 destination address) - 4 consecutive bytes. */
{
/* rte_flow uses a bit mask for IPv4 addresses */
.type = RTE_ACL_FIELD_TYPE_BITMASK,
.size = sizeof(uint32_t),
.field_index = DST_FIELD_IPV4,
.input_index = DST_INPUT_IPV4,
.offset = sizeof(struct ether_hdr) +
offsetof(struct ipv4_hdr, dst_addr),
},
/*
* Next 2 fields (src & dst ports) form 4 consecutive bytes.
* They share the same input index.
* The port offsets add sizeof(struct ipv4_hdr), i.e. they assume an
* IPv4 header without options, and use the struct tcp_hdr layout.
*/
{
/* rte_flow uses a bit mask for protocol ports */
.type = RTE_ACL_FIELD_TYPE_BITMASK,
.size = sizeof(uint16_t),
.field_index = SRCP_FIELD_IPV4,
.input_index = SRCP_DESTP_INPUT_IPV4,
.offset = sizeof(struct ether_hdr) +
sizeof(struct ipv4_hdr) +
offsetof(struct tcp_hdr, src_port),
},
{
/* rte_flow uses a bit mask for protocol ports */
.type = RTE_ACL_FIELD_TYPE_BITMASK,
.size = sizeof(uint16_t),
.field_index = DSTP_FIELD_IPV4,
.input_index = SRCP_DESTP_INPUT_IPV4,
.offset = sizeof(struct ether_hdr) +
sizeof(struct ipv4_hdr) +
offsetof(struct tcp_hdr, dst_port),
},
};
/* flow classify data */
static int num_classify_rules; // index into the rules array
static struct rte_flow_classify_rule *rules[MAX_NUM_CLASSIFY]; // array of rules
/* Statistics buffer read by lcore_main() after each query (counter1 and ipv4_5tuple.proto). */
static struct rte_flow_classify_ipv4_5tuple_stats ntuple_stats;
/* Stats descriptor handed to rte_flow_classifier_query(); its stats pointer refers to ntuple_stats. */
static struct rte_flow_classify_stats classify_stats = {
.stats = (void **)&ntuple_stats
};
/* parameters for rte_flow_classify_validate and
* rte_flow_classify_table_entry_add functions
*/
static struct rte_flow_item eth_item = { RTE_FLOW_ITEM_TYPE_ETH,
0, 0, 0 };
static struct rte_flow_item end_item = { RTE_FLOW_ITEM_TYPE_END,
0, 0, 0 };
/* sample actions:
* "actions count / end"
*/
struct rte_flow_query_count count = {
.reset = 1,
.hits_set = 1,
.bytes_set = 1,
.hits = 0,
.bytes = 0,
};
/* COUNT action whose configuration pointer refers to the count object above. */
static struct rte_flow_action count_action = { RTE_FLOW_ACTION_TYPE_COUNT,
&count};
static struct rte_flow_action end_action = { RTE_FLOW_ACTION_TYPE_END, 0};
/* NOTE(review): presumably filled with count_action followed by end_action; the assignment is not visible in this excerpt. */
static struct rte_flow_action actions[2];
/* sample attributes */
static struct rte_flow_attr attr;
/* flow_classify.c: Based on DPDK skeleton forwarding example. */
/*
 * Initializes a given port using global settings and with the RX buffers
 * coming from the mbuf_pool passed as a parameter.
 *
 * The port is configured with one RX and one TX queue, started, its MAC
 * address is printed and promiscuous mode is enabled.
 *
 * port      - Ethernet port identifier.
 * mbuf_pool - mempool the RX queue draws its mbufs from.
 *
 * Returns 0 on success, -1 when the port id is not valid, otherwise the
 * error value returned by the failing rte_eth_* call.
 */
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf port_conf = port_conf_default;
	struct ether_addr addr;
	const uint16_t rx_rings = 1, tx_rings = 1;
	int retval;
	uint16_t q;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	/* Enable fast mbuf free on TX only when the device advertises it. */
	rte_eth_dev_info_get(port, &dev_info);
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	/* Configure the Ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0)
		return retval;

	/* Allocate and set up 1 RX queue per Ethernet port. */
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
		if (retval < 0)
			return retval;
	}

	/* Start from the driver's default TX config, then apply our offloads. */
	txconf = dev_info.default_txconf;
	txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
	txconf.offloads = port_conf.txmode.offloads;

	/* Allocate and set up 1 TX queue per Ethernet port. */
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
				rte_eth_dev_socket_id(port), &txconf);
		if (retval < 0)
			return retval;
	}

	/* Start the Ethernet port. */
	retval = rte_eth_dev_start(port);
	if (retval < 0)
		return retval;

	/* Display the port MAC address. */
	rte_eth_macaddr_get(port, &addr);
	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
			   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
			port,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	/* Enable RX in promiscuous mode for the Ethernet device. */
	rte_eth_promiscuous_enable(port);

	return 0;
}
/*
* The lcore main. This is the main thread that does the work, reading from
* an input port classifying the packets and writing to an output port.
*/
static __attribute__((noreturn)) void
lcore_main(struct flow_classifier *cls_app)
{
uint16_t port;
int ret;
int i = 0;
ret = rte_flow_classify_table_entry_delete(cls_app->cls,
rules[7]);
if (ret)
printf("table_entry_delete failed [7] %d
", ret);
else
printf("table_entry_delete succeeded [7]
");
/*
* Check that the port is on the same NUMA node as the polling thread
* for best performance.
*/
RTE_ETH_FOREACH_DEV(port)
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) != (int)rte_socket_id()) {
printf("
");
printf("WARNING: port %u is on remote NUMA node
",
port);
printf("to polling thread.
");
printf("Performance will not be optimal.
");
}
printf("
Core %u forwarding packets. ", rte_lcore_id());
printf("[Ctrl+C to quit]
");
/* Run until the application is quit or killed. */
for (;;) {
/*
* Receive packets on a port, **classify them** and forward them
* on the paired port.
* The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
*/
RTE_ETH_FOREACH_DEV(port) {
/* Get burst of RX packets, from first port of pair. */
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE); // 收包
if (unlikely(nb_rx == 0))
continue;
for (i = 0; i < MAX_NUM_CLASSIFY; i++) {
if (rules[i]) { // 在classifier中查询特定的规则
// 收包之后,将感兴趣的流放到
ret = rte_flow_classifier_query(
cls_app->cls, // 流分类器句柄
// 要处理的数据包的 mbuf
// 数据包数量
// 规则
bufs, nb_rx, rules[i],
&classify_stats);
if (ret) // 返回 0 代表分类成功
printf(
"rule [%d] query failed ret [%d]
",
i, ret);
else {
printf(
"rule[%d] count=%"PRIu64"
",
i, ntuple_stats.counter1);
printf("proto = %d
",
ntuple_stats.ipv4_5tuple.proto);
}
}
}
/* Send burst of TX packets, to second port of pair. */
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
/* Free any unsent packets. */
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
}
}
/*
 * Parse IPv4 5 tuple rules file, ipv4_rules_file.txt.
 * Expected format:
 * <src_ipv4_addr>'/'<masklen> <space> \
 * <dst_ipv4_addr>'/'<masklen> <space> \
 * <src_port> <space> ":" <src_port_mask> <space> \
 * <dst_port> <space> ":" <dst_port_mask> <space> \
 * <proto>'/'<proto_mask> <space> \
 * <priority>
 */

/*
 * Parse one unsigned number from the front of *in.
 *
 * in   - in/out cursor into the text; on success it is advanced to the
 *        character following the delimiter, on failure it is left unchanged.
 * fd   - receives the parsed value on success.
 * base - numeric base handed to strtoul() (0 = auto-detect).
 * lim  - largest value accepted.
 * dlm  - character that must immediately follow the number
 *        (0 means the number must end the string).
 *
 * Returns 0 on success, -EINVAL when no digits are present, the conversion
 * fails, the delimiter does not match or the value exceeds lim.
 */
static int
get_cb_field(char **in, uint32_t *fd, int base, unsigned long lim,
		char dlm)
{
	unsigned long val;
	char *end;

	errno = 0;
	val = strtoul(*in, &end, base);

	/*
	 * end == *in means strtoul() consumed nothing; without this check an
	 * empty field (e.g. the gap in "1..2.3") would silently parse as 0.
	 */
	if (errno != 0 || end == *in || end[0] != dlm || val > lim)
		return -EINVAL;

	*fd = (uint32_t)val;
	*in = end + 1;
	return 0;
}
/*
 * Parse a dotted-quad network specification of the form "a.b.c.d/len".
 *
 * in       - text to parse.
 * addr     - receives the address assembled by IPv4() from the four octets.
 * mask_len - receives the mask length (at most the bit width of uint32_t).
 *
 * Returns 0 on success, -EINVAL if any component fails to parse.
 */
static int
parse_ipv4_net(char *in, uint32_t *addr, uint32_t *mask_len)
{
	/* Delimiter expected after each of the four octets, in order. */
	static const char octet_dlm[4] = { '.', '.', '.', '/' };
	uint32_t octet[4];
	uint32_t prefix;
	unsigned int k;

	for (k = 0; k < 4; k++) {
		if (get_cb_field(&in, &octet[k], 0, UINT8_MAX,
				octet_dlm[k]) != 0)
			return -EINVAL;
	}

	/* The mask length must be the last thing in the string. */
	if (get_cb_field(&in, &prefix, 0, sizeof(uint32_t) * CHAR_BIT, 0) != 0)
		return -EINVAL;

	*addr = IPv4(octet[0], octet[1], octet[2], octet[3]);
	*mask_len = prefix;
	return 0;
}
/*
 * Parse one line of the IPv4 5-tuple rules file into an ntuple filter.
 *
 * str           - rule line; it is modified in place because it is split
 *                 with strtok_r().
 * ntuple_filter - receives addresses, masks, ports, protocol and priority.
 *
 * The line must contain CB_FLD_NUM whitespace-separated tokens in the order
 * given by the CB_FLD_* enum.
 *
 * Returns 0 on success, -EINVAL (or the error from parse_ipv4_net()) when a
 * token is missing or malformed, or when the priority exceeds
 * FLOW_CLASSIFY_MAX_PRIORITY. The filter may be partially written on error.
 */
static int
parse_ipv4_5tuple_rule(char *str, struct rte_eth_ntuple_filter *ntuple_filter)
{
	int i, ret;
	char *s, *sp, *in[CB_FLD_NUM];
	/* Tokens are separated by blanks, tabs or the trailing newline. */
	static const char *dlm = " \t\n";
	int dim = CB_FLD_NUM;
	uint32_t temp;

	/* Split the line; every one of the CB_FLD_NUM tokens is mandatory. */
	s = str;
	for (i = 0; i != dim; i++, s = NULL) {
		in[i] = strtok_r(s, dlm, &sp);
		if (in[i] == NULL)
			return -EINVAL;
	}

	ret = parse_ipv4_net(in[CB_FLD_SRC_ADDR],
			&ntuple_filter->src_ip,
			&ntuple_filter->src_ip_mask);
	if (ret != 0) {
		flow_classify_log("failed to read source address/mask: %s\n",
			in[CB_FLD_SRC_ADDR]);
		return ret;
	}

	ret = parse_ipv4_net(in[CB_FLD_DST_ADDR],
			&ntuple_filter->dst_ip,
			&ntuple_filter->dst_ip_mask);
	if (ret != 0) {
		flow_classify_log("failed to read destination address/mask: %s\n",
			in[CB_FLD_DST_ADDR]);
		return ret;
	}

	/* Source port, ":" delimiter token, source port mask. */
	if (get_cb_field(&in[CB_FLD_SRC_PORT], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->src_port = (uint16_t)temp;

	/* sizeof includes the terminating NUL, so the token must be exactly ":". */
	if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim,
			sizeof(cb_port_delim)) != 0)
		return -EINVAL;

	if (get_cb_field(&in[CB_FLD_SRC_PORT_MASK], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->src_port_mask = (uint16_t)temp;

	/* Destination port, ":" delimiter token, destination port mask. */
	if (get_cb_field(&in[CB_FLD_DST_PORT], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->dst_port = (uint16_t)temp;

	if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim,
			sizeof(cb_port_delim)) != 0)
		return -EINVAL;

	if (get_cb_field(&in[CB_FLD_DST_PORT_MASK], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->dst_port_mask = (uint16_t)temp;

	/*
	 * "<proto>/<proto_mask>" is one token: the first call consumes the
	 * value up to '/', the second the mask up to the end of the token.
	 */
	if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, '/'))
		return -EINVAL;
	ntuple_filter->proto = (uint8_t)temp;

	if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, 0))
		return -EINVAL;
	ntuple_filter->proto_mask = (uint8_t)temp;

	if (get_cb_field(&in[CB_FLD_PRIORITY], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->priority = (uint16_t)temp;

	/* ret is still 0 here; reject priorities above the supported maximum. */
	if (ntuple_filter->priority > FLOW_CLASSIFY_MAX_PRIORITY)
		ret = -EINVAL;

	return ret;
}
/* Bypass comment and empty lines */
static inline int
is_bypass_line(char *buff)
{
int i = 0;
/* comment line */
if (buff[0] == COMMENT_LEAD_CHAR)
return 1;
/* empty line */
while (buff[i] != '