android bpf流程
Posted osnet
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了android bpf流程相关的知识,希望对你有一定的参考价值。
以dhcpcd使用bpf为例进行分析
通过PF_PACKET、SOCK_DGRAM类型的socket,可以直接从kernel网卡设备层把原始的以太网数据读到用户空间。为了只读取感兴趣的数据包类型,例如ARP包,可以通过配置bpf进行过滤。
用户空间 attach bpf
int
open_socket(struct interface *iface, int protocol)
int s;
union sockunion
struct sockaddr sa;
struct sockaddr_in sin;
struct sockaddr_ll sll;
struct sockaddr_storage ss;
su;
struct sock_fprog pf;
int *fd;
if ((s = socket(PF_PACKET, SOCK_DGRAM, htons(protocol))) == -1) //创建PF_PACKET, SOCK_DGRAM,、、ETHERTYPE_ARP socket
memset(&su, 0, sizeof(su));
su.sll.sll_family = PF_PACKET;
su.sll.sll_protocol = htons(protocol);
if (!(su.sll.sll_ifindex = if_nametoindex(iface->name))) //获取网络接口索引
errno = ENOENT;
goto eexit;
/* Install the DHCP filter */
memset(&pf, 0, sizeof(pf));
if (protocol == ETHERTYPE_ARP)
pf.filter = UNCONST(arp_bpf_filter); //设置bpf过滤配置,这里是arp
pf.len = arp_bpf_filter_len;
else
pf.filter = UNCONST(dhcp_bpf_filter);
pf.len = dhcp_bpf_filter_len;
if (setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &pf, sizeof(pf)) != 0) //把bpf过滤配置attach到socket
goto eexit;
if (set_cloexec(s) == -1)
goto eexit;
if (set_nonblock(s) == -1)
goto eexit;
if (bind(s, &su.sa, sizeof(su)) == -1) //绑定socket到指定接口
goto eexit;
if (protocol == ETHERTYPE_ARP)
fd = &iface->arp_fd;
else
fd = &iface->raw_fd;
if (*fd != -1)
close(*fd);
*fd = s;
return s;
eexit:
close(s);
return -1;
BPF指令码
以arp过滤配置为例:
/*
* Try and keep these values and structures similar to BSD, especially
* the BPF code definitions which need to match so you can share filters
*/
struct sock_filter /* Filter block */
__u16 code; /* Actual filter code */ 指令码
__u8 jt; /* Jump true */ 跳转指令时,如果满足判断条件,跳转到jt偏移指令处
__u8 jf; /* Jump false */跳转指令时,如果不满足判断条件,跳转到jf偏移指令处
__u32 k; /* Generic multiuse field */ 存放用于判断的值
;
struct sock_fprog /* Required for SO_ATTACH_FILTER. */
unsigned short len; /* Number of filter blocks */
struct sock_filter __user *filter;
;
static const struct bpf_insn const arp_bpf_filter [] =
#ifndef BPF_SKIPTYPE
/* Make sure this is an ARP packet... */
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12), //指令ld ,读取以太网数据包开头偏移12字节的2个字节(数据包类型)到寄存器
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_ARP, 0, 3),//判断跳转指令jmp,如果上面读取值为ETHERTYPE_ARP,跳转到本条指令+0偏移,也就是下一条指令,否则跳转到偏移3的指令
#endif
/* Make sure this is an ARP REQUEST... */
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20 + BPF_ETHCOOK),//指令ld ,读取以太网数据包开头偏移20字节的2个字节(arp数据包类型)到寄存器
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), //判断跳转指令jmp,如果上面读取值为ARPOP_REQUEST,跳转到本条指令+2偏移,否则跳转到偏移0的指令,也就是下一条指令
/* or ARP REPLY... */
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20 + BPF_ETHCOOK),//如果上面判断不是ARPOP_REQUEST,指令ld ,读取以太网数据包开头偏移20字节的2个字节(arp数据包类型)到寄存器
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 0, 1),//判断跳转指令jmp,如果上面读取值为ARPOP_REPLY,跳转到本条指令+0偏移,也就是下一条指令;否则跳转到偏移1的指令,
/* If we passed all the tests, ask for the whole packet. */
BPF_STMT(BPF_RET + BPF_K, BPF_WHOLEPACKET), //ret 返回指令,BPF_WHOLEPACKET表示前面判断通过,接收数据包到用户空间
/* Otherwise, drop it. */
BPF_STMT(BPF_RET + BPF_K, 0), //ret 返回指令,返回0表示前面判断不通过,丢弃数据包
;
SOCK_PACKET
创建packet socket时内核调用packet_create,这里只看关注的点
static int packet_create(struct net *net, struct socket *sock, int protocol,
int kern)
struct sock *sk;
struct packet_sock *po;
__be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
sock->ops = &packet_ops;
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
sock_init_data(sock, sk);
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
/*
* Attach a protocol block
*/
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
po->prot_hook.func = packet_rcv;//网卡设备层回调函数,__netif_receive_skb_core--deliver_skb
if (sock->type == SOCK_PACKET)
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
if (proto)
po->prot_hook.type = proto;
register_prot_hook(sk); 注册到网卡设备层
register_prot_hook--》dev_add_pack--》list_add_rcu(&pt->list, head);ptype_all
用户空间bind
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
struct sock *sk = sock->sk;
struct net_device *dev = NULL;
int err;
/*
* Check legality
*/
if (sll->sll_ifindex)
err = -ENODEV;
dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
if (dev == NULL)
goto out;
err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
struct packet_sock *po = pkt_sk(sk);
unregister_prot_hook(sk, true); //先解除了之前注册
po->num = protocol;
po->prot_hook.type = protocol;
po->prot_hook.dev = dev; //设置网卡设备,绑定
po->ifindex = dev ? dev->ifindex : 0;//设置网卡设备索引,绑定
packet_cached_dev_assign(po, dev);
if (!dev || (dev->flags & IFF_UP))
register_prot_hook(sk); //再次注册,这里在新网口注册,也就是在bind的接口注册
网卡设备收到数据
__netif_receive_skb_core
list_for_each_entry_rcu(ptype, &ptype_all, list) //遍历注册的ptype
if (!ptype->dev || ptype->dev == skb->dev)
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
return -ENOMEM;
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 调用回调函数,对于PF_PACKET,是packet_rcv
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
struct sock *sk;
struct sockaddr_ll *sll;
struct packet_sock *po;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
sk = pt->af_packet_priv;
po = pkt_sk(sk);
skb->dev = dev;
snaplen = skb->len;
res = run_filter(skb, sk, snaplen); //调用过滤函数,也就是用用户空间配置的bpf过滤条件进行过滤
if (!res) //0,丢弃数据包,否则返回到用户空间
goto drop_n_restore;
if (snaplen > res)
snaplen = res;
。。。。
内核bpf配置流程
用户空间通过setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &pf, sizeof(pf)) 配置bpf
sock_setsockopt–》sk_attach_filter
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); //分配sk_filter 结构用于存储bpf配置
if (copy_from_user(fp->insns, fprog->filter, fsize)) //复制用户空间过滤配置到fp->insns
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len; //过滤条件,bpf指令条数
err = __sk_prepare_filter(fp); //
rcu_assign_pointer(sk->sk_filter, fp); //把fp赋给 socket结构sk_filter
return 0;
static int __sk_prepare_filter(struct sk_filter *fp)
int err;
fp->bpf_func = sk_run_filter; //复制bpf处理函数
err = sk_chk_filter(fp->insns, fp->len); //检查bpf配置指令字节码,并转换指令code,存储到fp insns中
bpf_jit_compile(fp);// 编译bpf字节码,也就是把bpf字节码转换为对应cpu架构指令
return 0;
bpf字节码编译
bpf字节码可以在内核中编译(转换)为对应cpu架构的指令。
linux处理bpf字节码有两种方式:如果定义了CONFIG_BPF_JIT,会把bpf字节码在内核中编译(转换)为对应cpu架构的指令;如果没有定义CONFIG_BPF_JIT,则由内核逐条解释执行bpf指令,即模拟过滤过程。
#ifdef CONFIG_BPF_JIT
#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/printk.h>
extern void bpf_jit_compile(struct sk_filter *fp);
extern void bpf_jit_free(struct sk_filter *fp);
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
pr_err("flen=%u proglen=%u pass=%u image=%p\\n",
flen, proglen, pass, image);
if (image)
print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
16, 1, image, proglen, false);
#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
#else
static inline void bpf_jit_compile(struct sk_filter *fp)
static inline void bpf_jit_free(struct sk_filter *fp)
#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
#endif
CONFIG_BPF_JIT定义时:arm指令
void bpf_jit_compile(struct sk_filter *fp)
struct jit_ctx ctx;
unsigned tmp_idx;
unsigned alloc_size;
if (!bpf_jit_enable)
return;
memset(&ctx, 0, sizeof(ctx));
ctx.skf = fp;
ctx.ret0_fp_idx = -1;
ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
if (ctx.offsets == NULL)
return;
/* fake pass to fill in the ctx->seen */
if (unlikely(build_body(&ctx)))
goto out;
tmp_idx = ctx.idx;
build_prologue(&ctx);
ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
/* there's nothing after the epilogue on ARMv7 */
build_epilogue(&ctx);
alloc_size = 4 * ctx.idx;
//申请内存用于存储bpf字节码编译后对应arm指令
ctx.target = module_alloc(max(sizeof(struct work_struct),
alloc_size));
if (unlikely(ctx.target == NULL))
goto out;
ctx.idx = 0;
//把bpf字节码转换为arm指令
build_prologue(&ctx);
build_body(&ctx);
build_epilogue(&ctx);
flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
if (bpf_jit_enable > 1)
/* there are 2 passes here */
bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
fp->bpf_func = (void *)ctx.target; 重新修改bpf处理函数,指向arm指令所在内存地址
。。。。
过滤过程
packet_rcv–》run_filter—》SK_RUN_FILTER(filter, skb);
对于SK_RUN_FILTER,如果定义了CONFIG_BPF_JIT,那么调用bpf_func:JIT编译成功后它指向编译出的arm指令;如果bpf_jit_enable未开启或编译失败,它仍是__sk_prepare_filter中设置的默认值sk_run_filter。
如果没有定义CONFIG_BPF_JIT,则直接调用sk_run_filter。
#ifdef CONFIG_BPF_JIT
#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/printk.h>
extern void bpf_jit_compile(struct sk_filter *fp);
extern void bpf_jit_free(struct sk_filter *fp);
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
pr_err("flen=%u proglen=%u pass=%u image=%p\\n",
flen, proglen, pass, image);
if (image)
print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
16, 1, image, proglen, false);
#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
#else
static inline void bpf_jit_compile(struct sk_filter *fp)
以上是关于android bpf流程的主要内容,如果未能解决你的问题,请参考以下文章