android bpf流程

Posted osnet

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了android bpf流程相关的知识,希望对你有一定的参考价值。

以dhcpcd(Android使用的DHCP客户端)使用bpf为例进行分析
通过PF_PACKET、SOCK_DGRAM类型的socket,可以直接从kernel网卡设备层把原始的以太网数据读到用户空间。为了只读取感兴趣的数据包类型(例如ARP包),可以通过配置bpf进行过滤。

用户空间 attach bpf

int
open_socket(struct interface *iface, int protocol)

	int s;
	union sockunion 
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_ll sll;
		struct sockaddr_storage ss;
	 su;
	struct sock_fprog pf;
	int *fd;


	if ((s = socket(PF_PACKET, SOCK_DGRAM, htons(protocol))) == -1) //创建PF_PACKET、SOCK_DGRAM类型,协议为protocol(例如ETHERTYPE_ARP)的socket


	memset(&su, 0, sizeof(su));
	su.sll.sll_family = PF_PACKET;
	su.sll.sll_protocol = htons(protocol);
	if (!(su.sll.sll_ifindex = if_nametoindex(iface->name)))  //获取网络接口索引
		errno = ENOENT;
		goto eexit;
	
	/* Install the DHCP filter */
	memset(&pf, 0, sizeof(pf));
	if (protocol == ETHERTYPE_ARP) 
		pf.filter = UNCONST(arp_bpf_filter);  //设置bpf过滤配置,这里是arp
		pf.len = arp_bpf_filter_len;
	 else 
		pf.filter = UNCONST(dhcp_bpf_filter);
		pf.len = dhcp_bpf_filter_len;
	
	if (setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &pf, sizeof(pf)) != 0) //把bpf过滤配置attach到socket
		goto eexit;

	if (set_cloexec(s) == -1)
		goto eexit;
	if (set_nonblock(s) == -1)
		goto eexit;
	if (bind(s, &su.sa, sizeof(su)) == -1) //绑定socket到指定接口
		goto eexit;
	if (protocol == ETHERTYPE_ARP)
		fd = &iface->arp_fd;
	else
		fd = &iface->raw_fd;
	if (*fd != -1)
		close(*fd);
	*fd = s;
	return s;

eexit:
	close(s);
	return -1;

BPF指令码

arp过滤配置为例,

/*
 *	Try and keep these values and structures similar to BSD, especially
 *	the BPF code definitions which need to match so you can share filters
 */
 
struct sock_filter 	/* Filter block */
	__u16	code;   /* Actual filter code */  //指令码
	__u8	jt;	/* Jump true */  //跳转指令时,如果满足判断条件,向后跳过jt条指令(偏移相对于下一条指令)
	__u8	jf;	/* Jump false */  //跳转指令时,如果不满足判断条件,向后跳过jf条指令(偏移相对于下一条指令)
	__u32	k;      /* Generic multiuse field */  //通用字段,例如存放用于比较的常量或加载数据的偏移
;

struct sock_fprog 	/* Required for SO_ATTACH_FILTER. */
	unsigned short		len;	/* Number of filter blocks */
	struct sock_filter __user *filter;
;

static const struct bpf_insn const arp_bpf_filter [] = 
#ifndef BPF_SKIPTYPE
	/* Make sure this is an ARP packet... */
	BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),  //指令ld ,读取以太网数据包开头偏移12字节的2个字节(数据包类型)到寄存器
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_ARP, 0, 3),//判断跳转指令jmp(jt/jf偏移相对于下一条指令):如果上面读取值为ETHERTYPE_ARP,偏移为0,继续执行下一条指令;否则跳过后面3条指令
#endif
	/* Make sure this is an ARP REQUEST... */
	BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20 + BPF_ETHCOOK),//指令ld,读取数据包偏移(20 + BPF_ETHCOOK)字节处的2个字节(ARP操作码)到寄存器
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), //判断跳转指令jmp,如果上面读取值为ARPOP_REQUEST,跳过后面2条指令,直接到返回BPF_WHOLEPACKET的指令;否则偏移为0,继续执行下一条指令
	/* or ARP REPLY... */
	BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20 + BPF_ETHCOOK),//如果上面判断不是ARPOP_REQUEST,再次用指令ld读取同一位置的2个字节(ARP操作码)到寄存器
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 0, 1),//判断跳转指令jmp,如果上面读取值为ARPOP_REPLY,偏移为0,继续执行下一条指令(返回BPF_WHOLEPACKET);否则跳过1条指令,到返回0的丢弃指令
	/* If we passed all the tests, ask for the whole packet. */
	BPF_STMT(BPF_RET + BPF_K, BPF_WHOLEPACKET), //ret 返回指令,BPF_WHOLEPACKET表示前面判断通过,接收数据包到用户空间
	/* Otherwise, drop it. */
	BPF_STMT(BPF_RET + BPF_K, 0), //ret 返回指令,返回0表示前面判断不通过,丢弃数据包
;

SOCK_PACKET

创建packet socket时(用户空间调用socket())内核执行packet_create,只看关注的点

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)

	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;


	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);


	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;//网卡设备层回调函数,__netif_receive_skb_core--deliver_skb

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) 
		po->prot_hook.type = proto;
		register_prot_hook(sk); //注册到网卡设备层
	


register_prot_hook --> dev_add_pack --> list_add_rcu(&pt->list, head);(head为ptype_all等链表头)

用户空间bind

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */



	if (sll->sll_ifindex) 
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);




static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)

	struct packet_sock *po = pkt_sk(sk);

	unregister_prot_hook(sk, true); //先解除了之前注册

	po->num = protocol;
	po->prot_hook.type = protocol;


	po->prot_hook.dev = dev;  //设置网卡设备,绑定
	po->ifindex = dev ? dev->ifindex : 0;//设置网卡设备索引,绑定

	packet_cached_dev_assign(po, dev);



	if (!dev || (dev->flags & IFF_UP)) 
		register_prot_hook(sk);  //再次注册,这里在新网口注册,也就是在bind的接口注册
	 



网卡设备收到数据

__netif_receive_skb_core


	list_for_each_entry_rcu(ptype, &ptype_all, list)  //遍历注册的ptype
		if (!ptype->dev || ptype->dev == skb->dev) 
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		
	



static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)

	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
		return -ENOMEM;
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); //调用回调函数,对于PF_PACKET,是packet_rcv


static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)

	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;



	sk = pt->af_packet_priv;
	po = pkt_sk(sk);



	skb->dev = dev;

	

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen); //调用过滤函数,也就是按用户空间配置的bpf过滤条件进行过滤
	if (!res) //0,丢弃数据包,否则返回到用户空间
		goto drop_n_restore;
	if (snaplen > res) 
		snaplen = res;

	。。。。

内核bpf配置流程

用户空间通过setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &pf, sizeof(pf)) 配置bpf
sock_setsockopt–》sk_attach_filter

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)

	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;



	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); //分配sk_filter 结构用于存储bpf配置

	if (copy_from_user(fp->insns, fprog->filter, fsize))  //复制用户空间过滤配置到fp->insns
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
		return -EFAULT;
	

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len; //过滤条件,bpf指令条数

	err = __sk_prepare_filter(fp); //


	rcu_assign_pointer(sk->sk_filter, fp); //把fp赋给 socket结构sk_filter 


	return 0;



static int __sk_prepare_filter(struct sk_filter *fp)

	int err;

	fp->bpf_func = sk_run_filter; //设置bpf处理函数,默认指向sk_run_filter

	err = sk_chk_filter(fp->insns, fp->len); //检查bpf配置指令字节码,并转换指令code,存储到fp insns中


	bpf_jit_compile(fp);// 编译bpf字节码,也就是把bpf字节码转换为对应cpu架构指令
	return 0;


bpf字节码编译

bpf字节码可以在内核中编译(转换)为对应cpu架构的指令。
linux处理bpf字节码有两种方式:如果定义了CONFIG_BPF_JIT,会把bpf字节码在内核中编译(转换)为对应cpu架构的指令;如果没有定义CONFIG_BPF_JIT,则由sk_run_filter模拟执行过滤过程。

#ifdef CONFIG_BPF_JIT
#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/printk.h>

extern void bpf_jit_compile(struct sk_filter *fp);
extern void bpf_jit_free(struct sk_filter *fp);

static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
				u32 pass, void *image)

	pr_err("flen=%u proglen=%u pass=%u image=%p\\n",
	       flen, proglen, pass, image);
	if (image)
		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
			       16, 1, image, proglen, false);

#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
#else
static inline void bpf_jit_compile(struct sk_filter *fp)


static inline void bpf_jit_free(struct sk_filter *fp)


#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
#endif


CONFIG_BPF_JIT定义时:arm指令

void bpf_jit_compile(struct sk_filter *fp)

	struct jit_ctx ctx;
	unsigned tmp_idx;
	unsigned alloc_size;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));
	ctx.skf		= fp;
	ctx.ret0_fp_idx = -1;

	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	/* fake pass to fill in the ctx->seen */
	if (unlikely(build_body(&ctx)))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

	/* there's nothing after the epilogue on ARMv7 */
	build_epilogue(&ctx);


	alloc_size = 4 * ctx.idx;
	//申请内存用于存储bpf字节码编译后对应arm指令
	ctx.target = module_alloc(max(sizeof(struct work_struct),
				      alloc_size));
	if (unlikely(ctx.target == NULL))
		goto out;

	ctx.idx = 0;
	//把bpf字节码转换为arm指令
	build_prologue(&ctx);
	build_body(&ctx);
	build_epilogue(&ctx);

	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));



	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	fp->bpf_func = (void *)ctx.target;  //重新修改bpf处理函数,指向arm指令所在内存地址
  。。。。


过滤过程

packet_rcv --> run_filter --> SK_RUN_FILTER(filter, skb);

对于SK_RUN_FILTER,如果定义了CONFIG_BPF_JIT,那么调用bpf_func,实际跳转到编译生成的arm指令;
如果没有定义CONFIG_BPF_JIT,则调用sk_run_filter。

#ifdef CONFIG_BPF_JIT
#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/printk.h>

extern void bpf_jit_compile(struct sk_filter *fp);
extern void bpf_jit_free(struct sk_filter *fp);

static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
				u32 pass, void *image)

	pr_err("flen=%u proglen=%u pass=%u image=%p\\n",
	       flen, proglen, pass, image);
	if (image)
		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
			       16, 1, image, proglen, false);

#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
#else
static inline void bpf_jit_compile(struct sk_filter *fp)
以上是关于android bpf流程的主要内容,如果未能解决你的问题,请参考以下文章

Dynamics 365-关于BPF的进一步探究

XDP/eBPF — BPF

XDP/eBPF — BPF

Dynamics 365 BPF背景及按钮颜色更改

Linux kernel 4.20 BPF 整数溢出漏洞分析

Wireshark-BPF过滤规则