bpftrace系统分析脚本实战

Posted 文皓

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了bpftrace系统分析脚本实战相关的知识,希望对你有一定的参考价值。

最近ebpf比较火,作为运维,大家应该比较关注ebpf对系统问题分析定位的能力。下面分享一些我实际在用的bpftrace脚本案例。


案例一,重要文件被删除

有时候线上某个重要文件不翼而飞,我们需要知道是谁删的,这时可以利用相关跟踪点找到罪魁祸首。

  • 通过静态跟踪点(tracepoint)跟踪unlinkat系统调用
#!/usr/local/bin/bpftrace

/*
 * rm.bt - print one line for every unlinkat(2) call, identifying the caller.
 *
 * Only the unlinkat tracepoint is hooked; other removal syscalls are not
 * covered by this script.
 *
 * Tracepoint argument layout:
 *   tracepoint:syscalls:sys_enter_unlinkat
 *       int __syscall_nr
 *       int dfd
 *       const char * pathname
 *       int flag
 */

// Emit the column header once, before any event is reported.
BEGIN
{
    printf("%-20s %-10s %-10s %-10s %-20s %s\n", "TIME", "USERNAME", "UID", "PID", "COMM", "FNAME");
}

// Fires on syscall entry; pathname is printed exactly as the caller passed it
// (so it can be a relative name, as in the sample output).
tracepoint:syscalls:sys_enter_unlinkat
{
    printf("%-20s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), username, uid, pid, comm, str(args->pathname));
}


# 执行结果
$ sudo bpftrace rm.bt
Attaching 2 probes...
TIME USERNAME UID PID COMM FNAME
2021-02-26-18:35:02 obbobbxy 1008 130183 rm abc
2021-02-26-18:35:02 obbobbxy 1008 130183 rm 123
2021-02-26-18:35:26 obbobbxy 1008 130209 rm 123
2021-02-26-18:35:34 obbobbxy 1008 130212 rm 123
2021-02-26-18:35:38 root 0 130508 rm /var/rudder/cfengine-community/state/cf_lock.lmdb
2021-02-26-18:35:38 root 0 130509 rm /var/rudder/cfengine-community/state/cf_lock.lmdb.lock
2021-02-26-18:35:40 obbobbxy 1008 130704 rm abc
2021-02-26-18:35:40 obbobbxy 1008 130704 rm 123
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.extra
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.ssh
2021-02-26-18:36:05 root 0 634 systemd-logind S.dirmngr
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.browser
2021-02-26-18:36:05 root 0 634 systemd-logind gnupg
2021-02-26-18:36:05 root 0 634 systemd-logind private
2021-02-26-18:36:05 root 0 634 systemd-logind notify
2021-02-26-18:36:05 root 0 634 systemd-logind transient
2021-02-26-18:36:05 root 0 634 systemd-logind systemd
  • 通过动态跟踪点(kprobe)跟踪unlink相关的内核函数(vfs_unlink/vfs_rmdir)
#!/usr/local/bin/bpftrace

// header file path: /usr/src/linux-headers-$(uname -r | sed s/-amd64//)-common/include
#include <linux/dcache.h>

/*
 * watch_rm.bt - report file and directory removals via kprobes on
 * vfs_unlink / vfs_rmdir, stopping after a time limit or a record limit.
 *
 * Positional parameters:
 *   $1  run time in seconds (0 or missing -> 10)
 *   $2  maximum number of records (0 or missing -> no record limit)
 *
 * References used to locate the file name:
 *   https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html :
 *       int vfs_unlink ( struct inode * dir, struct dentry * dentry, struct inode ** delegated_inode);
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structdentry.html :
 *       struct dentry -> struct qstr d_name
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structqstr.html :
 *       struct qstr d_name -> const unsigned char * name
 *   filename: ((struct dentry *)arg1)->d_name.name
 *
 * NOTE(review): arg1 is the dentry only for the signature quoted above.
 * Newer kernels reportedly add a leading namespace/idmap argument to
 * vfs_unlink/vfs_rmdir, which would move the dentry to arg2 - confirm
 * against the running kernel before relying on the output.
 */

BEGIN
{
    @t = 0;             // seconds elapsed
    @c = 0;             // records printed so far
    @duration = $1;
    @maxcount = $2;
    // A missing positional parameter evaluates to 0; fall back to 10 seconds.
    if (@duration == 0) {
        @duration = 10;
    }

    printf("%-20s %-5s %-10s %-10s %-10s %-20s %s\n", "TIME", "TYPE", "USERNAME", "UID", "PID", "COMM", "FNAME");
}

// Regular file removal.
kprobe:vfs_unlink
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "file", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

// Directory removal.
kprobe:vfs_rmdir
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "dir", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

// Once per second: check both stop conditions. The record limit is therefore
// only enforced at one-second granularity.
interval:s:1
{
    @t++;
    if (@t >= @duration) {
        clear(@t);
        clear(@c);
        clear(@maxcount);
        exit();
    }

    if (@maxcount != 0 && @c > @maxcount) {
        clear(@t);
        clear(@c);
        clear(@duration);
        exit();
    }
}


# 脚本默认运行10s,可以通过第一个位置参数传入sudo ./watch_rm.bt 30,结束后会打印运行时间内所有删除文件/目录的进程,第二个参数可以指定最多获取记录数
# sudo ./watch_rm.bt 5 30, 表示脚本最多运行5s,最多获取30条记录,如果不指定第二个参数,默认就按超时退出;
$ sudo ./watch_rm.bt
Attaching 4 probes...
TIME TYPE USERNAME UID PID COMM FNAME
2021-02-23-22:43:59 dir obbobbxy 1008 187638 rm dir1
2021-02-23-22:43:59 dir obbobbxy 1008 187639 rm dir2
2021-02-23-22:43:59 dir obbobbxy 1008 187640 rm dir3
2021-02-23-22:43:59 file obbobbxy 1008 187641 rm file1
2021-02-23-22:43:59 file obbobbxy 1008 187642 rm file2
2021-02-23-22:43:59 file obbobbxy 1008 187643 rm file3
2021-02-23-22:43:59 file obbobbxy 1008 187644 rm file4
2021-02-23-22:43:59 file obbobbxy 1008 187645 rm file5
2021-02-23-22:43:59 file obbobbxy 1008 187646 rm file6
2021-02-23-22:43:59 file obbobbxy 1008 187647 rm file7
2021-02-23-22:43:59 file obbobbxy 1008 187648 rm file8
2021-02-23-22:43:59 file obbobbxy 1008 187649 rm file9

案例二,进程top流量

#!/usr/local/bin/bpftrace

/*
 * socksize.bt - per-process histograms of socket read/write sizes, built
 * from the return values of sock_recvmsg / sock_sendmsg.
 * The histograms are printed automatically when the script exits (Ctrl-C).
 */

#include <linux/fs.h>
#include <net/sock.h>

// Remember the socket pointer (arg0) for the calling thread on entry.
// It is not read by the return probes below; it is only stored and deleted.
kprobe:sock_recvmsg,
kprobe:sock_sendmsg
{
    @socket[tid] = arg0;
}

kretprobe:sock_recvmsg
{
    // Presumably filters out error returns, which would show up as very
    // large values - verify against the retval type of your bpftrace.
    if (retval < 0x7fffffff) {
        @read_bytes[comm] = hist(retval);
    }

    delete(@socket[tid]);
}

kretprobe:sock_sendmsg
{
    if (retval < 0x7fffffff) {
        @write_bytes[comm] = hist(retval);
    }

    delete(@socket[tid]);
}

// Drop the bookkeeping map so it is not printed together with the histograms.
END
{
    clear(@socket);
}


# 以直方图形式输出进程socket读写字节数,如果需要,则可以加上pid/sport/dport
$ sudo bpftrace socksize.bt
Attaching 5 probes...
^C

@read_bytes[ospfd]:
[64, 128) 1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[sshd]:
[32, 64) 1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[java]:
[8, 16) 2 |@@@@@@@@@@@ |
[16, 32) 9 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[32, 64) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 2 |@@@@@@@@@@@ |

@read_bytes[nginx]:
[0] 107 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1] 1 | |
[2, 4) 0 | |
[4, 8) 0 | |
[8, 16) 0 | |
[16, 32) 0 | |
[32, 64) 0 | |
[64, 128) 20 |@@@@@@@@@ |
[128, 256) 113 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[256, 512) 103 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 35 |@@@@@@@@@@@@@@@@ |
[1K, 2K) 5 |@@ |


# 使用bcc的tcptop工具也可以做到
$ sudo /usr/share/bcc/tools/tcptop 1 1
Tracing... Output every 1 secs. Hit Ctrl-C to end
23:37:39 loadavg: 0.49 0.48 0.35 2/831 85332

PID COMM LADDR RADDR RX_KB TX_KB
82242 nginx 192.168.3.185:52456 192.168.9.57:7130 2 4
82242 nginx 192.168.3.185:47782 192.168.7.66:7130 2 4
82242 nginx 192.168.3.185:43064 192.168.1.51:7130 2 4


案例三,redis 热key发现

# 查看redis源码,可以跟踪lookupKey相关的函数
#!/usr/local/bin/bpftrace

/*
 * redis-uprobe.bt - count key lookups in /usr/bin/redis-server and print
 * the most frequently accessed keys.
 *
 * Positional parameters:
 *   $1  tracing time in seconds (missing or <= 0 -> 10)
 */

BEGIN
{
    @t = 0;
    @duration = 10;
    if ($1 > 0) {
        @duration = $1;
    }

    printf("Tracing /usr/bin/redis-server %d seconds: \n\n", @duration);
}

/*
https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/server.h#L666:
typedef struct redisObject {
    unsigned type:4;
    unsigned encoding:4;
    unsigned lru:LRU_BITS;
    int refcount;
    void *ptr;
} robj;

https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/db.c#L168:
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply)
*/

// Note: lookupKey is called for both get and set; lookupKeyReadOrReply is
// only called for get.
//u:/usr/bin/redis-server:lookupKeyReadOrReply,
u:/usr/bin/redis-server:lookupKey
{
    // arg1: robj *key
    // $key: key + (4*4) offset # unsigned 4 bytes, int 4 bytes
    // NOTE(review): the fixed 16-byte offset depends on the robj layout and
    // string encoding of the traced redis build - confirm before reuse.
    $key = str(arg1+16);
    @keys[$key] = count();
}

// Ctrl-C path: print the top 10 keys, then clear the map so it is not
// printed a second time on exit.
END
{
    print(@keys, 10);
    clear(@keys);
}

// Timeout path: after @duration seconds print the top 10 keys (same limit
// as the END probe), clear all maps and stop.
interval:s:1
{
    @t++;
    if (@t > @duration) {
        print(@keys, 10);
        clear(@keys);
        clear(@t);
        clear(@duration);
        exit();
    }
}



$ sudo ./redis-uprobe.bt 99999 # 默认10秒后自动退出,可以通过第一个位置参数传入跟踪时长,最多输出top10的key;同时在另外一个终端执行/usr/bin/redis-benchmark -q模拟redis请求
Attaching 4 probes...
Tracing /usr/bin/redis-server 99999 seconds:

@keys[counter:__rand_int__]: 100000
@keys[myset]: 200000
@keys[mylist]: 900000
@keys[key:__rand_int__]: 1200000

@duration: 99999
@t: 91


案例四,审计系统shell命令执行记录

通常公司因为某些资质申请或者证书审核的原因,会需要做一些系统安全或者日志审计,需要有服务器命令行执行记录。我们通常使用auditd工具实现,同样的,我们用ebpf的uprobe能力也能简单实现相关的功能。

#!/usr/local/bin/bpftrace

/*
 * Audit commands executed by /bin/bash by hooking its shell_execve()
 * function with a uprobe and printing the argument vector.
 */

// Print the column header once.
BEGIN
{
    printf("%-20s %-6s %-10s %s\n", "TIME", "PID", "USERNAME", "COMMAND");
}

/*
 * From the bash source:
 *   int shell_execve PARAMS((char *, char **, char **));
 *   shell_execve (command, args, env);
 */
u:/bin/bash:shell_execve
{
    time("%F-%T ");
    printf("%-6d %-10s ", pid, username);
    join(arg1); // argv; join() ends the output line
}


$ sudo ./bash-readline.bt
Attaching 2 probes...
TIME PID USERNAME COMMAND
2021-02-28-19:16:31 65488 obbobbxy bash a.sh
2021-02-28-19:16:31 65489 obbobbxy /bin/echo 123 300c
2021-02-28-19:16:31 65490 obbobbxy /bin/echo abc asdfadf
2021-02-28-19:16:31 65491 obbobbxy whoami
2021-02-28-19:16:31 65492 obbobbxy bpftrace --version
2021-02-28-19:16:34 65495 obbobbxy cat a.sh
2021-02-28-19:17:15 65523 root ps aux
2021-02-28-19:17:15 65524 root grep /usr/bin/osquery
2021-02-28-19:17:15 65525 root grep -v grep
2021-02-28-19:17:15 65526 root wc -l


小结

目前debian9及以上系统都是4.x或更高版本的内核,基本上利用bpftrace或者bcc-tools就能实现许多跟踪点的信息跟踪,辅助我们日常工作,提升问题定位能力。

debian11以后系统默认都启用了BTF,CO-RE不远了。

以上是关于bpftrace系统分析脚本实战的主要内容,如果未能解决你的问题,请参考以下文章

记一次pthread_cancel_init段错误分析(bpftrace)

LinuxBPF学习笔记 - bpftrace开发[7]

LinuxBPF学习笔记 - bpftrace开发[7]

LinuxBPF学习笔记 - bpftrace开发[7]

Shell 编程实战

实操 : shell编程实战