记一次pthread_cancel_init段错误分析(bpftrace)
Posted rtoax
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了记一次pthread_cancel_init段错误分析(bpftrace)相关的知识,希望对你有一定的参考价值。
1. coredump栈
gdb app coredump.dat
[...]
#15 0x00000000004fceba in SignalHandler (sigid=11)
at /work/workspace/VOS_platform/workspace/os/../../platform/vos/vos_signal.c:80
#16 <signal handler called>
#17 0x00007ff8f170cd72 in strcmp () from /lib64/ld-linux-x86-64.so.2
#18 0x00007ff8f16fbae2 in _dl_map_object () from /lib64/ld-linux-x86-64.so.2
#19 0x00007ff8f1707254 in dl_open_worker () from /lib64/ld-linux-x86-64.so.2
#20 0x00007ff8f1702784 in _dl_catch_error () from /lib64/ld-linux-x86-64.so.2
#21 0x00007ff8f1706b3b in _dl_open () from /lib64/ld-linux-x86-64.so.2
#22 0x00007ff8ef0136d2 in do_dlopen () from /lib64/libc.so.6
#23 0x00007ff8f1702784 in _dl_catch_error () from /lib64/ld-linux-x86-64.so.2
#24 0x00007ff8ef013792 in __libc_dlopen_mode () from /lib64/libc.so.6
#25 0x00007ff8f14e6d73 in pthread_cancel_init () from /lib64/libpthread.so.0
#26 0x00007ff8f14e3b57 in pthread_cancel () from /lib64/libpthread.so.0
[...]
此调用栈的详情:
1.1. frame 26: pthread_cancel
int
__pthread_cancel (pthread_t th)
{
[...]
#ifdef SHARED
pthread_cancel_init ();
#endif
[...]
}
1.2. frame 25: pthread_cancel_init
void
__attribute_noinline__
pthread_cancel_init (void)
{
[...]
handle = __libc_dlopen (LIBGCC_S_SO);
[...]
}
1.3. frame 24: __libc_dlopen_mode
__libc_dlopen 的宏定义:
#define __libc_dlopen(name) \
__libc_dlopen_mode (name, RTLD_NOW | __RTLD_DLOPEN)
__libc_dlopen_mode
void *
__libc_dlopen_mode (const char *name, int mode)
{
[...]
#ifdef SHARED
return (dlerror_run (do_dlopen, &args) ? NULL : (void *) args.map);
#else
[...]
}
1.4. frame 23: _dl_catch_error
dlerror_run
static int
dlerror_run (void (*operate) (void *), void *args)
{
[...]
int result = (GLRO(dl_catch_error) (&objname, &last_errstring, &malloced,
operate, args)
[...]
}
_dl_catch_error
int
internal_function
_dl_catch_error (const char **objname, const char **errstring,
bool *mallocedp, void (*operate) (void *), void *args)
{
[...] if (__builtin_expect (errcode, 0) == 0)
{
[...]
(*operate) (args);
[...]
return 0;
}
[...]
}
1.5. frame 22: do_dlopen
/* Worker handed to dlerror_run by __libc_dlopen_mode.  PTR is treated as
   a struct do_dlopen_args; the value returned by GLRO(dl_open) is stored
   back into args->map, and the function itself returns nothing.  */
static void
do_dlopen (void *ptr)
{
struct do_dlopen_args *args = (struct do_dlopen_args *) ptr;
/* Open and relocate the shared object. */
args->map = GLRO(dl_open) (args->name, args->mode, args->caller_dlopen,
__LM_ID_CALLER, __libc_argc, __libc_argv,
__environ);
}
1.6. frame 21: _dl_open
void *
_dl_open (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid,
int argc, char *argv[], char *env[])
{
[...]
int errcode = _dl_catch_error (&objname, &errstring, &malloced,
dl_open_worker, &args);
[...]
}
1.7. frame 20: _dl_catch_error
同上。
1.8. frame 19: dl_open_worker
static void
dl_open_worker (void *a)
{
[...]
struct link_map *new;
args->map = new = _dl_map_object (call_map, file, lt_loaded, 0,
mode | __RTLD_CALLMAP, args->nsid);
[...]
}
1.9. frame 18: _dl_map_object
struct link_map *
internal_function
_dl_map_object (struct link_map *loader, const char *name,
int type, int trace_mode, int mode, Lmid_t nsid)
{
[...]
/* Look for this name among those already loaded. */
for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next)
{
[...]
soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
+ l->l_info[DT_SONAME]->d_un.d_val);
if (strcmp (name, soname) != 0)
continue;
[...]
}
[...]
}
1.10. frame 17: strcmp
2. 正常的pthread_cancel栈
通过进行用户栈分析,正常的pthread_cancel的代码路径为:
strcmp+0
_dl_check_map_versions+312
dl_open_worker+1153
_dl_catch_error+100
do_dlopen+66
start_thread+197
3. 分析
源码见下一章:
./a.out
Stack: 00000000908eadb7, 0x007ffc908eadb7.
Heap: 00000000008c9010, 0x000000008c9010.
Stack: 000000005625bf07, 0x007f975625bf07.
Heap: 00000000500008c0, 0x007f97500008c0.
Stack: 0000000055a5af07, 0x007f9755a5af07.
Heap: 00000000480008c0, 0x007f97480008c0.
cat /proc/$(pidof a.out)/maps
[...]
008c9000-008ea000 rw-p 00000000 00:00 0 [heap]
7f9748000000-7f9748021000 rw-p 00000000 00:00 0
7f9748021000-7f974c000000 ---p 00000000 00:00 0
7f9750000000-7f9750021000 rw-p 00000000 00:00 0
7f9750021000-7f9754000000 ---p 00000000 00:00 0
7f9755045000-7f975505a000 r-xp 00000000 fd:00 5053616 /usr/lib64/libgcc_s-4.8.5-20150702.so.1
[...]
源码见下一章:
_dl_check_map_versions(link map 0x00007f9750000b30, verbose 0, trace_mode 0)
_dl_check_map_versions在glibc中的几个调用:
/* So far, so good. Now check the versions. */
for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
if (new->l_searchlist.r_list[i]->l_real->l_versions == NULL)
(void) _dl_check_map_versions (new->l_searchlist.r_list[i]->l_real,
0, 0);
/* Walk the link_map list starting at MAP (following l_next) and call
   _dl_check_map_versions on every entry whose l_faked field is zero,
   passing VERBOSE and TRACE_MODE through unchanged.  The per-entry
   results are OR-ed together, so the return value is nonzero if any
   checked entry produced a nonzero result.  */
int
internal_function
_dl_check_all_versions (struct link_map *map, int verbose, int trace_mode)
{
struct link_map *l;
int result = 0;
for (l = map; l != NULL; l = l->l_next)
/* Entries with l_faked set are skipped by the short-circuit &&.  */
result |= (! l->l_faked
&& _dl_check_map_versions (l, verbose, trace_mode));
return result;
}
4. 本文用到的所有代码
4.1. test.c
#include <inttypes.h>
#include <malloc.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Thread handles: main starts t_2 (running t2), which in turn repeatedly
 * creates and cancels t_1 (running t1). */
pthread_t t_1, t_2;

/*
 * Print the address of a local variable of the calling function, i.e. a
 * location on the current thread's stack.  The first field is the low
 * 32 bits of the address, the second the full address as 0x + 14 hex
 * digits.  Pointers are converted to integers explicitly because passing
 * a pointer to %x is a type mismatch and the 0 flag is undefined for %p.
 * Wrapped in do { } while (0) so the macro is a single statement and can
 * be used more than once in the same scope.
 */
#define CHECK_STACK() \
    do { \
        char stack_probe_ = 'a'; \
        printf("Stack: %016x, 0x%014" PRIxPTR ".\n", \
               (unsigned)(uintptr_t)&stack_probe_, \
               (uintptr_t)&stack_probe_); \
    } while (0)

/*
 * Allocate 1024 bytes with malloc, print the returned address in the same
 * two formats as CHECK_STACK, then free the allocation.
 */
#define CHECK_HEAP() \
    do { \
        char *heap_probe_ = malloc(1024); \
        printf("Heap: %016x, 0x%014" PRIxPTR ".\n", \
               (unsigned)(uintptr_t)heap_probe_, \
               (uintptr_t)heap_probe_); \
        free(heap_probe_); \
    } while (0)
/*
 * Worker thread body.  Reports one stack address and one heap address of
 * this thread, then prints a heartbeat once per second.  The loop never
 * terminates on its own; the thread ends only when t2 cancels it with
 * pthread_cancel.
 *
 * arg: unused.
 */
void *t1(void *arg) {
    (void)arg;
    CHECK_STACK();
    CHECK_HEAP();
    for (;;) {
        sleep(1);
        printf("t1 running.\n");
    }
}
/*
 * Controller thread body.  Reports one stack address and one heap address
 * of this thread, then loops: start t1, let it run for 10 seconds, cancel
 * it, wait 3 seconds, repeat.  The loop is left only if t1 cannot be
 * created.
 *
 * arg: unused.
 */
void *t2(void *arg) {
    (void)arg;
    CHECK_STACK();
    CHECK_HEAP();
    while (1) {
        int rc = pthread_create(&t_1, NULL, t1, NULL);
        if (rc != 0) {
            /* t_1 is not a valid handle here; cancelling it would be an error. */
            fprintf(stderr, "pthread_create(t1) failed: %d\n", rc);
            break;
        }
        sleep(10);
        printf("cancel t1.\n");
        pthread_cancel(t_1);
        /* Reap the cancelled thread so its resources are released before
         * the next iteration creates a new one. */
        pthread_join(t_1, NULL);
        printf("t2 exit.\n");
        sleep(3);
    }
    pthread_exit(NULL);
}
/*
 * Entry point.  Reports one stack address and one heap address of the main
 * thread, starts the controller thread t2 and waits for it.
 *
 * Returns 0 once t2 has been joined, 1 if t2 could not be created.
 */
int main(void) {
    CHECK_STACK();
    CHECK_HEAP();
    int rc = pthread_create(&t_2, NULL, t2, NULL);
    if (rc != 0) {
        /* Do not join a thread that was never started. */
        fprintf(stderr, "pthread_create(t2) failed: %d\n", rc);
        return 1;
    }
    pthread_join(t_2, NULL);
    return 0;
}
4.2. uprobe.bt
#!/usr/bin/env bpftrace
/*
 * Analyze the CuUpApp coredump.
 * Rong Tao, 2021-05-17
 */
//#include <link.h>

// Runs once at startup: print a banner and the column header.
BEGIN
{
	printf("Uprobe pthread... Hit Ctrl-C to end.\n");
	printf("%-6s %-16s %-6s\n", "PID", "COMM", "TID");
}
// Fires on entry to _dl_map_object in the dynamic loader, for processes
// whose comm is "a.out". Prints arg1 as a string and arg4/arg5 as numbers;
// per the signature _dl_map_object(loader, name, type, trace_mode, mode,
// nsid) these are name, mode and nsid.
uprobe:/lib64/ld-linux-x86-64.so.2:_dl_map_object
/comm == "a.out"/
{
	printf("%-6d %-16s %s( ,%s, , ,%x, %d)\n",
		pid, comm, "_dl_map_object", str(arg1), arg4, arg5);
}
// Probe on the dynamic loader's strcmp for processes whose comm is "a.out".
// The action is currently empty: both the argument printout and the
// per-user-stack counter below are commented out. Re-enable one of them to
// get output from this probe.
uprobe:/lib64/ld-linux-x86-64.so.2:strcmp
/comm == "a.out"/
{
// printf("%-6d %-16s %s(%s ,%s)\\n",
// pid, comm, "strcmp", str(arg0), str(arg1));
// @[ustack] = count();
}
// Fires on entry to _dl_check_map_versions in the dynamic loader, for
// processes whose comm is "a.out". Prints arg0 as the link map address and
// arg1/arg2 as verbose/trace_mode.
uprobe:/lib64/ld-linux-x86-64.so.2:_dl_check_map_versions
/comm == "a.out"/
{
	printf("%-6d %-16s %s(link map 0x%016lx, verbose %d, trace_mode %d)\n",
		pid, comm, "_dl_check_map_versions", arg0, arg1, arg2);
	// @[ustack] = count();
}
// Runs once when tracing stops: print a closing line.
END
{
	printf("exit.\n");
}
以上是关于记一次pthread_cancel_init段错误分析(bpftrace)的主要内容,如果未能解决你的问题,请参考以下文章
记一次pg_rman备份postgresql数据库报段错误的处理过程
记一次pg_rman备份postgresql数据库报段错误的处理过程