linux内核情景分析之exit与Wait
Posted 笨拙的菜鸟
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了linux内核情景分析之exit与Wait相关的知识,希望对你有一定的参考价值。
/*
 * exit() system call entry point.
 *
 * Only the low 8 bits of error_code are kept; they are shifted into
 * bits 8-15 of the value handed to do_exit().
 */
asmlinkage long sys_exit(int error_code)
{
	long exit_status = (error_code & 0xff) << 8;

	do_exit(exit_status);
}
其主体是do_exit,接下来我们来看看do_exit的实现
/*
 * do_exit - terminate the current task.
 * @code: exit status; stored verbatim in tsk->exit_code.
 *
 * Refuses (panics) when called from interrupt context or for pid 0 / pid 1,
 * then drops the task's resources one by one, records the exit code and
 * calls exit_notify() before giving up the CPU with schedule().
 *
 * The function is not expected to return: BUG() fires if schedule() ever
 * comes back, and the trailing "goto fake_volatile" loops to the top so
 * the compiler sees no path that falls off the end of a NORET_TYPE function.
 */
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;	/* descriptor of the exiting task */

	/* An interrupt handler must never exit. */
	if (in_interrupt())
		panic("Aiee, killing interrupt handler!");
	/* pid 0 is the idle task and may not exit. */
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	/* pid 1 is init and may not exit either. */
	if (tsk->pid == 1)
		panic("Attempted to kill init!");

	/* Mark the task as being on its way out. */
	tsk->flags |= PF_EXITING;

	/*
	 * The task may have armed its real_timer; take it off the timer
	 * queue since nobody will be left to receive it.
	 * NOTE(review): del_timer_sync() is not shown here - confirm.
	 */
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	/*
	 * Detach the address space: __exit_mm() clears tsk->mm and hands
	 * the mm to mmput().
	 */
	__exit_mm(tsk);

	lock_kernel();
	/* NOTE(review): presumably undoes pending semaphore operations - helper not shown. */
	sem_exit();
	/* NOTE(review): presumably drops the reference on the open-file table - helper not shown. */
	__exit_files(tsk);
	/* NOTE(review): same idea for the fs_struct - helper not shown. */
	__exit_fs(tsk);
	/* NOTE(review): presumably releases the signal handler table - helper not shown. */
	exit_sighand(tsk);
	/* Architecture hook; the original annotation calls it an empty function. */
	exit_thread();

	/* A session leader also gives up the controlling terminal. */
	if (current->leader)
		disassociate_ctty(1);

	/* Drop the references held on the execution domain and binary format module. */
	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	/* Notify the relatives and become a zombie (see exit_notify()). */
	exit_notify();
	schedule();
	BUG();
	/*
	 * Only reached if schedule() returned and BUG() did not stop us:
	 * start over instead of returning from a function declared noreturn.
	 */
	goto fake_volatile;
}
接着挨个分析释放资源相关函数(信号量就等到进程间通信学完再分析)
/*
 * Detach the address space from an exiting task.
 *
 * mm_release() is called unconditionally.  When the task has an mm, the
 * task's pointer to it is cleared under task_lock() and the mm is then
 * passed to mmput().
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct *exiting_mm = tsk->mm;

	/* NOTE(review): the original annotation says this wakes a sleeping parent - helper not shown. */
	mm_release();
	if (!exiting_mm)
		return;

	/* Take an mm_count reference before the mm is handed to mmput() below. */
	atomic_inc(&exiting_mm->mm_count);
	/* A task that has an mm must also be using it as active_mm. */
	if (exiting_mm != tsk->active_mm)
		BUG();

	/* more a memory barrier than a real lock */
	task_lock(tsk);
	tsk->mm = NULL;
	task_unlock(tsk);

	enter_lazy_tlb(exiting_mm, current, smp_processor_id());
	mmput(exiting_mm);
}
以上资源释放完后,进程还保留着task_struct(PCB)以及内核栈;这部分并不由进程自己释放,而是由父进程负责回收。因此do_exit接着调用exit_notify(),把进程设置为僵尸状态并通知其父进程。
原因:让父进程可以统计子进程的信息。接下来看看exit_notify()
/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 *
 * Runs for the exiting task (current): hands its children over through
 * forget_original_parent(), performs the orphaned-process-group checks,
 * marks current TASK_ZOMBIE, notifies its parent, and relinks every
 * child onto the child list of that child's new parent.
 */
static void exit_notify(void)
{
struct task_struct * p, *t;
/*
 * Give each of our children a new original parent (p_opptr): another
 * thread of our thread group if there is one, otherwise child_reaper.
 */
forget_original_parent(current);
/*
 * Check to see if any process groups have become orphaned
 * as a result of our exiting, and if they have any stopped
 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 *
 * Case i: Our father is in a different pgrp than we are
 * and we were the only connection outside, so our pgrp
 * is about to become orphaned.
 */
/* t is our current parent. */
t = current->p_pptr;
/*
 * Parent is in the same session but a different process group: if our
 * exit would orphan our own group and that group has stopped jobs,
 * send the whole group SIGHUP followed by SIGCONT.
 */
if ((t->pgrp != current->pgrp) &&
(t->session == current->session) &&
will_become_orphaned_pgrp(current->pgrp, current) &&
has_stopped_jobs(current->pgrp)) {
kill_pg(current->pgrp,SIGHUP,1);
kill_pg(current->pgrp,SIGCONT,1);
}
/* Let father know we died
 *
 * Thread signals are configurable, but you aren't going to use
 * that to send signals to arbitrary processes.
 * That stops right now.
 *
 * If the parent exec id doesn't match the exec id we saved
 * when we started then we know the parent has changed security
 * domain.
 *
 * If our self_exec id doesn't match our parent_exec_id then
 * we have changed execution domain as these two values started
 * the same after a fork.
 *
 */
/* In either of those cases, without CAP_KILL, fall back to plain SIGCHLD. */
if(current->exit_signal != SIGCHLD &&
( current->parent_exec_id != t->self_exec_id ||
current->self_exec_id != current->parent_exec_id)
&& !capable(CAP_KILL))
current->exit_signal = SIGCHLD;
/*
 * This loop does two things:
 *
 * A. Make init inherit all the child processes
 * B. Check to see if any process groups have become orphaned
 * as a result of our exiting, and if they have any stopped
 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 */
write_lock_irq(&tasklist_lock);
/* From here on we are a zombie; the parent is told and does the rest. */
current->state = TASK_ZOMBIE;
do_notify_parent(current, current->exit_signal);
/* Move every entry of our child list onto its new parent's child list. */
while (current->p_cptr != NULL) {
/* Pop the head of our child list; the next entry is p->p_osptr. */
p = current->p_cptr;
current->p_cptr = p->p_osptr;
p->p_ysptr = NULL;
p->ptrace = 0;
/* The original parent (updated by forget_original_parent()) becomes the actual parent. */
p->p_pptr = p->p_opptr;
/* Push p onto the head of the new parent's child list. */
p->p_osptr = p->p_pptr->p_cptr;
if (p->p_osptr)
p->p_osptr->p_ysptr = p;
p->p_pptr->p_cptr = p;
/* A child that is already a zombie is reported to its new parent right away. */
if (p->state == TASK_ZOMBIE)
do_notify_parent(p, p->exit_signal);
/*
 * process group orphan check
 * Case ii: Our child is in a different pgrp
 * than we are, and it was the only connection
 * outside, so the child pgrp is now orphaned.
 *
 * Orphaned process group: every member's parent is either itself a
 * member of the group or is not in the group's session.  Put the
 * other way round, a group is not orphaned as long as one member has
 * a parent in a different group of the same session.
 */
if ((p->pgrp != current->pgrp) &&
(p->session == current->session)) {
int pgrp = p->pgrp;
/* The lock is dropped around the check and the signalling, then retaken. */
write_unlock_irq(&tasklist_lock);
/* The child's group: orphaned now, and does it contain stopped jobs? */
if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
kill_pg(pgrp,SIGHUP,1);
kill_pg(pgrp,SIGCONT,1);
}
write_lock_irq(&tasklist_lock);
}
}
write_unlock_irq(&tasklist_lock);
}
如果退出的进程所在的线程组中还有其他线程,就把它的子进程托孤给其中的下一个线程;否则托孤给init进程
/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init")
 *
 * Only the original-parent pointer (p_opptr) is rewritten here.
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct *task, *heir;

	read_lock(&tasklist_lock);

	/*
	 * next_thread() handing back father itself means there is no other
	 * thread to inherit the children, so child_reaper gets them.
	 */
	heir = next_thread(father);
	if (heir == father)
		heir = child_reaper;

	for_each_task(task) {
		/* Only tasks whose original parent is the dying task are touched. */
		if (task->p_opptr != father)
			continue;

		/* We dont want people slaying init */
		task->exit_signal = SIGCHLD;
		task->self_exec_id++;
		task->p_opptr = heir;
		/* The child asked for a signal on its parent's death: send it. */
		if (task->pdeath_signal)
			send_sig(task->pdeath_signal, task, 0);
	}

	read_unlock(&tasklist_lock);
}
接下来查看do_notify_parent发送信号给父进程
/*
 * Let a parent know about a status change of a child.
 *
 * @tsk: the child whose status changed
 * @sig: the signal to send to tsk's parent (tsk->p_pptr)
 *
 * Fills in a siginfo describing the child (pid, uid, user/system times,
 * reason code and status), sends it to the parent and wakes the parent up.
 */
void do_notify_parent(struct task_struct *tsk, int sig)
{
	struct siginfo info;
	int reason, status;

	info.si_signo = sig;
	info.si_errno = 0;
	info.si_pid = tsk->pid;
	info.si_uid = tsk->uid;

	/* FIXME: find out whether or not this is supposed to be c*time. */
	info.si_utime = tsk->times.tms_utime;
	info.si_stime = tsk->times.tms_stime;

	/* Default status is the low 7 bits of the exit code. */
	status = tsk->exit_code & 0x7f;
	reason = SI_KERNEL;	/* shouldn't happen */

	if (tsk->state == TASK_STOPPED) {
		/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */
		reason = (tsk->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
	} else if (tsk->exit_code & 0x80) {
		reason = CLD_DUMPED;
	} else if (tsk->exit_code & 0x7f) {
		reason = CLD_KILLED;
	} else {
		/* Normal exit: report bits 8 and up of the exit code. */
		reason = CLD_EXITED;
		status = tsk->exit_code >> 8;
	}

	info.si_code = reason;
	info.si_status = status;

	send_sig_info(sig, &info, tsk->p_pptr);
	wake_up_parent(tsk->p_pptr);
}
/*
* This function is typically called only by the session leader, when
* it wants to disassociate itself from its controlling tty.
*
* It performs the following functions:
* (1) Sends a SIGHUP and SIGCONT to the foreground process group
* (2) Clears the tty from being controlling the session
* (3) Clears the controlling tty for all processes in the
* session group.
*当前进程是一个会话的主进程(current->leader非0)那就还要将整个session与中断切断,并释放tty,pcb有个tty指针
* The argument on_exit is set to 1 if called when a process is
* exiting; it is 0 if called by the ioctl TIOCNOTTY.
*/
void disassociate_ctty(int on_exit)
{
struct tty_struct *tty = current->tty;//获取当前进程的tty
struct task_struct *p;
int tty_pgrp = -1;
if (tty) {
tty_pgrp = tty->pgrp;//获取进程组的tty
if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)//统计tty设备打开的次数
tty_vhangup(tty);
} else {
if (current->tty_old_pgrp) {
kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);//给当前进程组发送sighup与sigcont信号
kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
}
return;
}
if (tty_pgrp > 0) {
kill_pg(tty_pgrp, SIGHUP, on_exit);
if (!on_exit)
kill_pg(tty_pgrp, SIGCONT, on_exit);
}
current->tty_old_pgrp = 0;//进程控制终端所在的组标识设置为0
tty->session = 0;//会话设置为0
tty->pgrp = -1;//组设置为-1
read_lock(&tasklist_lock);
for_each_task(p)//遍历每个进程是否位于同一会话
if (p->session == current->session)//当前进程是会话的主进程
p->tty = NULL;//切断tty终端
read_unlock(&tasklist_lock);
}
do_exit流程:
不允许在中断上下文中调用,也不允许0号进程(idle)和1号进程(init)退出
如果有独立空间那就删除独立空间,释放页表,释放信号量,释放文件对象,释放信号处理函数表
如果是会话控制进程,删除终端,释放tty,接下来调用exit_notify()函数
如果当前进程是线程(也就是线程组中还有其他线程,非独享),托孤给其他线程,否则托孤给init进程
判断当前进程退出是否会导致孤儿进程组出现
设置发送信号为SIGCHLD,将当前进程设置为僵尸状态,接着调用do_notify_parent发送信号给父进程,并唤醒父进程
并将僵尸进程的所有子进程的队列移到托孤的队列.最后schedule()
/*
 * sys_wait4 - wait for a child of the caller to stop or to exit.
 *
 * @pid:       >  0  only the child with exactly this pid;
 *             == 0  any child in the caller's process group;
 *             == -1 any child;
 *             < -1  any child whose process group is -pid.
 * @stat_addr: if non-NULL, user address that receives the status word.
 * @options:   mask of WNOHANG, WUNTRACED, __WNOTHREAD, __WCLONE, __WALL;
 *             any other bit gives -EINVAL.
 * @ru:        if non-NULL, filled in via getrusage() for the reported child.
 *
 * Returns the pid of the child that was reported; 0 when WNOHANG is set
 * and eligible children exist but none is ready; -ECHILD when no child
 * is eligible; -ERESTARTSYS when a signal is pending; or the error
 * returned by getrusage()/put_user().
 */
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	/* Wait-queue entry for the calling task. */
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	/* Reject option bits this call does not know. */
	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	/* Queue ourselves on our own wait_chldexit queue. */
	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	/* flag becomes 1 once at least one child passes the filters below. */
	flag = 0;
	/*
	 * Change state before scanning.
	 * NOTE(review): presumably so that a wake-up arriving during the
	 * scan is not lost before schedule() - confirm.
	 */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;

		/* Walk tsk's child list: head at p_cptr, linked through p_osptr. */
		for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid>0) {
				/* A specific child was asked for. */
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				/* pid == 0: children in the caller's process group only. */
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				/* pid < -1: children in process group -pid only. */
				if (p->pgrp != -pid)
					continue;
			}
			/* pid == -1 gets here unfiltered: any child will do. */

			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*. (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				/* A zero exit_code means there is nothing to report. */
				if (!p->exit_code)
					continue;
				/* Stopped children count only with WUNTRACED or when ptraced. */
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					/* Reported once; clear it so it is not reported again. */
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				/* Fold the child's times into our children-time totals. */
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					/*
					 * Current parent differs from the original one:
					 * relink the child under its original parent and
					 * notify that parent instead of freeing the child.
					 */
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					/* We are the original parent: free what is left of the child. */
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		/* __WNOTHREAD: look at our own children only. */
		if (options & __WNOTHREAD)
			break;
		/* Otherwise also scan the children of the next thread in our group. */
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		/* Eligible children exist, but none is ready yet. */
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		/* Give up the CPU, then rescan from the top. */
		schedule();
		goto repeat;
	}
	/* No eligible child at all. */
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}
下列条件之一得到满足时才结束,goto end_wait4:
1、所等待的子进程的状态变成TASK_STOPPED或TASK_ZOMBIE;
2、所等待的子进程存在,但不在上述两个状态,而调用参数options中的WNOHANG标志位为1,或者当前进程接收到了其他的信号;
3、进程号pid的那个进程根本不存在,或者不是当前进程的子进程。
否则,当前进程将其自身的状态设成TASK_INTERRUPTIBLE,并调用schedule()。
释放残余的子进程资源
/*
 * Free what is left of a dead child: its per-user process accounting,
 * its hash/list linkage and finally the task structure itself.  The
 * child's fault/swap counters and its remaining counter value are
 * added to the caller's.
 *
 * A task may not release itself; that case is only logged.
 */
static void release_task(struct task_struct * p)
{
	if (p == current) {
		printk("task releasing itself\n");
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Wait to make sure the process isn't on the
	 * runqueue (active on some other CPU still)
	 */
	task_lock(p);
	while (p->has_cpu) {
		/* Spin without the lock held, then recheck under the lock. */
		task_unlock(p);
		do {
			barrier();
		} while (p->has_cpu);
		task_lock(p);
	}
	task_unlock(p);
#endif
	/* One process fewer for the owning user. */
	atomic_dec(&p->user->processes);
	free_uid(p->user);
	unhash_process(p);
	release_thread(p);

	/* Inherit the child's accumulated fault and swap statistics. */
	current->cmin_flt += p->min_flt + p->cmin_flt;
	current->cmaj_flt += p->maj_flt + p->cmaj_flt;
	current->cnswap += p->nswap + p->cnswap;

	/*
	 * Potentially available timeslices are retrieved
	 * here - this way the parent does not get penalized
	 * for creating too many processes.
	 *
	 * (this cannot be used to artificially 'generate'
	 * timeslices, because any timeslice recovered here
	 * was given away by the parent in the first place.)
	 */
	current->counter += p->counter;
	if (current->counter >= MAX_COUNTER)
		current->counter = MAX_COUNTER;

	free_task_struct(p);
}
以上是关于linux内核情景分析之exit与Wait的主要内容,如果未能解决你的问题,请参考以下文章