Linux Kernel Study, Week 6: Process Description and Process Creation

1. The task_struct data structure

struct task_struct {
    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
    void *stack;
    atomic_t usage;
    unsigned int flags;    /* per process flags, defined below */
    unsigned int ptrace;

#ifdef CONFIG_SMP
    struct llist_node wake_entry;
    int on_cpu;
    struct task_struct *last_wakee;
    unsigned long wakee_flips;
    unsigned long wakee_flip_decay_ts;

    int wake_cpu;
#endif
    int on_rq;

    int prio, static_prio, normal_prio;
    unsigned int rt_priority;
    const struct sched_class *sched_class;
    struct sched_entity se;
    struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *sched_task_group;
#endif
    struct sched_dl_entity dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* list of struct preempt_notifier: */
    struct hlist_head preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int btrace_seq;
#endif

    unsigned int policy;
    int nr_cpus_allowed;
    cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
    int rcu_read_lock_nesting;
    union rcu_special rcu_read_unlock_special;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
    struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
    unsigned long rcu_tasks_nvcsw;
    bool rcu_tasks_holdout;
    struct list_head rcu_tasks_holdout_list;
    int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    struct sched_info sched_info;
#endif

    struct list_head tasks;
#ifdef CONFIG_SMP
    struct plist_node pushable_tasks;
    struct rb_node pushable_dl_tasks;
#endif

    struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
    unsigned brk_randomized:1;
#endif
    /* per-thread vma caching */
    u32 vmacache_seqnum;
    struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
    struct task_rss_stat    rss_stat;
#endif
/* task state */
    int exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;  /*  The signal sent when the parent dies  */
    unsigned int jobctl;    /* JOBCTL_*, siglock protected */

    /* Used for emulating ABI behavior of previous Linux versions */
    unsigned int personality;

    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
                 * execve */
    unsigned in_iowait:1;

    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;

    unsigned long atomic_flags; /* Flags needing atomic access. */

    pid_t pid;
    pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */
    struct task_struct __rcu *real_parent; /* real parent process */
    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;    /* list of my children */
    struct list_head sibling;    /* linkage in my parent's children list */
    struct task_struct *group_leader;    /* threadgroup leader */

    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;
    struct list_head thread_node;

    struct completion *vfork_done;        /* for vfork() */
    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    cputime_t utime, stime, utimescaled, stimescaled;
    cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    seqlock_t vtime_seqlock;
    unsigned long long vtime_snap;
    enum {
        VTIME_SLEEPING = 0,
        VTIME_USER,
        VTIME_SYS,
    } vtime_snap_whence;
#endif
    unsigned long nvcsw, nivcsw; /* context switch counts */
    u64 start_time;        /* monotonic time in nsec */
    u64 real_start_time;    /* boot based time in nsec */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

/* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                     * credentials (COW) */
    const struct cred __rcu *cred;    /* effective (overridable) subjective task
                     * credentials (COW) */
    char comm[TASK_COMM_LEN]; /* executable name excluding path
                     - access with [gs]et_task_comm (which lock
                       it with task_lock())
                     - initialized normally by setup_new_exec */
/* file system info */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
    struct sysv_sem sysvsem;
    struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
    unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;
/* open file information */
    struct files_struct *files;
/* namespaces */
    struct nsproxy *nsproxy;
/* signal handlers */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    struct callback_head *task_works;

    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    kuid_t loginuid;
    unsigned int sessionid;
#endif
    struct seccomp seccomp;

/* Thread group tracking */
       u32 parent_exec_id;
       u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct rb_root pi_waiters;
    struct rb_node *pi_waiters_leftmost;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif

/* journalling filesystem info */
    void *journal_info;

/* stacked block device info */
    struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
/* stack plugging */
    struct blk_plug *plug;
#endif

/* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1;    /* accumulated rss usage */
    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
    cputime_t acct_timexpd;    /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed;    /* Protected by alloc_lock */
    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
    unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
    int numa_scan_seq;
    unsigned int numa_scan_period;
    unsigned int numa_scan_period_max;
    int numa_preferred_nid;
    unsigned long numa_migrate_retry;
    u64 node_stamp;            /* migration stamp  */
    u64 last_task_numa_placement;
    u64 last_sum_exec_runtime;
    struct callback_head numa_work;

    struct list_head numa_entry;
    struct numa_group *numa_group;

    /*
     * Exponential decaying average of faults on a per-node basis.
     * Scheduling placement decisions are made based on the these counts.
     * The values remain static for the duration of a PTE scan
     */
    unsigned long *numa_faults_memory;
    unsigned long total_numa_faults;

    /*
     * numa_faults_buffer records faults per node during the current
     * scan window. When the scan completes, the counts in
     * numa_faults_memory decay and these values are copied.
     */
    unsigned long *numa_faults_buffer_memory;

    /*
     * Track the nodes the process was running on when a NUMA hinting
     * fault was incurred.
     */
    unsigned long *numa_faults_cpu;
    unsigned long *numa_faults_buffer_cpu;

    /*
     * numa_faults_locality tracks if faults recorded during the last
     * scan window were remote/local. The task scan period is adapted
     * based on the locality of the faults with different weights
     * depending on whether they were shared or private faults
     */
    unsigned long numa_faults_locality[2];

    unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

    struct rcu_head rcu;

    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;

    struct page_frag task_frag;

#ifdef    CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    /*
     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for some dirty throttling pause
     */
    int nr_dirtied;
    int nr_dirtied_pause;
    unsigned long dirty_paused_when; /* start of a write-and-pause period */

#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack    *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
    unsigned int memcg_kmem_skip_account;
    struct memcg_oom_info {
        struct mem_cgroup *memcg;
        gfp_t gfp_mask;
        int order;
        unsigned int may_oom:1;
    } memcg_oom;
#endif
#ifdef CONFIG_UPROBES
    struct uprobe_task *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    unsigned int    sequential_io;
    unsigned int    sequential_io_avg;
#endif
};
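To make the structure less abstract, here is a minimal kernel-module sketch (my own illustration, not part of the original notes) that walks the task list linked through task_struct->tasks and prints a few of the fields shown above:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>

static int __init list_tasks_init(void)
{
    struct task_struct *p;

    rcu_read_lock();
    for_each_process(p)    /* iterates the task list headed by init_task */
        pr_info("%-16s pid=%d state=%ld\n", p->comm, p->pid, (long)p->state);
    rcu_read_unlock();

    return 0;
}

static void __exit list_tasks_exit(void)
{
}

module_init(list_tasks_init);
module_exit(list_tasks_exit);
MODULE_LICENSE("GPL");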

2. Process creation

In Linux, fork, vfork, and clone can all be used to create a new process. Their corresponding system call entry points are shown below:

SYSCALL_DEFINE0(fork)
{
    return do_fork(SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE0(vfork)
{
    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val)
{
    return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}
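For comparison from user space, here is a small demo (my own example, not from the original notes) that passes clone() roughly the same flags the vfork() syscall hands to do_fork; because of CLONE_VM the child's increment is visible in the parent:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

static int counter = 0;

static int child_fn(void *arg)
{
    counter++;                 /* shared with the parent because of CLONE_VM */
    return 0;
}

int main(void)
{
    size_t stack_size = 64 * 1024;
    char *stack = malloc(stack_size);
    if (!stack)
        return 1;

    /* CLONE_VM | CLONE_VFORK | SIGCHLD mirrors the flags in SYSCALL_DEFINE0(vfork) above */
    pid_t pid = clone(child_fn, stack + stack_size,
                      CLONE_VM | CLONE_VFORK | SIGCHLD, NULL);
    if (pid == -1) {
        perror("clone");
        return 1;
    }

    waitpid(pid, NULL, 0);
    printf("counter = %d\n", counter);   /* prints 1: the address space was shared */
    free(stack);
    return 0;
}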

As you can see, the system calls behind fork, vfork, and clone are all implemented on top of do_fork. Inside do_fork, the actual duplication work is done by copy_process:


p = copy_process(clone_flags, stack_start, stack_size, child_tidptr, NULL, trace);

The key calls inside copy_process, in order:

    retval = security_task_create(clone_flags);   /* LSM hook: may veto the new process */
    p = dup_task_struct(current);                 /* allocate the new task_struct and its kernel stack */
    retval = sched_fork(clone_flags, p);          /* scheduler-related setup so the CPU can schedule this task */
    retval = copy_thread(clone_flags, stack_start, stack_size, p);   /* set up the child's kernel stack: copy the parent's saved registers (pt_regs) into it */
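Stepping back one level, the following is a condensed sketch of how do_fork ties these pieces together in kernels of roughly this vintage (around 3.18); error handling and tracing details are trimmed, so treat it as an outline rather than the exact source:

long do_fork(unsigned long clone_flags, unsigned long stack_start,
             unsigned long stack_size, int __user *parent_tidptr,
             int __user *child_tidptr)
{
    struct task_struct *p;
    struct completion vfork;
    struct pid *pid;
    long nr;

    /* duplicate the current task: task_struct, kernel stack, credentials, mm, files, ... */
    p = copy_process(clone_flags, stack_start, stack_size,
                     child_tidptr, NULL, 0);
    if (IS_ERR(p))
        return PTR_ERR(p);

    pid = get_task_pid(p, PIDTYPE_PID);
    nr = pid_vnr(pid);                    /* the value fork() returns in the parent */

    if (clone_flags & CLONE_VFORK) {
        p->vfork_done = &vfork;           /* the child completes this on exit or exec */
        init_completion(&vfork);
        get_task_struct(p);
    }

    wake_up_new_task(p);                  /* put the child on a runqueue */

    if (clone_flags & CLONE_VFORK)        /* vfork(): parent sleeps until the child exits or execs */
        wait_for_vfork_done(p, &vfork);

    put_pid(pid);
    return nr;
}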

Within copy_thread, the statement p->thread.ip = (unsigned long) ret_from_fork determines the address of the first instruction the new process will execute.

Key steps in dup_task_struct, which allocates the new task's kernel stack:

tsk = alloc_task_struct_node(node);      /* allocate memory for the new task_struct */
ti = alloc_thread_info_node(tsk, node);  /* ti points to the new thread_info, i.e. the base of the
                                          * two contiguous pages allocated for the kernel stack */
err = arch_dup_task_struct(tsk, orig);   /* copy the parent's task_struct into the new one */
tsk->stack = ti;                         /* attach the kernel stack to the task */
setup_thread_stack(tsk, orig);           /* initialize the thread_info structure */
set_task_stack_end_magic(tsk);           /* write the magic value that marks the end of the stack */
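To make the "two contiguous pages" and the stack-end marker concrete, this is roughly how kernels of this era lay out the kernel stack and write the sentinel (condensed from include/linux/sched.h and kernel/fork.c; treat it as a sketch):

/* thread_info and the kernel stack share one THREAD_SIZE allocation
 * (8 KB, i.e. two pages, on 32-bit x86): thread_info sits at the bottom,
 * the stack grows downwards from the top. */
union thread_union {
    struct thread_info thread_info;
    unsigned long stack[THREAD_SIZE/sizeof(long)];
};

/* set_task_stack_end_magic() writes a sentinel just above thread_info,
 * so that a later stack overflow can be detected. */
void set_task_stack_end_magic(struct task_struct *tsk)
{
    unsigned long *stackend;

    stackend = end_of_stack(tsk);
    *stackend = STACK_END_MAGIC;    /* for overflow detection */
}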

3. Where the new process starts executing

As noted above, copy_thread() inside copy_process is what determines where the child resumes execution after the system call returns. The assignment p->thread.ip = (unsigned long) ret_from_fork; sets the child's saved instruction pointer to the start of ret_from_fork, so the new process begins executing at ret_from_fork.
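A trimmed sketch of the 32-bit x86 copy_thread() of that era (user-process branch only; kernel threads and other architectures differ, and several setup steps are omitted) shows how both pieces are wired up:

int copy_thread(unsigned long clone_flags, unsigned long sp,
                unsigned long arg, struct task_struct *p)
{
    struct pt_regs *childregs = task_pt_regs(p);   /* top of the child's kernel stack */

    p->thread.sp = (unsigned long) childregs;      /* child's saved kernel stack pointer */

    *childregs = *current_pt_regs();               /* copy the parent's saved user registers */
    childregs->ax = 0;                             /* this is why fork()/clone() return 0 in the child */
    if (sp)
        childregs->sp = sp;                        /* clone() may supply a new user stack */

    p->thread.ip = (unsigned long) ret_from_fork;  /* first instruction the child executes */

    /* ... TLS, debug register and I/O bitmap setup omitted ... */
    return 0;
}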

4. How the execution start point stays consistent with the kernel stack

  • Before ret_from_fork runs, i.e. inside copy_thread(), the statement *childregs = *current_pt_regs(); copies the parent's saved register frame onto the child's kernel stack.
  • childregs points to a struct pt_regs, which holds exactly the registers that SAVE_ALL pushed onto the stack (sketched below).
  • The later RESTORE_ALL therefore pops a consistent frame, and execution in the child continues correctly.
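For reference, the frame that SAVE_ALL builds and RESTORE_ALL later pops is described by struct pt_regs; on 32-bit x86 it is roughly the following (condensed, one field per line in the real header):

struct pt_regs {
    unsigned long bx, cx, dx, si, di, bp, ax;  /* general registers saved by SAVE_ALL */
    unsigned long ds, es, fs, gs;              /* segment registers */
    unsigned long orig_ax;                     /* system call number (or -1) */
    unsigned long ip, cs, flags, sp, ss;       /* pushed by the CPU on entry to the kernel */
};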
5. Summary

Linux creates a child process by duplicating its parent: for every newly created process the kernel dynamically allocates a task_struct together with a kernel stack. fork is called once but returns twice: once in the parent, where it returns the child's PID, and once in the child, where it returns 0.
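A minimal user-space illustration (my own example) of "called once, returns twice":

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    pid_t pid = fork();    /* one call ... */

    if (pid == 0) {
        /* ... second return: in the child, fork() yields 0 */
        printf("child:  pid=%d\n", getpid());
    } else if (pid > 0) {
        /* ... first return: in the parent, fork() yields the child's PID */
        printf("parent: pid=%d, child=%d\n", getpid(), pid);
        wait(NULL);
    } else {
        perror("fork");
        return 1;
    }
    return 0;
}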
