Linux如何创建一个新进程

Posted 秦时明月0515

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Linux如何创建一个新进程相关的知识,希望对你有一定的参考价值。

2016-03-31

张超《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000

Linux如何创建一个新进程

1.我们先阅读理解task_struct数据结构

1235struct task_struct {
1236    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
1237    void *stack;
1238    atomic_t usage;
1239    unsigned int flags;    /* per process flags, defined below */
1240    unsigned int ptrace;
1241
1242#ifdef CONFIG_SMP
1243    struct llist_node wake_entry;
1244    int on_cpu;
1245    struct task_struct *last_wakee;
1246    unsigned long wakee_flips;
1247    unsigned long wakee_flip_decay_ts;
1248
1249    int wake_cpu;
1250#endif
1251    int on_rq;
1252
1253    int prio, static_prio, normal_prio;
1254    unsigned int rt_priority;
1255    const struct sched_class *sched_class;
1256    struct sched_entity se;
1257    struct sched_rt_entity rt;
1258#ifdef CONFIG_CGROUP_SCHED
1259    struct task_group *sched_task_group;
1260#endif
1261    struct sched_dl_entity dl;
1262
1263#ifdef CONFIG_PREEMPT_NOTIFIERS
1264    /* list of struct preempt_notifier: */
1265    struct hlist_head preempt_notifiers;
1266#endif
1267
1268#ifdef CONFIG_BLK_DEV_IO_TRACE
1269    unsigned int btrace_seq;
1270#endif
1271
1272    unsigned int policy;
1273    int nr_cpus_allowed;
1274    cpumask_t cpus_allowed;
1275
1276#ifdef CONFIG_PREEMPT_RCU
1277    int rcu_read_lock_nesting;
1278    union rcu_special rcu_read_unlock_special;
1279    struct list_head rcu_node_entry;
1280#endif /* #ifdef CONFIG_PREEMPT_RCU */
1281#ifdef CONFIG_TREE_PREEMPT_RCU
1282    struct rcu_node *rcu_blocked_node;
1283#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1284#ifdef CONFIG_TASKS_RCU
1285    unsigned long rcu_tasks_nvcsw;
1286    bool rcu_tasks_holdout;
1287    struct list_head rcu_tasks_holdout_list;
1288    int rcu_tasks_idle_cpu;
1289#endif /* #ifdef CONFIG_TASKS_RCU */
1290
1291#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1292    struct sched_info sched_info;
1293#endif
1294
1295    struct list_head tasks;
1296#ifdef CONFIG_SMP
1297    struct plist_node pushable_tasks;
1298    struct rb_node pushable_dl_tasks;
1299#endif
1300
1301    struct mm_struct *mm, *active_mm;
1302#ifdef CONFIG_COMPAT_BRK
1303    unsigned brk_randomized:1;
1304#endif
1305    /* per-thread vma caching */
1306    u32 vmacache_seqnum;
1307    struct vm_area_struct *vmacache[VMACACHE_SIZE];
1308#if defined(SPLIT_RSS_COUNTING)
1309    struct task_rss_stat    rss_stat;
1310#endif
1311/* task state */
1312    int exit_state;
1313    int exit_code, exit_signal;
1314    int pdeath_signal;  /*  The signal sent when the parent dies  */
1315    unsigned int jobctl;    /* JOBCTL_*, siglock protected */
1316
1317    /* Used for emulating ABI behavior of previous Linux versions */
1318    unsigned int personality;
1319
1320    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
1321                 * execve */
1322    unsigned in_iowait:1;
1323
1324    /* Revert to default priority/policy when forking */
1325    unsigned sched_reset_on_fork:1;
1326    unsigned sched_contributes_to_load:1;
1327
1328    unsigned long atomic_flags; /* Flags needing atomic access. */
1329
1330    pid_t pid;
1331    pid_t tgid;
1332
1333#ifdef CONFIG_CC_STACKPROTECTOR
1334    /* Canary value for the -fstack-protector gcc feature */
1335    unsigned long stack_canary;
1336#endif
1337    /*
1338     * pointers to (original) parent process, youngest child, younger sibling,
1339     * older sibling, respectively.  (p->father can be replaced with
1340     * p->real_parent->pid)
1341     */
1342    struct task_struct __rcu *real_parent; /* real parent process */
1343    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1344    /*
1345     * children/sibling forms the list of my natural children
1346     */
1347    struct list_head children;    /* list of my children */
1348    struct list_head sibling;    /* linkage in my parent's children list */
1349    struct task_struct *group_leader;    /* threadgroup leader */
1350
1351    /*
1352     * ptraced is the list of tasks this task is using ptrace on.
1353     * This includes both natural children and PTRACE_ATTACH targets.
1354     * p->ptrace_entry is p's link on the p->parent->ptraced list.
1355     */
1356    struct list_head ptraced;
1357    struct list_head ptrace_entry;
1358
1359    /* PID/PID hash table linkage. */
1360    struct pid_link pids[PIDTYPE_MAX];
1361    struct list_head thread_group;
1362    struct list_head thread_node;
1363
1364    struct completion *vfork_done;        /* for vfork() */
1365    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
1366    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */
1367
1368    cputime_t utime, stime, utimescaled, stimescaled;
1369    cputime_t gtime;
1370#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
1371    struct cputime prev_cputime;
1372#endif
1373#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1374    seqlock_t vtime_seqlock;
1375    unsigned long long vtime_snap;
1376    enum {
1377        VTIME_SLEEPING = 0,
1378        VTIME_USER,
1379        VTIME_SYS,
1380    } vtime_snap_whence;
1381#endif
1382    unsigned long nvcsw, nivcsw; /* context switch counts */
1383    u64 start_time;        /* monotonic time in nsec */
1384    u64 real_start_time;    /* boot based time in nsec */
1385/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1386    unsigned long min_flt, maj_flt;
1387
1388    struct task_cputime cputime_expires;
1389    struct list_head cpu_timers[3];
1390
1391/* process credentials */
1392    const struct cred __rcu *real_cred; /* objective and real subjective task
1393                     * credentials (COW) */
1394    const struct cred __rcu *cred;    /* effective (overridable) subjective task
1395                     * credentials (COW) */
1396    char comm[TASK_COMM_LEN]; /* executable name excluding path
1397                     - access with [gs]et_task_comm (which lock
1398                       it with task_lock())
1399                     - initialized normally by setup_new_exec */
1400/* file system info */
1401    int link_count, total_link_count;
1402#ifdef CONFIG_SYSVIPC
1403/* ipc stuff */
1404    struct sysv_sem sysvsem;
1405    struct sysv_shm sysvshm;
1406#endif
1407#ifdef CONFIG_DETECT_HUNG_TASK
1408/* hung task detection */
1409    unsigned long last_switch_count;
1410#endif
1411/* CPU-specific state of this task */
1412    struct thread_struct thread;
1413/* filesystem information */
1414    struct fs_struct *fs;
1415/* open file information */
1416    struct files_struct *files;
1417/* namespaces */
1418    struct nsproxy *nsproxy;
1419/* signal handlers */
1420    struct signal_struct *signal;
1421    struct sighand_struct *sighand;
1422
1423    sigset_t blocked, real_blocked;
1424    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
1425    struct sigpending pending;
1426
1427    unsigned long sas_ss_sp;
1428    size_t sas_ss_size;
1429    int (*notifier)(void *priv);
1430    void *notifier_data;
1431    sigset_t *notifier_mask;
1432    struct callback_head *task_works;
1433
1434    struct audit_context *audit_context;
1435#ifdef CONFIG_AUDITSYSCALL
1436    kuid_t loginuid;
1437    unsigned int sessionid;
1438#endif
1439    struct seccomp seccomp;
1440
1441/* Thread group tracking */
1442       u32 parent_exec_id;
1443       u32 self_exec_id;
1444/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
1445 * mempolicy */
1446    spinlock_t alloc_lock;
1447
1448    /* Protection of the PI data structures: */
1449    raw_spinlock_t pi_lock;
1450
1451#ifdef CONFIG_RT_MUTEXES
1452    /* PI waiters blocked on a rt_mutex held by this task */
1453    struct rb_root pi_waiters;
1454    struct rb_node *pi_waiters_leftmost;
1455    /* Deadlock detection and priority inheritance handling */
1456    struct rt_mutex_waiter *pi_blocked_on;
1457#endif
1458
1459#ifdef CONFIG_DEBUG_MUTEXES
1460    /* mutex deadlock detection */
1461    struct mutex_waiter *blocked_on;
1462#endif
1463#ifdef CONFIG_TRACE_IRQFLAGS
1464    unsigned int irq_events;
1465    unsigned long hardirq_enable_ip;
1466    unsigned long hardirq_disable_ip;
1467    unsigned int hardirq_enable_event;
1468    unsigned int hardirq_disable_event;
1469    int hardirqs_enabled;
1470    int hardirq_context;
1471    unsigned long softirq_disable_ip;
1472    unsigned long softirq_enable_ip;
1473    unsigned int softirq_disable_event;
1474    unsigned int softirq_enable_event;
1475    int softirqs_enabled;
1476    int softirq_context;
1477#endif
1478#ifdef CONFIG_LOCKDEP
1479# define MAX_LOCK_DEPTH 48UL
1480    u64 curr_chain_key;
1481    int lockdep_depth;
1482    unsigned int lockdep_recursion;
1483    struct held_lock held_locks[MAX_LOCK_DEPTH];
1484    gfp_t lockdep_reclaim_gfp;
1485#endif
1486
1487/* journalling filesystem info */
1488    void *journal_info;
1489
1490/* stacked block device info */
1491    struct bio_list *bio_list;
1492
1493#ifdef CONFIG_BLOCK
1494/* stack plugging */
1495    struct blk_plug *plug;
1496#endif
1497
1498/* VM state */
1499    struct reclaim_state *reclaim_state;
1500
1501    struct backing_dev_info *backing_dev_info;
1502
1503    struct io_context *io_context;
1504
1505    unsigned long ptrace_message;
1506    siginfo_t *last_siginfo; /* For ptrace use.  */
1507    struct task_io_accounting ioac;
1508#if defined(CONFIG_TASK_XACCT)
1509    u64 acct_rss_mem1;    /* accumulated rss usage */
1510    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
1511    cputime_t acct_timexpd;    /* stime + utime since last update */
1512#endif
1513#ifdef CONFIG_CPUSETS
1514    nodemask_t mems_allowed;    /* Protected by alloc_lock */
1515    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
1516    int cpuset_mem_spread_rotor;
1517    int cpuset_slab_spread_rotor;
1518#endif
1519#ifdef CONFIG_CGROUPS
1520    /* Control Group info protected by css_set_lock */
1521    struct css_set __rcu *cgroups;
1522    /* cg_list protected by css_set_lock and tsk->alloc_lock */
1523    struct list_head cg_list;
1524#endif
1525#ifdef CONFIG_FUTEX
1526    struct robust_list_head __user *robust_list;
1527#ifdef CONFIG_COMPAT
1528    struct compat_robust_list_head __user *compat_robust_list;
1529#endif
1530    struct list_head pi_state_list;
1531    struct futex_pi_state *pi_state_cache;
1532#endif
1533#ifdef CONFIG_PERF_EVENTS
1534    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1535    struct mutex perf_event_mutex;
1536    struct list_head perf_event_list;
1537#endif
1538#ifdef CONFIG_DEBUG_PREEMPT
1539    unsigned long preempt_disable_ip;
1540#endif
1541#ifdef CONFIG_NUMA
1542    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
1543    short il_next;
1544    short pref_node_fork;
1545#endif
1546#ifdef CONFIG_NUMA_BALANCING
1547    int numa_scan_seq;
1548    unsigned int numa_scan_period;
1549    unsigned int numa_scan_period_max;
1550    int numa_preferred_nid;
1551    unsigned long numa_migrate_retry;
1552    u64 node_stamp;            /* migration stamp  */
1553    u64 last_task_numa_placement;
1554    u64 last_sum_exec_runtime;
1555    struct callback_head numa_work;
1556
1557    struct list_head numa_entry;
1558    struct numa_group *numa_group;
1559
1560    /*
1561     * Exponential decaying average of faults on a per-node basis.
1562     * Scheduling placement decisions are made based on the these counts.
1563     * The values remain static for the duration of a PTE scan
1564     */
1565    unsigned long *numa_faults_memory;
1566    unsigned long total_numa_faults;
1567
1568    /*
1569     * numa_faults_buffer records faults per node during the current
1570     * scan window. When the scan completes, the counts in
1571     * numa_faults_memory decay and these values are copied.
1572     */
1573    unsigned long *numa_faults_buffer_memory;
1574
1575    /*
1576     * Track the nodes the process was running on when a NUMA hinting
1577     * fault was incurred.
1578     */
1579    unsigned long *numa_faults_cpu;
1580    unsigned long *numa_faults_buffer_cpu;
1581
1582    /*
1583     * numa_faults_locality tracks if faults recorded during the last
1584     * scan window were remote/local. The task scan period is adapted
1585     * based on the locality of the faults with different weights
1586     * depending on whether they were shared or private faults
1587     */
1588    unsigned long numa_faults_locality[2];
1589
1590    unsigned long numa_pages_migrated;
1591#endif /* CONFIG_NUMA_BALANCING */
1592
1593    struct rcu_head rcu;
1594
1595    /*
1596     * cache last used pipe for splice
1597     */
1598    struct pipe_inode_info *splice_pipe;
1599
1600    struct page_frag task_frag;
1601
1602#ifdef    CONFIG_TASK_DELAY_ACCT
1603    struct task_delay_info *delays;
1604#endif
1605#ifdef CONFIG_FAULT_INJECTION
1606    int make_it_fail;
1607#endif
1608    /*
1609     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1610     * balance_dirty_pages() for some dirty throttling pause
1611     */
1612    int nr_dirtied;
1613    int nr_dirtied_pause;
1614    unsigned long dirty_paused_when; /* start of a write-and-pause period */
1615
1616#ifdef CONFIG_LATENCYTOP
1617    int latency_record_count;
1618    struct latency_record latency_record[LT_SAVECOUNT];
1619#endif
1620    /*
1621     * time slack values; these are used to round up poll() and
1622     * select() etc timeout values. These are in nanoseconds.
1623     */
1624    unsigned long timer_slack_ns;
1625    unsigned long default_timer_slack_ns;
1626
1627#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1628    /* Index of current stored address in ret_stack */
1629    int curr_ret_stack;
1630    /* Stack of return addresses for return function tracing */
1631    struct ftrace_ret_stack    *ret_stack;
1632    /* time stamp for last schedule */
1633    unsigned long long ftrace_timestamp;
1634    /*
1635     * Number of functions that haven't been traced
1636     * because of depth overrun.
1637     */
1638    atomic_t trace_overrun;
1639    /* Pause for the tracing */
1640    atomic_t tracing_graph_pause;
1641#endif
1642#ifdef CONFIG_TRACING
1643    /* state flags for use by tracers */
1644    unsigned long trace;
1645    /* bitmask and counter of trace recursion */
1646    unsigned long trace_recursion;
1647#endif /* CONFIG_TRACING */
1648#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
1649    unsigned int memcg_kmem_skip_account;
1650    struct memcg_oom_info {
1651        struct mem_cgroup *memcg;
1652        gfp_t gfp_mask;
1653        int order;
1654        unsigned int may_oom:1;
1655    } memcg_oom;
1656#endif
1657#ifdef CONFIG_UPROBES
1658    struct uprobe_task *utask;
1659#endif
1660#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1661    unsigned int    sequential_io;
1662    unsigned int    sequential_io_avg;
1663#endif
1664};
task_struct

关于task_struct的具体介绍,见

http://blog.csdn.net/npy_lp/article/details/7292563

它定义在linux-3.18.6/include/linux/sched.h文件中。

进程(Process)是系统进行资源分配和调度的基本单位,一个进程是一个程序的运行实例。而在Linux中,可以使用一个进程来创建另外一个进程。这样的话,Linux的进程的组织结构其实有点像Linux目录树,是个层次结构的,可以使用pstree命令来查看。在最上面是init程序的执行进程。它是所有进程的老祖宗。Linux提供了两个函数来创建进程。

1.fork() 

fork()提供了创建进程的基本操作,可以说它是Linux系统多任务的基础。该函数的内核实现位于linux-3.18.6/kernel/fork.c中。

2.exec系列函数

如果只有fork(),肯定是不完美的,因为fork()只能产生一个父进程的副本。而exec系列函数则可以用一个全新的程序替换当前进程的映像,因此fork()与exec配合使用,就可以帮助我们建立一个运行全新程序的新进程。

在Linux系统中,一个进程的PCB是由一个C语言的结构体task_struct来表示的,而多个PCB之间是由一个双向链表组织起来的,在《Understanding the Linux Kernel》中,则是进一步描述这个链表是一个双向循环链表。

在Linux中创建一个新进程的方法是使用fork函数,fork()调用一次却返回两次:在父进程和子进程中各返回一次。

在父进程中,返回值是子进程的进程号;在子进程中,返回值为0;若创建失败,则在父进程中返回-1。因此可通过返回值来判断当前进程是父进程还是子进程。

使用fork函数得到的子进程是父进程的一个复制品,它从父进程处复制了整个进程的地址空间,包括进程上下文,进程堆栈,内存信息,打开的文件描述符,信号控制设定,进程优先级,进程组号,当前工作目录,根目录,资源限制,控制终端等。而子进程所独有的只是它的进程号,资源使用和计时器等。可以看出,使用fork函数的代价是很大的,它复制了父进程中的代码段,数据段和堆栈段里的大部分内容,使得fork函数的执行速度并不快。(需要说明的是,现代Linux内核采用写时复制(copy-on-write)技术,fork时并不会立即复制物理内存页,只有在父子进程之一写入某一页时才真正复制该页,因此实际开销比上述描述要小得多。)

创建一个进程,至少涉及的函数:

sys_clone, do_fork, dup_task_struct, copy_process, copy_thread, ret_from_fork

技术分享

 这只是图中的fork一个分支

进程状态的切换过程和原因大致如下图:

技术分享

实验:

1、流程

 

向MenuOS中添加fork()命令

 

编译并启动MenuOS

 

GDB连接,添加breakpoints

 

根据观察copy_process是建立新进程,

 

wake_up_new_task则是运行这个新进程,所以要尝试添加这样一个断点

 

breakpoints list:

 

b sys_clone

b do_fork

 

b copy_process

 

b dup_task_struct

 

b alloc_task_struct_node

 

b arch_dup_task_struct

 

b copy_thread

 

b ret_from_fork

 

b wake_up_new_task

 跟踪fork执行

2、实验记录

2.1 添加并验证fork()可用

2.2 跟踪fork

技术分享

 

技术分享

 

四、总结

fork创建的新进程是和父进程(除了PID、PPID)一样的副本,包括真实和有效的UID、GID、进程组和会话ID、环境、资源限制、打开的文件以及共享内存段。

根据代码的分析,在do_fork中,copy_process负责完成子进程运行前的准备工作,wake_up_new_task则唤醒新创建的子进程并将其交给调度器,标志着子进程fork过程的完成。

以上是关于Linux如何创建一个新进程的主要内容,如果未能解决你的问题,请参考以下文章

Linux系统编程-进程创建(fork)外部程序调用(exec)

Linux系统基于fork()新进程的创建

实验六:分析Linux内核创建一个新进程的过程

linux父子进程执行问题

linux创建进程fork的方法步骤

linux-fork&vfork