Linux启动时间优化-内核和用户空间启动优化实践

Posted 2021-01-15 Arnold Lu@南京

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Linux启动时间优化-内核和用户空间启动优化实践相关的知识，希望对你有一定的参考价值。

关键词：initcall、bootgraph.py、bootchartd、pybootchart等。

启动时间的优化，分为两大部分，分别是内核部分和用户空间两大部分。

从内核timestamp 0.000000作为内核启动起点，到free_initmem()输出"Freeing init memory"作为内核启动的终点。

借助于bootgraph.py对内核的kmsg进行分析，输出bootgraph.html和initcall耗时csv文件。

在紧接着free_initmem()下面，是init进程的启动，作为用户空间的起点。内核的终点和用户空间的起点基本上可以任务无缝衔接。

用户空间借助bootchartd抓取/proc/uptime、/proc/stat、/proc/diskstats、/proc/xxx/stat、/proc/meminfo信息，最后打包到bootlog.tgz。

pybootchart.py对bootlog.tgz进行解析，并生成关于CPU占用率、Memory使用情况、磁盘吞吐率以及各进程执行情况的图标。

基于以上内核和用户空间输出，可以发现initcall和进程启动的异常情况。

比如哪个initcall耗时异常；哪个进程启动耗时过长，可以进入进程启动函数查看是否有阻塞等情况。

1. 内核启动优化

在内核源码中自带了一个工具(scripts/bootgraph.pl)用于分析启动时间，这个工具生成output.svg。

但是bootgraph.py生成的结果可读性更好，也更加容易发现问题。

1.1 准备工作

对内核的修改包括，initcall_debug和CONFIG_LOG_BUF_SHIFT。

1.1.1 打开initcall_debug

bool initcall_debug = true;

这样做的目的是在内核kmsg中记录每个initcall的calling和initcall时间，本工具分析依赖于这些kmsg。

static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
    ktime_t calltime, delta, rettime;
    unsigned long long duration;
    int ret;

    printk(KERN_DEBUG "calling  %pF @ %i\\n", fn, task_pid_nr(current));-----------------------initcall开始log
    calltime = ktime_get();
    ret = fn();
    rettime = ktime_get();
    delta = ktime_sub(rettime, calltime);
    duration = (unsigned long long) ktime_to_ns(delta) >> 10;
    printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\\n", fn,
        ret, duration);-----------------------------------------------------------------------initcall结束log

    return ret;
}

int __init_or_module do_one_initcall(initcall_t fn)
{
    int count = preempt_count();
    int ret;

    if (initcall_debug)
        ret = do_one_initcall_debug(fn);
    else
        ret = fn();
...
}

1.1.2 增大log_buf空间

log_buf用于存放printk消息，他类似于RingBuffer，超出部分会覆盖头部。

#define __LOG_BUF_LEN    (1 << CONFIG_LOG_BUF_SHIFT)

static char __log_buf[__LOG_BUF_LEN];
static char *log_buf = __log_buf;

所以将CONFIG_LOG_BUF_SHIFT从16增加到18，即log_buf空间从64K增加到256K。

1.1.3 对bootgraph.py的改进

1.1.3.1 划分内核启动的起点终点

界定内核启动的起点很容易，从时间0开始。

用户空间的起点是init进程，所以将内核空间的终点放在启动init进程之前。

这样就可以清晰看到initcall在整个内核初始化中的位置。

static inline int free_area(unsigned long pfn, unsigned long end, char *s)
{
    unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10);
...
    if (size && s)
        printk(KERN_INFO "Freeing %s memory: %dK\\n", s, size);-------------输出“Freeing init memory:”到kmsg中。

    return pages;
}

void free_initmem(void)
{
...
    if (!machine_is_integrator() && !machine_is_cintegrator())
        totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
                        __phys_to_pfn(__pa(__init_end)),
                        "init");
}

static noinline int init_post(void)
{
    /* need to finish all async __init code before freeing the memory */
    async_synchronize_full();
    free_initmem();------------------------------------------------------------内核空间的终点
...
    run_init_process("/sbin/init");--------------------------------------------用户空间的起点
    run_init_process("/etc/init");
    run_init_process("/bin/init");
    run_init_process("/bin/sh");
...
}

基于“Freeing init memory”对内核和用户空间初始化进行划分，Split kernel and userspace by free_area()。

commit 6195fa73b5522ec5f2461932c894421c30fc3cd7
Author: Arnold Lu <arnoldlu@qq.com>
Date:   Tue Jun 19 22:49:09 2018 +0800

    Split kernel and userspace by free_area()

diff --git a/bootgraph.py b/bootgraph.py
index 8ee626c..dafe359 100755
--- a/bootgraph.py
+++ b/bootgraph.py
@@ -63,6 +63,7 @@ class SystemValues(aslib.SystemValues):
     timeformat = \'%.6f\'
     bootloader = \'grub\'
     blexec = []
+    last_init=0
     def __init__(self):
         self.hostname = platform.node()
         self.testtime = datetime.now().strftime(\'%Y-%m-%d_%H:%M:%S\')
@@ -223,7 +224,7 @@ class Data(aslib.Data):
             \'kernel\': {\'list\': dict(), \'start\': -1.0, \'end\': -1.0, \'row\': 0,
                 \'order\': 0, \'color\': \'linear-gradient(to bottom, #fff, #bcf)\'},
             \'user\': {\'list\': dict(), \'start\': -1.0, \'end\': -1.0, \'row\': 0,
-                \'order\': 1, \'color\': \'#fff\'}
+                \'order\': 1, \'color\': \'linear-gradient(to bottom, #456, #cde)\'}
         }
     def deviceTopology(self):
         return \'\'
@@ -345,17 +346,18 @@ def parseKernelLog():
         m = re.match(\'^initcall *(?P<f>.*)\\+.* returned (?P<r>.*) after (?P<t>.*) usecs\', msg)
         if(m):
             data.valid = True
-            data.end = ktime
+            sysvals.last_init = \'%.0f\'%(ktime*1000)
             f, r, t = m.group(\'f\', \'r\', \'t\')
             if(f in devtemp):
                 start, pid = devtemp[f]
                 data.newAction(phase, f, pid, start, ktime, int(r), int(t))
                 del devtemp[f]
             continue
-        if(re.match(\'^Freeing unused kernel memory.*\', msg)):
+        if(re.match(\'^Freeing init kernel memory.*\', msg)):
             data.tUserMode = ktime
             data.dmesg[\'kernel\'][\'end\'] = ktime
             data.dmesg[\'user\'][\'start\'] = ktime
+            data.end = ktime+0.1
             phase = \'user\'
 
     if tp.stamp:
@@ -531,8 +533,8 @@ def createBootGraph(data):
         print(\'ERROR: No timeline data\')
         return False
     user_mode = \'%.0f\'%(data.tUserMode*1000)
-    last_init = \'%.0f\'%(tTotal*1000)
-    devtl.html += html_timetotal.format(user_mode, last_init)
+    #last_init = \'%.0f\'%(tTotal*1000)
+    devtl.html += html_timetotal.format(user_mode, sysvals.last_init)
 
     # determine the maximum number of rows we need to draw
     devlist = []

1.1.3.2 将每个initcall启动记录到csv

图形化的好处就是直观，但是有时候需要更准确的数据进行排序分析。

这时候生成excel数据，进行处理就很方便了。

增加下面代码会在生成bootgraph.html的同时生成devinit.csv文件，Record data to csv file.。

commit 7bcb705ed30b1e1a0ca3385d01b412f8e6f23b4e
Author: Arnold Lu <arnoldlu@qq.com>
Date:   Tue Jun 19 22:52:43 2018 +0800

    Record data to csv file.

diff --git a/bootgraph.py b/bootgraph.py
index dafe359..7f43cb7 100755
--- a/bootgraph.py
+++ b/bootgraph.py
@@ -33,6 +33,7 @@ import shutil
 from datetime import datetime, timedelta
 from subprocess import call, Popen, PIPE
 import sleepgraph as aslib
+import csv
 
 # ----------------- CLASSES --------------------
 
@@ -48,6 +49,7 @@ class SystemValues(aslib.SystemValues):
     kernel = \'\'
     dmesgfile = \'\'
     ftracefile = \'\'
+    csvfile = \'devinit.csv\'
     htmlfile = \'bootgraph.html\'
     testdir = \'\'
     kparams = \'\'
@@ -300,6 +302,9 @@ def parseKernelLog():
         lf = open(sysvals.dmesgfile, \'r\')
     else:
         lf = Popen(\'dmesg\', stdout=PIPE).stdout
+    csvfile = open(sysvals.csvfile, \'wb\');
+    csvwriter = csv.writer(csvfile)
+    csvwriter.writerow([\'Func\', \'Start(ms)\', \'End(ms)\', \'Duration(ms)\', \'Return\'])
     for line in lf:
         line = line.replace(\'\\r\\n\', \'\')
         # grab the stamp and sysinfo
@@ -351,6 +356,7 @@ def parseKernelLog():
             if(f in devtemp):
                 start, pid = devtemp[f]
                 data.newAction(phase, f, pid, start, ktime, int(r), int(t))
+                csvwriter.writerow([f, start*1000, ktime*1000, float(t)/1000, int(r)]);
                 del devtemp[f]
             continue
         if(re.match(\'^Freeing init kernel memory.*\', msg)):
@@ -364,6 +370,7 @@ def parseKernelLog():
         sysvals.stamp = 0
         tp.parseStamp(data, sysvals)
     data.dmesg[\'user\'][\'end\'] = data.end
+    csvfile.close()
     lf.close()
     return data

1.2 生成测试结果

执行如下命令生成两个文件bootgraph.html和devinit.csv。

bootgraph.py依赖于kmsg中的“calling”/“initcall”识别initcall的起点终点，依赖“Freeing init memory”作为内核启动终点。

./bootgraph.py -dmesg kmsg.txt -addlogs

PS：下面两张截图都覆盖了函数名称。

1.2.1 bootgraph.html分析

从下面的图可以看出内核的初始化持续到2672ms处，然后整个内核初始化主要部分就是initcall。

同时从上面可以看出哪几个initcall占用时间较长，点击可以看到持续多久、是否成功等信息。

1.2.2 devinit.csv分析

相对于bootgraph.html，devinit.csv更容易进行量化。

对devinit.csv按照Duration进行降序，可以看出占用靠前的initcall。

1.3 优化实例

1.3.1 串口log优化

对于115200的串口速率来说，一个字符耗时大概1/(115200/10)=0.087ms。所以100个字符大概耗时8.7ms。

在内核初始化的时候，输出很多串口log是一件恐怖的事情。

虽然不是什么高深的技巧，但是却很有效。

1.3.1.1 初始状态

在没有打开initcall_debug，console_printk采用默认配置情况下，内核启动总共耗时2881ms。

<6>[ 2.881049] Freeing init memory: 340K

1.3.1.2 打开initcall_debug

在打开initcall_debug用于调试之后，引入了额外的开销。

但又不得不借助于initcall_debug来发现问题。

内核启动共耗时3404ms，引入了523ms开销。

关于initcall耗时列表如下：

1.3.1.3 打开initcall_debug，关闭console显示

在关闭了console显示过后，串口被最大化的关闭。

内核共耗时1281ms，相较原始状态减少了1600ms。也就是说整个内核初始化的一大半时间被节省了。

在关闭串口console之后，可以看出initcall的时间大大减少了。

1.3.2 优化耗时top10的initcall

参见上图列表，进入initcall进行优化。

2. 用户空间启动优化

用户空间的优化依赖于bootchartd获取log，然后使用pybootchart.py进行分析。

下面分几部分进行分析：如何在busybox中使能bootchartd；对bootchartd进行简单分析；对pybootchart.py进行简单分析；最后对测试结果进行分析。

2.1 使能bootchartd

要使能bootchartd，需要修改命令行参数以支持从bootchartd启动init；bootchartd本身以及tar、dmesg等支持。

2.1.1 bootloader中修改命令行参数增加

修改bootloader中传递给Linux的命令行参数，如果bootchartd放在ramfs中，使用rdinit=/sbin/bootchartd。

如果bootchartd放在非ramfs中：

init=/sbin/bootchartd

如此使用bootchartd作为init，然后再用bootchartd去启动/sbin/init。

Linux内核init_setup()函数从cmdline解析出init参数，赋给execute_command。

然后在init_post()中就会使用run_init_process()。

static int __init init_setup(char *str)
{
    unsigned int i;

    execute_command = str;------------------------------------------从cmdline中解析出init的值，赋给execute_command。
    /*
     * In case LILO is going to boot us with default command line,
     * it prepends "auto" before the whole cmdline which makes
     * the shell think it should execute a script with such name.
     * So we ignore all arguments entered _before_ init=... [MJ]
     */
    for (i = 1; i < MAX_INIT_ARGS; i++)
        argv_init[i] = NULL;
    return 1;
}
__setup("init=", init_setup);

static noinline int init_post(void)
{
...
    free_initmem();
...
    if (execute_command) {
        run_init_process(execute_command);---------------------------如果execute_command被赋值，那么作为init进程进行初始化。如果成功，后面的run_init_process()不会被执行。
        printk(KERN_WARNING "Failed to execute %s.  Attempting "
                    "defaults...\\n", execute_command);
    }
    run_init_process("/sbin/init");
    run_init_process("/etc/init");
    run_init_process("/bin/init");
    run_init_process("/bin/sh");

    panic("No init found.  Try passing init= option to kernel. "
          "See Linux Documentation/init.txt for guidance.");
}

2.1.2 内核中修改busybox

内核中需要打开bootchartd选项、同时还需要支持tar，因为需要对生成的文件进行打包。

由于需要获取内核kmsg，所以需要dmesg支持。

CONFIG_FEATURE_SEAMLESS_GZ=y
CONFIG_GUNZIP=y
CONFIG_GZIP=y
CONFIG_FEATURE_GZIP_LONG_OPTIONS=y
CONFIG_TAR=y
CONFIG_FEATURE_TAR_CREATE=y
CONFIG_FEATURE_TAR_AUTODETECT=y
CONFIG_FEATURE_TAR_FROM=y
CONFIG_FEATURE_TAR_OLDGNU_COMPATIBILITY=y
CONFIG_FEATURE_TAR_OLDSUN_COMPATIBILITY=y
CONFIG_FEATURE_TAR_GNU_EXTENSIONS=y
CONFIG_FEATURE_TAR_LONG_OPTIONS=y
CONFIG_FEATURE_TAR_TO_COMMAND=y
CONFIG_FEATURE_TAR_UNAME_GNAME=y
CONFIG_FEATURE_TAR_NOPRESERVE_TIME=y
CONFIG_BOOTCHARTD=y
CONFIG_FEATURE_BOOTCHARTD_BLOATED_HEADER=y
CONFIG_DMESG=y

2.1.3 对bootchartd的调整

对bootchartd的配置可以通过指定配置文件，ENABLE_FEATURE_BOOTCHARTD_CONFIG_FILE。

或者通过修改sample_period_us和process_accounting。

int bootchartd_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int bootchartd_main(int argc UNUSED_PARAM, char **argv)
{
...
    /* Read config file: */
    sample_period_us = 200 * 1000;-----------------------------------如果觉得粒度不够，丢失细节，可以提高采样频率查看更多细节。但代价是bootchard占用更多CPU资源。
    process_accounting = 0;
    if (ENABLE_FEATURE_BOOTCHARTD_CONFIG_FILE) {
        char* token[2];
        parser_t *parser = config_open2("/etc/bootchartd.conf" + 5, fopen_for_read);
        if (!parser)
            parser = config_open2("/etc/bootchartd.conf", fopen_for_read);
        while (config_read(parser, token, 2, 0, "#=", PARSE_NORMAL & ~PARSE_COLLAPSE)) {
            if (strcmp(token[0], "SAMPLE_PERIOD") == 0 && token[1])
                sample_period_us = atof(token[1]) * 1000000;
            if (strcmp(token[0], "PROCESS_ACCOUNTING") == 0 && token[1]
             && (strcmp(token[1], "on") == 0 || strcmp(token[1], "yes") == 0)
            ) {
                process_accounting = 1;
            }
        }
        config_close(parser);
        if ((int)sample_period_us <= 0)
            sample_period_us = 1; /* prevent division by 0 */
    }
...
    return EXIT_SUCCESS;
}

2.1.4 增加meminfo、dmesg

打开对/proc/meminfo的解析，原始数据保存在proc_meminfo.log中。

同时保存内核kmsg到dmesg中。

@@ -212,6 +212,7 @@
 {
     FILE *proc_stat = xfopen("proc_stat.log", "w");
     FILE *proc_diskstats = xfopen("proc_diskstats.log", "w");
+    FILE *proc_meminfo = xfopen("proc_meminfo.log", "w");
     //FILE *proc_netdev = xfopen("proc_netdev.log", "w");
     FILE *proc_ps = xfopen("proc_ps.log", "w");
     int look_for_login_process = (getppid() == 1);
@@ -240,6 +241,7 @@
 
         dump_file(proc_stat, "/proc/stat");
         dump_file(proc_diskstats, "/proc/diskstats");
+        dump_file(proc_meminfo, "/proc/meminfo");
         //dump_file(proc_netdev, "/proc/net/dev");
         if (dump_procs(proc_ps, look_for_login_process)) {
             /* dump_procs saw a getty or {g,k,x}dm
@@ -306,8 +308,11 @@
     }
     fclose(header_fp);
 
+    system(xasprintf("dmesg >dmesg"));
+
     /* Package log files */
-    system(xasprintf("tar -zcf /var/log/bootlog.tgz header %s *.log", process_accounting ? "kernel_pacct" : ""));
+    //system(xasprintf("tar -zcf /var/log/bootlog.tgz header %s *.log", process_accounting ? "kernel_pacct" : ""));
+    system(xasprintf("tar -zcf /var/log/bootlog.tgz header dmesg %s *.log", process_accounting ? "kernel_pacct" : ""));
     /* Clean up (if we are not in detached tmpfs) */
     if (tempdir) {
         unlink("header");
@@ -315,6 +320,7 @@
         unlink("proc_diskstats.log");
         //unlink("proc_netdev.log");
         unlink("proc_ps.log");
+        unlink("dmesg");
         if (process_accounting)
             unlink("kernel_pacct");
         rmdir(tempdir);

2.2 bootchartd分析

bootchartd的入口点是bootchartd_main()函数。

在bootchartd_main中主要就是解析start/init/stop参数。如果使能bootchartd.conf的话，解析出sample_period_us和process_accounting。

bootchartd_main()主要通过do_logging()收集log和finalize()做打包收尾工作。

static void do_logging(unsigned sample_period_us, int process_accounting)
{
    FILE *proc_stat = xfopen("proc_stat.log", "w");
    FILE *proc_diskstats = xfopen("proc_diskstats.log", "w");
    FILE *proc_meminfo = xfopen("proc_meminfo.log", "w");
    //FILE *proc_netdev = xfopen("proc_netdev.log", "w");
    FILE *proc_ps = xfopen("proc_ps.log", "w");
    int look_for_login_process = (getppid() == 1);
    unsigned count = 60*1000*1000 / sample_period_us; /* ~1 minute */--------------------------最长统计1分钟时间bootchart

    if (process_accounting) {
        close(xopen("kernel_pacct", O_WRONLY | O_CREAT | O_TRUNC));
        acct("kernel_pacct");
    }

    while (--count && !bb_got_signal) {--------------------------------------------------------如果满足count为0或者bb_got_signal，则停止采样。
        char *p;
        int len = open_read_close("/proc/uptime", G.jiffy_line, sizeof(G.jiffy_line)-2);
        if (len < 0)
            goto wait_more;
        /* /proc/uptime has format "NNNNNN.MM NNNNNNN.MM" */
        /* we convert it to "NNNNNNMM\\n" (using first value) */
        G.jiffy_line[len] = \'\\0\';
        p = strchr(G.jiffy_line, \'.\');
        if (!p)
            goto wait_more;
        while (isdigit(*++p))
            p[-1] = *p;
        p[-1] = \'\\n\';
        p[0] = \'\\0\';

        dump_file(proc_stat, "/proc/stat");---------------------------------------------------保存/proc/stat到proc_stat.og中
        dump_file(proc_diskstats, "/proc/diskstats");-----------------------------------------保存/proc/diskstats到proc_diskstats.log中
        dump_file(proc_meminfo, "/proc/meminfo");---------------------------------------------保存/proc/meminfo到proc_meminfo.log中
        //dump_file(proc_netdev, "/proc/net/dev");
        if (dump_procs(proc_ps, look_for_login_process)) {------------------------------------遍历/proc下所有进程到proc_ps.log中
            /* dump_procs saw a getty or {g,k,x}dm
             * stop logging in 2 seconds:
             */
            if (count > 2*1000*1000 / sample_period_us)
                count = 2*1000*1000 / sample_period_us;
        }
        fflush_all();
 wait_more:
        usleep(sample_period_us);-------------------------------------------------------------每次采样后睡眠sample_period_us，达到周期性的目的。
    }
}

dump_procs()处理/proc目录下每个pid的stat文件。

static int dump_procs(FILE *fp, int look_for_login_process)
{
    struct dirent *entry;
    DIR *dir = opendir("/proc");
    int found_login_process = 0;

    fputs(G.jiffy_line, fp);
    while ((entry = readdir(dir)) != NULL) {------------------------------------遍历/proc目录，返回entry是struct dirent数据结构
        char name[sizeof("/proc/%u/cmdline") + sizeof(int)*3];
        int stat_fd;
        unsigned pid = bb_strtou(entry->d_name, NULL, 10);----------------------这里只取数字类型，其它目录则continue。
        if (errno)
            continue;

        /* android\'s version reads /proc/PID/cmdline and extracts
         * non-truncated process name. Do we want to do that? */

        sprintf(name, "/proc/%u/stat", pid);
        stat_fd = open(name, O_RDONLY);
        if (stat_fd >= 0) {
            char *p;
            以上是关于Linux启动时间优化-内核和用户空间启动优化实践的主要内容，如果未能解决你的问题，请参考以下文章