Why is async performance worse than synchronous on blocking file I/O in Node.js?


Posted: 2020-07-16 20:47:23

【Question】:

From reading some posts on Stack Overflow about synchronous vs. asynchronous I/O, it sounded like the overhead of async should be small, or that async should even be faster than synchronous calls for blocking I/O operations:

Some of the places I looked:

- Is non-blocking I/O really faster than multi-threaded blocking I/O? How?
- What is the overhead of javascript async functions

I wrote a small benchmark that creates 4 files (between 256 MiB and 1 GiB each) and looks at the performance of fs.readFile():

const { performance } = require('perf_hooks');
const fs = require('fs');
const { execSync } = require("child_process");

const sizes = [512, 1024, 256, 512]; // file sizes in MiB

function makeFiles() {
    for (let i = 0; i < sizes.length; i++) {
        execSync(`dd if=/dev/urandom of=file-${i}.txt bs=1M count=${sizes[i]}`, (error, stdout, stderr) => {
            console.log(`stdout: ${stdout}`);
        });
    }
}

function syncTest() {
    const startTime = performance.now();
    const results = [];

    for (let i = 0; i < sizes.length; i++) {
        results.push(fs.readFileSync(`file-${i}.txt`));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

async function asyncTest() {
    const startTime = performance.now();
    const results = [];

    for (let i = 0; i < sizes.length; i++) {
        results.push(fs.promises.readFile(`file-${i}.txt`));
    }
    await Promise.all(results);

    console.log(`Async version took ${performance.now() - startTime}ms`);
}

makeFiles();
syncTest();
asyncTest();

Output:

> makeFiles();

512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.28077 s, 125 MB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 8.45918 s, 127 MB/s
256+0 records in
256+0 records out
268435456 bytes (268 MB, 256 MiB) copied, 1.96678 s, 136 MB/s
512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.32488 s, 124 MB/s
undefined
> syncTest();
Sync version took 1055.9131410121918ms
undefined
> asyncTest();
Promise { <pending> }
> Async version took 6991.523499011993ms

So it looks like the async version is roughly 7x slower than the sync version. How can this slowdown be explained? And when should the sync version be used?

Repl.it link: https://repl.it/repls/VioletredFatherlyDaemons

System: Node 13.9.0 on Arch Linux 5.5.4-arch1-1

【Comments】:

Synchronous file I/O blocks the event loop, so your node.js can't do anything else while a synchronous file operation is being processed. That ruins server scalability and performance. This isn't about the performance difference of a single operation; it's about letting node.js do other things while file I/O is in progress rather than blocking the entire event loop.

So is there any general use case for synchronous I/O outside of initialization/startup code?

In a server, not really, because it destroys your server's scalability. But node.js has many uses that aren't servers. For example, I have build scripts and disk-maintenance scripts that are single-user, and they are less complicated to write and debug with synchronous file I/O.

FYI, I ported your script to Windows and got Sync version took 1502.4960010051727ms, Async version took 2460.849498987198ms. Sync is still faster, but only 63% faster, not 700%. Not sure what was happening when you ran it.

FYI, I'm running node v12.13.1 on Windows 10.
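As a minimal sketch of what that first comment means (assuming one of the question's generated files, file-0.txt, still exists), a timer cannot fire while readFileSync() holds the event loop, but it keeps ticking while the promise-based read is in flight:

const fs = require('fs');

const start = Date.now();
const timer = setInterval(() => {
    console.log(`tick at ${Date.now() - start}ms`);
}, 100);

// Nothing else can run until this returns, so no ticks are printed
// while the synchronous read is in progress.
fs.readFileSync('file-0.txt');
console.log(`sync read done at ${Date.now() - start}ms`);

// Ticks keep firing while this read is in flight, because the actual
// I/O happens on libuv's thread pool and only the completion callback
// touches the event loop.
fs.promises.readFile('file-0.txt').then(() => {
    console.log(`async read done at ${Date.now() - start}ms`);
    clearInterval(timer);
});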

【Answer 1】:

See the Version 2 additions below for an even faster version.

Version 1

FYI, on top of everything in my comments above, here's the fastest I was able to get an async version to run:

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];

    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, { highWaterMark: 64 * 1024 * 10 });
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);

    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

And here are my results, on Windows 10 with node v12.13.1:

node --expose_gc temp
Sync version took 1175.2680000066757ms
Async version took 2315.0439999699593ms
Async stream version took 1600.0085990428925ms
Async stream parallel version took 1111.310200035572ms
Async serial version took 4387.053400993347ms

Note that I changed the scheme slightly to pass an array of filenames into each test, rather than rebuilding the filenames every time, so I could centralize creating the files.

The things that helped speed it up:

1. Using a larger highWaterMark, which is presumably the stream buffer size.
2. Collecting the data in an array and concatenating it once at the end (this dramatically reduces peak memory consumption and GC work); see the sketch after this list.
3. Letting the different files in the loop run in parallel with each other.
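For point 2, here's a minimal sketch (not part of the answer's test code; 'file-0.txt' stands in for any large file) contrasting the two ways of accumulating a stream into one Buffer:

const fs = require('fs');

const stream = fs.createReadStream('file-0.txt');

// Anti-pattern: re-concatenating on every chunk copies every byte read
// so far again and again (O(n^2) copying) and creates lots of
// short-lived garbage for the GC to clean up:
//
//   let result = Buffer.alloc(0);
//   stream.on('data', chunk => {
//       result = Buffer.concat([result, chunk]);
//   });

// What the tests above do instead: keep the chunks in an array and pay
// for one allocation and one copy at the end.
const chunks = [];
stream.on('data', chunk => chunks.push(chunk));
stream.on('end', () => {
    const result = Buffer.concat(chunks);
    console.log(`read ${result.length} bytes`);
});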

With these changes, it runs at roughly the same speed as the synchronous version, sometimes slightly slower, sometimes about the same.

I also put a 2-second delay between runs of each test and forced the garbage collector to run, to make sure GC activity didn't affect my results.

Here's the whole script, which can be run on any platform. Note that you must use the --expose_gc command line option, as in node --expose_gc temp.js:

// Run this with the --expose_gc command line option

const { performance } = require('perf_hooks');
const fs = require('fs');
const path = require('path');

const sizes = [512, 1024, 256, 512];   // file sizes in MB
const data = "0123456789\n";
const testDir = path.join(__dirname, "bigfile");

function makeFiles() {
    // make a bigger string to make fewer disk writes
    const bData = [];
    for (let i = 0; i < 1000; i++) {
        bData.push(data);
    }
    const biggerData = bData.join("");
    try {
        fs.mkdirSync(testDir);    // ignore errors if it already exists
    } catch(e) {
        // do nothing if it already exists
    }
    const files = [];

    for (let i = 0; i < sizes.length; i++) {
        let targetLen = sizes[i] * 1024 * 1024;
        let f;
        try {
            let fname = `${path.join(testDir, "test")}-${i}.txt`;
            f = fs.openSync(fname, 'w');
            files.push(fname);
            let len = 0;
            while (len < targetLen) {
                fs.writeSync(f, biggerData);
                len += biggerData.length;
            }
        } catch(e) {
            console.log(e);
            process.exit(1);
        } finally {
            if (f) fs.closeSync(f);
        }
    }
    return files;
}

function clearFiles(files) {
    for (let filename of files) {
        fs.unlinkSync(filename);
    }
    fs.rmdirSync(testDir);
}

function syncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.readFileSync(filename));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

async function asyncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.promises.readFile(filename));
    }
    await Promise.all(results);

    console.log(`Async version took ${performance.now() - startTime}ms`);
}

async function asyncTestStream(files) {
    const startTime = performance.now();

    for (let filename of files) {
        await new Promise((resolve, reject) => {
            let stream = fs.createReadStream(filename, { highWaterMark: 64 * 1024 * 10 });
            let data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('close', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        });
    }

    console.log(`Async stream version took ${performance.now() - startTime}ms`);
}

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];

    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, { highWaterMark: 64 * 1024 * 100 });
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);

    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSerial(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(await fs.promises.readFile(filename));
    }

    console.log(`Async serial version took ${performance.now() - startTime}ms`);
}

function delay(t) {
    return new Promise(resolve => {
        global.gc();
        setTimeout(resolve, t);
    });
}

// delay between each test to let any system stuff calm down
async function run() {
    const files = makeFiles();
    try {
        await delay(2000);
        syncTest(files);

        await delay(2000);
        await asyncTest(files);

        await delay(2000);
        await asyncTestStream(files);

        await delay(2000);
        await asyncTestStreamParallel(files);

        await delay(2000);
        await asyncTestSerial(files);
    } catch(e) {
        console.log(e);
    } finally {
        clearFiles(files);
    }
}

run();

Version 2

Then I discovered that for files under 2GB, we can preallocate a buffer for the entire file and read it in one single read, which can be even faster, presumably because it avoids per-chunk allocations and the final copy in Buffer.concat(). This version adds several new options: syncTestSingleRead(), asyncTestSingleReadSerial(), and asyncTestSingleReadParallel().

These new options are all faster, and for once the asynchronous options are consistently faster than the synchronous ones:

node --expose_gc temp
Sync version took 1602.546700000763ms
Sync single read version took 680.5937000513077ms
Async version took 2337.3639990091324ms
Async serial version took 4320.517499983311ms
Async stream version took 1625.9839000105858ms
Async stream parallel version took 1119.7469999790192ms
Async single read serial version took 580.7244000434875ms
Async single read parallel version took 360.47460001707077ms

And here's the code that matches those results:

// Run this with the --expose_gc command line option

const { performance } = require('perf_hooks');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');

const sizes = [512, 1024, 256, 512];   // file sizes in MB
const data = "0123456789\n";
const testDir = path.join(__dirname, "bigfile");

function makeFiles() {
    // make a bigger string to make fewer disk writes
    const bData = [];
    for (let i = 0; i < 1000; i++) {
        bData.push(data);
    }
    const biggerData = bData.join("");
    try {
        fs.mkdirSync(testDir);    // ignore errors if it already exists
    } catch(e) {
        // do nothing if it already exists
    }
    const files = [];

    for (let i = 0; i < sizes.length; i++) {
        let targetLen = sizes[i] * 1024 * 1024;
        let f;
        try {
            let fname = `${path.join(testDir, "test")}-${i}.txt`;
            f = fs.openSync(fname, 'w');
            files.push(fname);
            let len = 0;
            while (len < targetLen) {
                fs.writeSync(f, biggerData);
                len += biggerData.length;
            }
        } catch(e) {
            console.log(e);
            process.exit(1);
        } finally {
            if (f) fs.closeSync(f);
        }
    }
    return files;
}

function clearFiles(files) {
    for (let filename of files) {
        fs.unlinkSync(filename);
    }
    fs.rmdirSync(testDir);
}

// read a file synchronously in one single read
function readFileSync(filename) {
    let handle = fs.openSync(filename, "r");
    try {
        let stats = fs.fstatSync(handle);
        let buffer = Buffer.allocUnsafe(stats.size);
        let bytesRead = fs.readSync(handle, buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size");
        }
    } finally {
        fs.closeSync(handle);
    }
}

// read a file in one single read
async function readFile(filename) {
    let handle = await fsp.open(filename, "r");
    try {
        let stats = await handle.stat();
        let buffer = Buffer.allocUnsafe(stats.size);
        let { bytesRead } = await handle.read(buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size");
        }
    } finally {
        handle.close();
    }
}

function syncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.readFileSync(filename));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

function syncTestSingleRead(files) {
    const startTime = performance.now();

    for (let filename of files) {
        readFileSync(filename);
    }
    console.log(`Sync single read version took ${performance.now() - startTime}ms`);
}

async function asyncTest(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(fs.promises.readFile(filename));
    }
    await Promise.all(results);

    console.log(`Async version took ${performance.now() - startTime}ms`);
}

async function asyncTestStream(files) {
    const startTime = performance.now();

    for (let filename of files) {
        await new Promise((resolve, reject) => {
            let stream = fs.createReadStream(filename, { highWaterMark: 64 * 1024 * 10 });
            let data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('close', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        });
    }

    console.log(`Async stream version took ${performance.now() - startTime}ms`);
}

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];

    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, { highWaterMark: 64 * 1024 * 100 });
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);

    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadSerial(files) {
    const startTime = performance.now();
    let buffer;
    for (let filename of files) {
        let handle = await fsp.open(filename, "r");
        try {
            let stats = await handle.stat();
            if (!buffer || buffer.length < stats.size) {
                buffer = Buffer.allocUnsafe(stats.size);
            }
            let { bytesRead } = await handle.read(buffer, 0, stats.size, 0);
            if (bytesRead !== stats.size) {
                throw new Error("bytesRead not full file size");
            }
        } finally {
            handle.close();
        }
    }
    console.log(`Async single read serial version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadParallel(files) {
    const startTime = performance.now();

    await Promise.all(files.map(readFile));

    console.log(`Async single read parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSerial(files) {
    const startTime = performance.now();
    const results = [];

    for (let filename of files) {
        results.push(await fs.promises.readFile(filename));
    }

    console.log(`Async serial version took ${performance.now() - startTime}ms`);
}

function delay(t) {
    return new Promise(resolve => {
        global.gc();
        setTimeout(resolve, t);
    });
}

// delay between each test to let any system stuff calm down
async function run() {
    const files = makeFiles();
    try {
        await delay(2000);
        syncTest(files);

        await delay(2000);
        syncTestSingleRead(files);

        await delay(2000);
        await asyncTest(files);

        await delay(2000);
        await asyncTestSerial(files);

        await delay(2000);
        await asyncTestStream(files);

        await delay(2000);
        await asyncTestStreamParallel(files);

        await delay(2000);
        await asyncTestSingleReadSerial(files);

        await delay(2000);
        await asyncTestSingleReadParallel(files);
    } catch(e) {
        console.log(e);
    } finally {
        clearFiles(files);
    }
}

run();

【Discussion】:
