使用 Node.js 将文件系统中的目录结构转换为 JSON

Posted

技术标签:

【中文标题】使用 Node.js 将文件系统中的目录结构转换为 JSON【英文标题】:Convert a directory structure in the filesystem to JSON with Node.js 【发布时间】:2012-06-27 00:01:12 【问题描述】:

我有这样的文件结构:

root
|_ fruits
|___ apple
|______images
|________ apple001.jpg
|________ apple002.jpg
|_ animals
|___ cat
|______images
|________ cat001.jpg
|________ cat002.jpg

我想使用 javascript 和 Node.js,监听这个根目录和所有子目录,并创建一个反映这个目录结构的 JSON,每个节点都包含类型、名称、路径和子节点:

data = [
  {
    type: "folder",
    name: "animals",
    path: "/animals",
    children: [
      {
        type: "folder",
        name: "cat",
        path: "/animals/cat",
        children: [
          {
            type: "folder",
            name: "images",
            path: "/animals/cat/images",
            children: [
              {
                type: "file",
                name: "cat001.jpg",
                path: "/animals/cat/images/cat001.jpg"
              }, {
                type: "file",
                name: "cat002.jpg",
                path: "/animals/cat/images/cat002.jpg"
              }
            ]
          }
        ]
      }
    ]
  }
];

这是同一份数据的 CoffeeScript 写法:

data = 
[
  type: "folder"
  name: "animals"
  path: "/animals"
  children  :
    [
      type: "folder"
      name: "cat"
      path: "/animals/cat"
      children:
        [
          type: "folder"
          name: "images"
          path: "/animals/cat/images"
          children: 
            [
              type: "file"
              name: "cat001.jpg"
              path: "/animals/cat/images/cat001.jpg"
            , 
              type: "file"
              name: "cat001.jpg"
              path: "/animals/cat/images/cat002.jpg"
            ]
        ]
    ]
]

如何在 django 视图中获取这种 json 数据格式?(python)

【问题讨论】:

这是获取 d3.js 分层数据的常见需求。我想用 d3.js 标记这个问题,但 Stack Overflow 最多允许 5 个 :( 我希望这些答案之一能够从标准输入读取路径,以便您可以将路径列表转换为 json 对象,如下所示:find | paths2json。这将通过管道充分利用 Unix 可组合性的强大功能。 【参考方案1】:

这是一个草图。错误处理留给读者练习。

var fs = require('fs'),
    path = require('path')

/**
 * Synchronously describe a file-system entry as a plain object tree.
 *
 * Every node has `path`, `name` and `type`. Directories get type "folder" and
 * a `children` array built by recursing into each entry; everything else gets
 * type "file".
 *
 * Child paths are built with `path.join`, so they are normalised: no doubled
 * separators when `filename` ends with one, and platform-specific separators.
 *
 * @param {string} filename - Path of the file or directory to describe.
 * @returns {{path: string, name: string, type: string, children?: Array}} The node for `filename`.
 * @throws {Error} Whatever `fs.lstatSync` / `fs.readdirSync` throw (missing path, permissions, ...).
 */
function dirTree(filename) {
    var stats = fs.lstatSync(filename),
        info = {
            path: filename,
            name: path.basename(filename)
        };

    if (stats.isDirectory()) {
        info.type = "folder";
        info.children = fs.readdirSync(filename).map(function(child) {
            return dirTree(path.join(filename, child));
        });
    } else {
        // lstat does not follow links, so symlinks (and sockets, devices, ...)
        // end up here and are reported as plain files.
        info.type = "file";
    }

    return info;
}

// Command-line entry point: only runs when this file is executed directly
// (not when it is require()d by another module).
if (require.main === module) {
    // node dirTree.js ~/foo/bar
    var util = require('util');
    // depth `null` makes util.inspect print the whole tree instead of
    // collapsing nested levels to [Object].
    console.log(util.inspect(dirTree(process.argv[2]), false, null));
}
【讨论】:

这在第一层效果很好,但是 children 看起来像:children: [Object] ……你看到这里有什么问题吗? —— 是的。创建的对象没有问题,只是 console.log 默认只把对象打印到有限的深度。我已编辑代码以打印完整的树。 —— 谢谢你的函数。我认为最好使用 path.join 而不是 a + '/' + b,即:return dirTree(path.join(filename, child)); —— 如何对输出进行排序,使目录先出现(按字母顺序),然后是文件(也按字母顺序)? —— @peterButcher 如果它们以树形结构打印,你打算如何排序?不过你可以用 lodash 来整理返回的对象。它只是一个普通对象,所以可以像其他任何东西一样对它排序 :)【参考方案2】:

它有一个 NPM 模块

https://www.npmjs.com/package/directory-tree

创建一个表示目录树的对象。

输入(目录结构):

photos
├── summer
│   └── june
│       └── windsurf.jpg
└── winter
    └── january
        ├── ski.png
        └── snowboard.jpg

输出(生成的对象):


{
  "path": "",
  "name": "photos",
  "type": "directory",
  "children": [
    {
      "path": "summer",
      "name": "summer",
      "type": "directory",
      "children": [
        {
          "path": "summer/june",
          "name": "june",
          "type": "directory",
          "children": [
            {
              "path": "summer/june/windsurf.jpg",
              "name": "windsurf.jpg",
              "type": "file"
            }
          ]
        }
      ]
    },
    {
      "path": "winter",
      "name": "winter",
      "type": "directory",
      "children": [
        {
          "path": "winter/january",
          "name": "january",
          "type": "directory",
          "children": [
            {
              "path": "winter/january/ski.png",
              "name": "ski.png",
              "type": "file"
            },
            {
              "path": "winter/january/snowboard.jpg",
              "name": "snowboard.jpg",
              "type": "file"
            }
          ]
        }
      ]
    }
  ]
}

用法

var tree = directoryTree('/some/path');

您还可以按扩展名过滤:

var filteredTree = directoryTree('/some/path', ['.jpg', '.png']);

【讨论】:

这正是我所需要的。谢谢。效果很好。【参考方案3】:

接受的答案有效,但它是同步的,会严重影响您的性能,尤其是对于大型目录树。 我强烈建议您使用以下异步解决方案,它更快且无阻塞。 基于并行解决方案here。

var fs = require('fs');
var path = require('path');

/**
 * Asynchronously list the contents of `dir` as a nested array of nodes.
 *
 * Each node is either `{ type: 'file', name }` or
 * `{ name, type: 'folder', children }`, where `children` is the array produced
 * by scanning that sub-directory. Entries are scanned in parallel, so the order
 * of nodes within an array is not deterministic.
 *
 * @param {string} dir - Directory to scan.
 * @param {function(?Error, Array=)} done - Node-style callback, invoked exactly
 *     once with either an error or the array of nodes (empty for an empty directory).
 */
var diretoryTreeToObj = function(dir, done) {
    var results = [];
    var settled = false;

    // Report the outcome once, even when several parallel branches fail.
    function finish(err) {
        if (settled) {
            return;
        }
        settled = true;
        if (err) {
            done(err);
        } else {
            done(null, results);
        }
    }

    fs.readdir(dir, function(err, list) {
        if (err) {
            return finish(err);
        }

        var pending = list.length;

        // An empty directory yields an empty array: the same shape as the
        // non-empty case, so a parent can always use it as `children`.
        if (!pending) {
            return finish();
        }

        list.forEach(function(entry) {
            var file = path.resolve(dir, entry);
            fs.stat(file, function(statErr, stat) {
                if (stat && stat.isDirectory()) {
                    diretoryTreeToObj(file, function(childErr, res) {
                        if (childErr) {
                            return finish(childErr);
                        }
                        results.push({
                            name: path.basename(file),
                            type: 'folder',
                            children: res
                        });
                        if (!--pending) {
                            finish();
                        }
                    });
                } else {
                    // Entries that cannot be stat-ed (e.g. dangling symlinks)
                    // are deliberately still listed, as plain files.
                    results.push({
                        type: 'file',
                        name: path.basename(file)
                    });
                    if (!--pending) {
                        finish();
                    }
                }
            });
        });
    });
};

示例用法:

// Example: scan a directory and print the resulting tree as JSON.
var dirTree = ('/path/to/dir');

diretoryTreeToObj(dirTree, function(err, res) {
    if (err) {
        console.error(err);
        // Nothing to print on failure: `res` is undefined here.
        return;
    }

    console.log(JSON.stringify(res));
});

【讨论】:

次要问题:您在 diretoryTreeToObj 中有错字,我认为应该是 directoryTreeToObj 没有?【参考方案4】:

基于 Miika 的解决方案,这是我的 CoffeeScript 示例(配合 Express 使用):

fs = require 'fs' #file system module
path = require 'path' # file path module

# Returns a JSON-serialisable tree describing `root` and everything below it.
#
# Each node carries `path`, `name` and `type` ('folder', 'file', 'link' or
# 'unknown'); folders additionally carry `children`, one node per entry.
# When `root` does not exist the string 'error: root does not exist' is
# returned instead of a node.
tree = (root) ->
    # clean trailing '/'(s), but keep a bare "/" (or "///") as the filesystem
    # root instead of reducing it to the empty string
    cleaned = root.replace /\/+$/ , ""
    root = if cleaned is "" and root isnt "" then "/" else cleaned
    # extract tree ring if root exists
    if fs.existsSync root
        # lstat does not follow links, so symlinks can be told apart below
        ring = fs.lstatSync root
    else
        return 'error: root does not exist'
    # type agnostic info
    info =
        path: root
        name: path.basename(root)
    # dir
    if ring.isDirectory()
        info.type = 'folder'
        # execute for each child and call tree recursively;
        # path.join avoids a doubled separator when root is "/"
        info.children = fs.readdirSync(root).map (child) ->
            tree path.join(root, child)
    # file
    else if ring.isFile()
        info.type = 'file'
    # link
    else if ring.isSymbolicLink()
        info.type = 'link'
    # other
    else
        info.type = 'unknown'
    # return tree
    info

# error handling
# Turns an exception into the generic message sent to the client. The error
# itself is logged server-side so failures are not silently discarded.
handle = (e) ->
    console.error e
    return 'uncaught exception...'

# Route handler: responds with the directory tree of './test/', or with the
# message produced by `handle` when building the tree throws.
exports.index = (req, res) ->
    try
        res.send tree('./test/')
    catch err
        res.send handle(err)

【讨论】:

【参考方案5】:

这是一个异步解决方案:

 /**
  * Asynchronously build a nested description of `dir`.
  *
  * Files resolve to `{ name, time, size }`; directories resolve to
  * `{ name, time, entries }`, where `entries` holds one result per child.
  * An entry that does not exist (or cannot be accessed) resolves to
  * `undefined` instead of rejecting.
  *
  * Expects `fs` and `path` to be in scope.
  *
  * @param {string} dir - Path to start from.
  * @returns {Promise<Object|undefined>} The tree, or `undefined` if `dir` is missing.
  */
 function list(dir) {
   const walk = async (entry) => {
     try {
       // Replaces the deprecated fs.exists: any access failure means "not there".
       await fs.promises.access(entry);
     } catch {
       return undefined;
     }

     // lstat: symbolic links are reported as themselves, never followed.
     const stats = await fs.promises.lstat(entry);
     if (!stats.isDirectory()) {
       return {
         // path: entry,
         // type: 'file',
         name: path.basename(entry),
         time: stats.mtime,
         size: stats.size
       };
     }

     const files = await fs.promises.readdir(entry);
     const children = await Promise.all(
       files.map(child => walk(path.join(entry, child)))
     );
     return {
       // path: entry,
       // type: 'folder',
       name: path.basename(entry),
       time: stats.mtime,
       entries: children
     };
   };

   return walk(dir);
 }

请注意,当目录不存在时,将返回空结果而不是抛出错误。

这是一个示例结果:


{
    "name": "root",
    "time": "2017-05-09T07:46:26.740Z",
    "entries": [
        {
            "name": "book.txt",
            "time": "2017-05-09T07:24:18.673Z",
            "size": 0
        },
        {
            "name": "cheatsheet-a5.pdf",
            "time": "2017-05-09T07:24:18.674Z",
            "size": 262380
        },
        {
            "name": "docs",
            "time": "2017-05-09T07:47:39.507Z",
            "entries": [
                {
                    "name": "README.md",
                    "time": "2017-05-08T10:02:09.651Z",
                    "size": 19229
                }
            ]
        }
    ]
}

这将是:

root
|__ book.txt
|__ cheatsheet-a5.pdf
|__ docs
      |__ README.md

【讨论】:

【参考方案6】:

您可以使用此项目中的代码,但应根据需要调整代码:

https://github.com/NHQ/Node-FileUtils/blob/master/src/file-utils.js#L511-L593

输入(目录结构):

a
|- b
|  |- c
|  |  |- c1.txt
|  |
|  |- b1.txt
|  |- b2.txt
|
|- d
|  |
|
|- a1.txt
|- a2.txt

输出(生成的对象):


{
    b: {
        "b1.txt": "a/b/b1.txt",
        "b2.txt": "a/b/b2.txt",
        c: {
            "c1.txt": "a/b/c/c1.txt"
        }
    },
    d: {},
    "a2.txt": "a/a2.txt",
    "a1.txt": "a/a1.txt"
}

调用方式:

// List directory "a"; the callback receives (error, files).
new File ("a").list (function (error, files) {
    //files...
});

【讨论】:

我从 github 中删除了该项目。这个链接是一个分叉。 @GabrielLlamas 为什么从 github 中删除了这个?对我来说似乎是一个相当有用的项目。【参考方案7】:

在这种情况下,我使用了“walk”lib,它获取您的根路径并递归遍历文件和目录,并发出目录/文件事件,其中包含您需要从节点获取的所有信息, 查看该实现 -->

const walk = require('walk');

/**
 * Builds an in-memory tree of a directory using the event-based `walk` library.
 */
class FsTree {

    /**
     * Walk `rootPath` and collect every non-hidden file and directory into a tree.
     *
     * Nodes look like `{ name, type: 'dir', children, path, parentPath? }` or
     * `{ name, type: 'file', path, parentPath? }`; `path` is relative to the root
     * and starts with '/'. Entries whose name starts with '.' and everything
     * below such a directory are skipped.
     *
     * @param rootPath directory to walk; defaults to `__dirname` when falsy
     * @returns Promise resolving to the array of top-level nodes, or rejecting
     *          with the stats array passed to walk's 'errors' event
     */
    getFileSysTree(rootPath) {
        return new Promise((resolve, reject) => {

            const root = rootPath || __dirname; // if there's no rootPath use exec location
            const tree = [];
            const nodesMap = {};
            const walker  = walk.walk(root, { followLinks: false }); // filter doesn't work well

            function addNode(node, path) {
                const relativePath = path.replace(root, '');

                // Ignore hidden files. The check uses the path relative to the
                // root so that a root located under a dot-directory
                // (e.g. ~/.config/app) is not filtered out wholesale.
                if (node.name.indexOf('.') === 0 || relativePath.indexOf('/.') >= 0) {
                    return;
                }

                node.path = relativePath + '/' + node.name;
                nodesMap[node.path] = node;

                if (relativePath.length === 0) { // direct child of the root
                    tree.push(node);
                    return;
                }
                // Attach to the parent recorded earlier under its relative path.
                node.parentPath = node.path.substring(0, node.path.lastIndexOf('/'));
                const parent = nodesMap[node.parentPath];
                parent.children.push(node);
            }

            walker.on('directory', (path, stats, next) => {
                addNode({ name: stats.name, type: 'dir', children: [] }, path);
                next();
            });

            walker.on('file', (path, stats, next) => {
                addNode({ name: stats.name, type: 'file' }, path);
                next();
            });

            walker.on('end', () => {
                resolve(tree);
            });

            walker.on('errors', (root, nodeStatsArray, next) => {
                reject(nodeStatsArray);
                next();
            });
        });
    }
}

// Example: print the tree of the directory containing this script.
const fsTreeFetcher = new FsTree();

fsTreeFetcher.getFileSysTree(__dirname).then((result) => {
    console.log(result);
}).catch((errors) => {
    // getFileSysTree rejects with the stats array from walk's 'errors' event.
    console.error(errors);
});

【讨论】:

【参考方案8】:

在 Sean C. 的回答基础上做一点补充。

我很喜欢它,但是使用 async await 使它更具可读性。

import fs from 'fs';
import {
  lstat,
  readdir,
  access,
} from 'fs/promises';
import path from 'path';

/**
 * Check whether `file` exists.
 *
 * @param {string} file - Path to test.
 * @returns {Promise<boolean>} `true` when the path is visible to this process,
 *     `false` on any access error.
 */
async function existsAsync(file) {
  try {
    await access(file, fs.constants.F_OK);
    return true;
  } catch {
    return false;
  }
}

/**
 * Recursively describe `dir` and everything below it.
 *
 * Files become `{ name, time, size }`, directories become
 * `{ name, time, entries }` with one element per child. A path that does not
 * exist yields an empty object instead of an error.
 *
 * @param {string} dir - Path to start from.
 * @returns {Promise<Object>} The tree rooted at `dir`.
 */
async function listFileTreeRecursive(dir) {
  const recurse = async (entry) => {
    if (!(await existsAsync(entry))) {
      return {};
    }

    // lstat: symbolic links are reported as themselves, never followed.
    const stats = await lstat(entry);
    if (!stats.isDirectory()) {
      return {
        name: path.basename(entry),
        time: stats.mtime,
        size: stats.size,
      };
    }

    const files = await readdir(entry);
    // Children are scanned in parallel; Promise.all keeps readdir's order.
    const childEntries = await Promise.all(
      files.map((child) => recurse(path.join(entry, child))),
    );
    return {
      name: path.basename(entry),
      time: stats.mtime,
      entries: childEntries,
    };
  };

  return recurse(dir);
}
【讨论】:

以上是关于使用 Node.js 将文件系统中的目录结构转换为 JSON的主要内容,如果未能解决你的问题,请参考以下文章

如何使用 Node.js 从 Supabase Storage 保存文件(将 blob 转换为文件)

如何将此 Python 代码转换为 Node.js

node.js - 错误:ENOENT:没有这样的文件或目录,取消链接

如何使用 node.js 将 .pcm 文件转换为 wav 文件?

如何使用 Node.js 将 CSV 文件转换为 JSON 脚本?

Node.js:使用“fs”迭代目录时检查文件是不是为符号链接