markdown CSV解析器

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了markdown CSV解析器相关的知识,希望对你有一定的参考价值。

const fs = require('fs');
const { StringDecoder } = require('string_decoder');
const { promisify } = require('util');
const fs_open = promisify(fs.open);
const fs_read = promisify(fs.read);
const fs_close = promisify(fs.close);

// `debug` is an optional dependency. Start from a factory that hands out
// no-op loggers and swap in the real library only if it can be loaded.
let debug = () => () => {};
try {
  debug = require('debug');
} catch (error) {
  // Loading failed: keep the no-op factory assigned above.
}

module.exports = class CSVReader {

  constructor(filename, {
    chunkSize = 1 * 1024 * 1024,
    separator = ',',
    rowSeparator = '\n',
  } = {}) {
    this.log = debug(`CSVReader "${filename}"`)
    this.chunkSize = chunkSize;
    this.separator = separator;
    this.rowSeparator = rowSeparator;
    this.file = filename;
    this.buffer = null;
    this.cursor = 0;
    this.fd = null;
    this.rest = '';
    this.header = null;
    this._isOver = false;
  }

  get isOver() {
    return this._isOver;
  }

  async open() {
    this.fd = await fs_open(this.file, 'r');
    this.buffer = Buffer.alloc(this.chunkSize);
    this.log('Open.');
  }

  async _readHeader() {
    const line = await this._getNextLine();
    this.header = line.split(this.separator);
    this.log(`Columns: ${this.header}`);
    return line !== '';
  }

  async readNext() {
    if (!this.header) {
      if (!await this._readHeader()) {
        this.log('File is empty. Returning null.');
        return null;
      }
    }

    const line = await this._getNextLine();

    if (!line && this._isOver) {
      this.log('File is over. Returning null.');
      return null;
    }

    const cells = line.split(this.separator);
    const result = {};

    for (let i = 0; i < this.header.length; i++) {
      result[this.header[i]] = cells[i];
    }

    return result;
  }

  async _getNextLine() {
    if (this._isOver) {
      throw new Error(`Can't read! file is over: "${this.file}"`);
    }

    const buffer = this.buffer;
    let content = this.rest;

    while (!content.includes(this.rowSeparator)) {
      const { bytesRead: bytes } = await fs_read(this.fd, buffer, 0, this.chunkSize, this.cursor);
      this.cursor += bytes;

      this.log(`Reading ${bytes} bytes.`);

      if (bytes === 0) {
        this.close();
        this.rest = '';        
        return content;
      }

      if (bytes < this.chunkSize) {
        this.close();
        content += buffer.slice(0, bytes).toString();
      } else {
        content += buffer.toString();
      }
    }

    const index = content.indexOf(this.rowSeparator);

    if (index === -1) {
      this.rest = '';
      return content;
    }

    this.rest = content.slice(index + 1);
    return content.slice(0, index);
  }

  async close() {
    if (this._isOver) {
      return;
    }

    this.log('Closed.');
    this._isOver = true;
    return fs_close(this.fd);
  }
};
const fs = require('fs');

// Use the `debug` library when it is installed; otherwise every logger
// created through this factory is a silent no-op.
let debug = () => () => {};
try {
  debug = require('debug');
} catch (error) {
  // Library unavailable: the silent factory above stays in place.
}

module.exports = class CSVWritter {

  constructor(filename, {
    separator = ',',
    rowSeparator = '\n',
  } = {}) {
    this.log = debug(`CSVWritter "${filename}"`);
    this.separator = separator;
    this.rowSeparator = rowSeparator;
    this.file = filename;
    this.header = null;
    this.stream = null;
    this._isClosed = false;
  }

  async open() {
    this.stream = fs.createWriteStream(this.file);
    this.log('Open.');
  }

  async _writeHeader(row) {
    this.header = Object.keys(row);
    this.log(`Columns: ${this.header}.`);
    await this.stream.write(`${this.header.join(this.separator)}${this.rowSeparator}`);
  }

  async write(row) {
    if (!this.header) {
      await this._writeHeader(row);
    }

    const cells = [];

    for (const col of this.header) {
        cells.push(row[col]);
    }

    const line = cells.join(this.separator);
    return this.stream.write(`${line}${this.rowSeparator}`);
  }

  async close() {
    if (this._isClosed) {
      return null;
    }

    this.stream.close();
    this._isClosed = true;
    this.log('Closed.');
  }
};
# CSV Parsers

This provides a set of parser classes for CSV to and from JSON.
If the `debug` library is available (`npm i debug`) you can see debug information with `DEBUG='CSV*'` environment variable.

## CSV Reader

This will read a CSV file and return JSON objects from it.
It will use the first row in the CSV file to create the properties in the result object.

### Usage

```javascript
(async () => {
  const reader = new CSVReader('input.csv', {/* options */});
  await reader.open();
  
  while (!reader.isOver) {
    const entry = await reader.readNext();
    
    // In certain cases the last returned entry can be null
    if (!entry) break;
    
    console.log(entry.myNamedColumn);
  }
  
  await reader.close();
})();
```

### Options

- `chunkSize`: Number of bytes to read in each chunk. Defaults to `1 * 1024 * 1024` (1 MiB).
- `separator`: Column separator in CSV file. Defaults to comma (`,`).
- `rowSeparator`: Row separator in CSV file. Defaults to line break (`\n`).


## CSV Writter

This will write a CSV file from the JSON objects passed to it.
It will use the properties in the first JSON object as the columns for the CSV file.
Any property not existing in the first passed object will not be added to the CSV file!

### Usage

```javascript
(async () => {
  const writter = new CSVWritter('output.csv', {/* options */});
  await writter.open();
  
  for (let i = 0; i < 100000; i++) {
    await writter.write({
      id: i,
      otherColumn: 'test',
    });
  }
  
  await writter.close();
})();
```

### Options

- `separator`: Column separator in CSV file. Defaults to comma (`,`).
- `rowSeparator`: Row separator in CSV file. Defaults to line break (`\n`).

以上是关于markdown CSV解析器的主要内容,如果未能解决你的问题,请参考以下文章

HyperDown.js 这个markdown解析器在浏览器中怎么使用

正则表达式之简易markdown文件解析器

Javascript中的Markdown解析器[关闭]

Google Adwords CSV 文件解析器

markdown 用Swift 2.0编写的markdown-ish解析器

Python使用Mistune对markdown自定义规则解析