Ruby:一个简单但不完整的 JSON 解析器(受 okjson.rb 启发)

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编整理,主要介绍一个用 Ruby 编写的简单但不完整的 JSON 解析器(受 okjson.rb 启发)及其相关知识,希望对你有一定的参考价值。

#!/usr/bin/env ruby

module PJ
  # Raised whenever the input cannot be tokenized or parsed.
  class ParseError < StandardError; end

  # A small recursive-descent JSON parser inspired by okjson.rb.
  #
  # The input is first split into tokens by {#lex}; every token is a
  # three-element array +[type, lexeme, value]+ where +type+ is either the
  # punctuation character itself (+'['+, +']'+, +'{'+, +'}'+, +':'+, +','+),
  # +:string+, +:value+ (numbers, +true+, +false+, +null+) or +:space+.
  # The +parse_*+ methods each consume tokens from the front of a token list
  # and return the parsed Ruby object together with the remaining tokens.
  module Parser
    extend self

    # A complete JSON string literal, anchored at the start of the input so
    # the reported lexeme always begins at offset 0.
    STRING_PATTERN = /\A"(?:[^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/

    # Number grammar taken from okjson: integer part, optional fraction,
    # optional exponent. Anchored at the start of the input.
    NUMBER_PATTERN = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/

    # One simple escape, or a run of consecutive \uXXXX escapes (a run is
    # matched as a whole so surrogate pairs can be decoded together).
    ESCAPE_PATTERN = %r{\\(?:["/\\bfnrt]|u[0-9a-fA-F]{4}(?:\\u[0-9a-fA-F]{4})*)}

    # Character following the backslash => character it stands for.
    SIMPLE_ESCAPES = {
      '"' => '"', '\\' => '\\', '/' => '/',
      'b' => "\b", 'f' => "\f", 'n' => "\n", 'r' => "\r", 't' => "\t"
    }.freeze

    # First character of a literal => [lexeme, Ruby value].
    LITERALS = {
      'n' => ['null', nil],
      'f' => ['false', false],
      't' => ['true', true]
    }.freeze

    # Parses a JSON document whose top-level value is an array or an object.
    #
    # @param string [String] the JSON text
    # @return [Array, Hash] the decoded document
    # @raise [ParseError] if the text is not a well-formed array/object
    def decode(string)
      tokens = lex(string)

      # The shortest valid documents, "[]" and "{}", already need two tokens.
      raise ParseError, "too few tokens" if tokens.length < 2

      obj, tokens = parse_tokens(tokens)

      raise ParseError, 'trailing garbage' unless tokens.empty?

      obj
    end

    # Parses the top-level value, which must be an array or an object.
    #
    # @param tokens [Array<Array>] token list
    # @return [Array] +[object, remaining_tokens]+
    # @raise [ParseError] if the first token opens neither an array nor an object
    def parse_tokens(tokens)
      raise ParseError, "empty tokens" if tokens.empty?

      case tokens[0][0]
      when '[' then parse_array(tokens)
      when '{' then parse_object(tokens)
      else
        # Without this branch the method returned nil and decode crashed.
        raise ParseError, "expecting array or object, got #{tokens[0].inspect}"
      end
    end

    # Parses any JSON value found at the front of +tokens+.
    #
    # @param tokens [Array<Array>] token list
    # @return [Array] +[value, remaining_tokens]+
    # @raise [ParseError] if +tokens+ is empty or starts with an unexpected token
    def parse_value(tokens)
      raise ParseError, "empty tokens" if tokens.empty?

      type, _lexeme, value = tokens[0]

      case type
      when '{' then parse_object(tokens)
      when '[' then parse_array(tokens)
      when :value, :string then [value, tokens[1..-1]]
      else
        raise ParseError, "unexpected #{tokens[0].inspect}"
      end
    end

    # Parses an array; +tokens+ must start with a +'['+ token.
    #
    # @param tokens [Array<Array>] token list
    # @return [Array] +[array, remaining_tokens]+
    # @raise [ParseError] on a missing +']'+ or a missing/misplaced +','+
    def parse_array(tokens)
      tokens = eat('[', tokens)
      array = []

      raise ParseError, "imbalance [] pairs" if tokens.empty?
      return [array, tokens[1..-1]] if tokens[0][0] == ']'

      loop do
        value, tokens = parse_value(tokens)
        array << value

        # Running out of tokens here means the closing bracket is missing.
        raise ParseError, "imbalance [] pairs" if tokens.empty?
        return [array, tokens[1..-1]] if tokens[0][0] == ']'

        tokens = eat(',', tokens)
      end
    end

    # Parses an object; +tokens+ must start with a +'{'+ token.
    # When a key occurs more than once the last value wins.
    #
    # @param tokens [Array<Array>] token list
    # @return [Array] +[hash, remaining_tokens]+
    # @raise [ParseError] on a missing +'}'+ or a missing/misplaced +','+
    def parse_object(tokens)
      tokens = eat('{', tokens)
      object = {}

      raise ParseError, "imbalanced {} pairs" if tokens.empty?
      return [object, tokens[1..-1]] if tokens[0][0] == '}'

      loop do
        key, value, tokens = parse_pair(tokens)
        object[key] = value

        # Running out of tokens here means the closing brace is missing.
        raise ParseError, "imbalanced {} pairs" if tokens.empty?
        return [object, tokens[1..-1]] if tokens[0][0] == '}'

        tokens = eat(',', tokens)
      end
    end

    # Parses one +"key": value+ member of an object.
    #
    # @param tokens [Array<Array>] token list
    # @return [Array] +[key, value, remaining_tokens]+
    # @raise [ParseError] if the key is missing or is not a string, the colon
    #   is missing, or the value is malformed
    def parse_pair(tokens)
      raise ParseError, "imbalanced {} pairs" if tokens.empty?
      raise ParseError, "object key is not string: #{tokens[0].inspect}" unless tokens[0][0] == :string

      key, tokens = parse_value(tokens)

      tokens = eat(':', tokens)

      value, tokens = parse_value(tokens)

      [key, value, tokens]
    end

    # Consumes one token of the given type.
    #
    # @param type [String, Symbol] the expected token type
    # @param tokens [Array<Array>] token list
    # @return [Array<Array>] the tokens after the consumed one
    # @raise [ParseError] if the list is empty or starts with another type
    def eat(type, tokens)
      raise ParseError, "expecting #{type}, got end of input" if tokens.empty?
      raise ParseError, "expecting #{type}, got #{tokens[0].inspect}" unless tokens[0][0] == type

      tokens[1..-1]
    end

    # Splits +string+ into tokens, dropping whitespace.
    #
    # @param string [String] the JSON text
    # @return [Array<Array>] list of +[type, lexeme, value]+ tokens
    # @raise [ParseError] if some part of the text is not a valid token
    def lex(string)
      tokens = []
      while string.length > 0
        type, lexeme, value = tokenize(string)

        # tokenize_number signals "no match" with an empty array.
        raise ParseError, "invalid token at #{string[0,10].inspect}" if type.nil?

        tokens << [type, lexeme, value] unless type == :space

        string = string[lexeme.length..-1]
      end

      tokens
    end

    # Reads the single token found at the very start of +string+.
    #
    # @param string [String] remaining input
    # @return [Array] +[type, lexeme, value]+ (or +[]+ for a malformed number)
    # @raise [ParseError] if the input starts with an unknown character or a
    #   misspelled literal
    def tokenize(string)
      char = string[0]

      case char
      when '[', ']', '{', '}', ':', ','
        [char, char, char]
      when '"'
        tokenize_string(string)
      when '-', /\d/
        # '-' has to be dispatched too, otherwise negative numbers are rejected.
        tokenize_number(string)
      when " ", "\t", "\r", "\n"
        [:space, char, char]
      when 'n', 'f', 't'
        word, value = LITERALS.fetch(char)
        raise ParseError, "unrecognized token at: #{string[0,10].inspect}" unless string.start_with?(word)
        [:value, word, value]
      else
        raise ParseError, "unrecognized token at: #{string[0,10].inspect}"
      end
    end

    # Reads a string literal from the start of +quoted_string+.
    #
    # @param quoted_string [String] input starting with a double quote
    # @return [Array] +[:string, lexeme, decoded_string]+
    # @raise [ParseError] if the literal is unterminated or has a bad escape
    def tokenize_string(quoted_string)
      m = STRING_PATTERN.match(quoted_string)
      raise ParseError, "invalid string literal at #{quoted_string[0,10].inspect}" unless m

      [:string, m[0], unquote(m[0])]
    end

    # Strips the surrounding quotes and decodes the escape sequences accepted
    # by the tokenizer (\" \\ \/ \b \f \n \r \t and \uXXXX).
    #
    # @param quoted_string [String] a string literal including its quotes
    # @return [String] the string content
    def unquote(quoted_string)
      quoted_string[1...-1].gsub(ESCAPE_PATTERN) do |escape|
        if escape[1] == 'u'
          decode_utf16_escapes(escape)
        else
          SIMPLE_ESCAPES.fetch(escape[1])
        end
      end
    end

    # Reads a number from the start of +string+ (grammar from okjson).
    #
    # Plain integers and integers with a non-negative exponent become
    # Integer; anything with a fraction or a negative exponent becomes Float.
    #
    # @param string [String] remaining input
    # @return [Array] +[:value, lexeme, number]+, or +[]+ if no number starts here
    def tokenize_number(string)
      m = NUMBER_PATTERN.match(string)
      return [] unless m

      integer, fraction, exponent = m[1], m[2], m[3]

      number =
        if fraction
          Float(m[0])
        elsif exponent.nil?
          Integer(integer)
        else
          power = exponent[1..-1].to_i
          # 10**negative is a Rational; return a Float for those instead.
          power < 0 ? Float(m[0]) : Integer(integer) * (10**power)
        end

      [:value, m[0], number]
    end

    private

    # Decodes a run of consecutive \uXXXX escapes as UTF-16 code units, so a
    # surrogate pair becomes a single character. Sequences that are not valid
    # UTF-16 are substituted by the encoder's replacement character.
    def decode_utf16_escapes(escapes)
      code_units = escapes.scan(/[0-9a-fA-F]{4}/).map { |hex| hex.to_i(16) }

      code_units.pack('n*')
                .force_encoding(Encoding::UTF_16BE)
                .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
    end
  end
end

require 'minitest/autorun'

# Exercises PJ::Parser.decode on well-formed arrays and objects and checks
# that malformed documents are rejected with PJ::ParseError.
class TestParser < Minitest::Test
  # --- arrays ---------------------------------------------------------------

  def test_empty_array
    assert_equal([], decode('[]'))
  end

  def test_simple_array
    assert_equal([1], decode('[1]'))
  end

  def test_simple_array_2
    assert_equal(['foo'], decode('["foo"]'))
  end

  def test_array_of_multi_items
    assert_equal([1, 'foo'], decode('[1, "foo"]'))
  end

  # Spaces, tabs, newlines and carriage returns between tokens are ignored.
  def test_space
    assert_equal(['foo'], decode("[  \"foo\" \t \n \r  ]"))
  end

  # --- literals -------------------------------------------------------------

  def test_null
    assert_equal([nil], decode('[ null ]'))
  end

  # A quoted "null" is a string, not the null literal.
  def test_null_2
    assert_equal(['null'], decode('[ "null" ]'))
  end

  def test_true
    assert_equal([true], decode('[ true ]'))
  end

  def test_false
    assert_equal([false], decode('[ false ]'))
  end

  # --- objects --------------------------------------------------------------

  def test_empty_object
    assert_equal({}, decode('{}'))
  end

  def test_simple_object
    expected = { 'foo' => 'bar' }
    assert_equal(expected, decode('{ "foo" : "bar" }'))
  end

  def test_object_of_multi_pairs
    expected = { 'foo' => nil, 'bar' => false }
    assert_equal(expected, decode('{ "foo" : null, "bar" : false }'))
  end

  # Single-quoted strings are not accepted.
  def test_single_quote
    assert_parse_error(%q( { "foo" : 'bar' } ))
  end

  # --- nesting --------------------------------------------------------------

  def test_simple_nesting
    assert_equal([{}], decode('[{}]'))
  end

  def test_simple_nesting_2
    expected = { 'foo' => [] }
    assert_equal(expected, decode('{ "foo" : [] }'))
  end

  # --- malformed input ------------------------------------------------------

  def test_bad_object_key
    assert_parse_error('{ 1 : "bar" }')
  end

  def test_not_closed_array
    assert_parse_error('[')
  end

  def test_not_closed_array_2
    assert_parse_error('[[')
  end

  def test_not_closed_object
    assert_parse_error('{')
  end

  def test_not_closed_object_2
    assert_parse_error('{{')
  end

  private

  # Shorthand for the call under test.
  def decode(json)
    PJ::Parser.decode(json)
  end

  # Asserts that decoding +json+ raises PJ::ParseError.
  def assert_parse_error(json)
    assert_raises(PJ::ParseError) { decode(json) }
  end
end

以上就是“Ruby:一个简单但不完整的 JSON 解析器(受 okjson.rb 启发)”的主要内容。如果未能解决你的问题,请参考以下文章:

Ruby命令行选项解析器

ruby Ruby中的简单命令行解析器

在 Ruby 中解析 JSON 字符串

在 Ruby 中实现贝叶斯分类器?

springboot可以返回string而不走视图解析器吗

JObject 解析器停止执行而不给出任何输出或错误