ruby 一个简单而不完整的JSON解析器,受到okjson.rb的启发
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编整理,主要介绍一个用 Ruby 编写的简单而不完整的 JSON 解析器(受 okjson.rb 启发)的相关知识,希望对你有一定的参考价值。
#!/usr/bin/env ruby
module PJ
  # A small, deliberately incomplete JSON decoder inspired by okjson.rb.
  #
  # Decoding runs in two phases: #lex turns the input text into a flat list
  # of [type, lexeme, value] triples, and the parse_* methods consume that
  # list recursively. Every parse_* method returns the decoded value together
  # with the tokens it did not consume, so callers can keep going from there.
  #
  # Malformed input is always reported as PJ::ParseError.
  module Parser
    extend self

    # Single-character escapes allowed after a backslash inside a JSON string.
    SIMPLE_ESCAPES = {
      '"' => '"', '\\' => '\\', '/' => '/',
      'b' => "\b", 'f' => "\f", 'n' => "\n", 'r' => "\r", 't' => "\t"
    }.freeze

    # Matches, in priority order: a UTF-16 surrogate pair written as two \u
    # escapes, a single \uXXXX escape, or a backslash followed by one char.
    ESCAPE_PATTERN = /\\u([dD][89abAB]\h\h)\\u([dD][c-fC-F]\h\h)|\\u(\h{4})|\\(.)/

    # A quoted string literal. Anchored with \A so that a broken literal at
    # the current position cannot be "rescued" by a quote pair further along.
    STRING_PATTERN = /\A"(?:[^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/

    # Number grammar taken from okjson: integer part, optional fraction,
    # optional exponent. Anchored at the current position.
    NUMBER_PATTERN = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/

    # Decodes a JSON document whose top-level value is an array or an object.
    #
    # @param string [String] JSON text
    # @return [Array, Hash] the decoded document
    # @raise [ParseError] if the text is not a well-formed array or object
    def decode(string)
      tokens = lex(string)
      raise ParseError, "too few tokens" if tokens.length < 2
      obj, rest = parse_tokens(tokens)
      raise ParseError, 'trailing garbage' unless rest.empty?
      obj
    end

    # Parses the top-level value, which must be an array or an object.
    #
    # @param tokens [Array<Array>] token triples produced by #lex
    # @return [Array] two elements: the decoded value and the unconsumed tokens
    # @raise [ParseError] if the first token opens neither an array nor an object
    def parse_tokens(tokens)
      raise ParseError, "empty tokens" if tokens.empty?
      case tokens[0][0]
      when '[' then parse_array(tokens)
      when '{' then parse_object(tokens)
      else
        # Without this branch the method returned nil and the caller crashed
        # with NoMethodError instead of reporting a parse error.
        raise ParseError, "expecting [ or {, got #{tokens[0].inspect}"
      end
    end

    # Parses any single JSON value (scalar, array or object).
    #
    # @param tokens [Array<Array>] remaining tokens
    # @return [Array] two elements: the decoded value and the unconsumed tokens
    # @raise [ParseError] if there is no token or it cannot start a value
    def parse_value(tokens)
      raise ParseError, "empty tokens" if tokens.empty?
      type, _lexeme, value = tokens[0]
      case type
      when '{' then parse_object(tokens)
      when '[' then parse_array(tokens)
      when :value, :string then [value, tokens.drop(1)]
      else
        raise ParseError, "unexpected #{tokens[0].inspect}"
      end
    end

    # Parses an array starting at its opening bracket.
    #
    # @param tokens [Array<Array>] remaining tokens, the first being '['
    # @return [Array] two elements: the decoded Array and the unconsumed tokens
    # @raise [ParseError] if the array is malformed or never closed
    def parse_array(tokens)
      tokens = eat('[', tokens)
      array = []
      raise ParseError, "imbalance [] pairs" if tokens.empty?
      return [array, tokens.drop(1)] if tokens[0][0] == ']'

      loop do
        value, tokens = parse_value(tokens)
        array << value
        # Running out of tokens here means the closing bracket is missing.
        raise ParseError, "imbalance [] pairs" if tokens.empty?
        return [array, tokens.drop(1)] if tokens[0][0] == ']'
        tokens = eat(',', tokens)
      end
    end

    # Parses an object starting at its opening brace.
    #
    # @param tokens [Array<Array>] remaining tokens, the first being '{'
    # @return [Array] two elements: the decoded Hash and the unconsumed tokens
    # @raise [ParseError] if the object is malformed or never closed
    def parse_object(tokens)
      tokens = eat('{', tokens)
      object = {}
      raise ParseError, "imbalanced {} pairs" if tokens.empty?
      return [object, tokens.drop(1)] if tokens[0][0] == '}'

      loop do
        key, value, tokens = parse_pair(tokens)
        object[key] = value
        # Running out of tokens here means the closing brace is missing.
        raise ParseError, "imbalanced {} pairs" if tokens.empty?
        return [object, tokens.drop(1)] if tokens[0][0] == '}'
        tokens = eat(',', tokens)
      end
    end

    # Parses one `"key" : value` member of an object.
    #
    # @param tokens [Array<Array>] remaining tokens
    # @return [Array] three elements: key, value and the unconsumed tokens
    # @raise [ParseError] if the key is missing or is not a string
    def parse_pair(tokens)
      first = tokens[0]
      if first.nil? || first[0] != :string
        raise ParseError, "object key is not string: #{first.inspect}"
      end
      key, tokens = parse_value(tokens)
      tokens = eat(':', tokens)
      value, tokens = parse_value(tokens)
      [key, value, tokens]
    end

    # Consumes one token of the given type.
    #
    # @param type [String, Symbol] expected token type
    # @param tokens [Array<Array>] remaining tokens
    # @return [Array<Array>] the tokens after the consumed one
    # @raise [ParseError] if the next token is missing or has another type
    def eat(type, tokens)
      head = tokens[0]
      if head.nil? || head[0] != type
        raise ParseError, "expecting #{type}, got #{head.inspect}"
      end
      tokens.drop(1)
    end

    # Splits JSON text into token triples, discarding whitespace.
    #
    # @param string [String] JSON text
    # @return [Array<Array>] [type, lexeme, value] triples
    # @raise [ParseError] on any text that does not form a token
    def lex(string)
      tokens = []
      while string.length > 0
        type, lexeme, value = tokenize(string)
        # tokenize_number signals "no number here" with an empty array.
        raise ParseError, "invalid token at #{string[0,10].inspect}" if type.nil?
        tokens << [type, lexeme, value] unless type == :space
        # Re-slices the remaining input for every token; fine for small
        # documents, quadratic in the worst case.
        string = string[lexeme.length..-1]
      end
      tokens
    end

    # Reads the single token at the start of +string+.
    #
    # @param string [String] remaining input
    # @return [Array] a [type, lexeme, value] triple, or [] when a number was
    #   expected but not found
    # @raise [ParseError] if the first character cannot start any token
    def tokenize(string)
      case string[0]
      when '[', ']', '{', '}', ':', ','
        [string[0]] * 3
      when '"'
        tokenize_string(string)
      when '-', /\d/
        # '-' must be dispatched too, otherwise negative numbers are rejected.
        tokenize_number(string)
      when " ", "\t", "\r", "\n"
        [:space, string[0], string[0]]
      when 'n' then tokenize_keyword(string, 'null', nil)
      when 'f' then tokenize_keyword(string, 'false', false)
      when 't' then tokenize_keyword(string, 'true', true)
      else
        raise ParseError, "unrecognized token at: #{string[0,10].inspect}"
      end
    end

    # Reads a double-quoted string literal at the start of +quoted_string+.
    #
    # @param quoted_string [String] remaining input, starting with '"'
    # @return [Array] [:string, lexeme, decoded_string]
    # @raise [ParseError] if the literal is unterminated or has a bad escape
    def tokenize_string(quoted_string)
      m = STRING_PATTERN.match(quoted_string)
      unless m
        raise ParseError, "invalid string literal at #{quoted_string[0,10].inspect}"
      end
      [:string, m[0], unquote(m[0])]
    end

    # Strips the surrounding quotes from a string literal and decodes its
    # escape sequences.
    #
    # Surrogate pairs written as two \u escapes are combined into a single
    # code point; a lone surrogate becomes U+FFFD because it cannot be
    # represented in UTF-8. Unknown escapes are left untouched.
    #
    # @param quoted_string [String] the literal, including both quotes
    # @return [String] the decoded contents
    def unquote(quoted_string)
      quoted_string[1...-1].gsub(ESCAPE_PATTERN) do
        match = Regexp.last_match
        if match[1]
          high = match[1].hex - 0xD800
          low = match[2].hex - 0xDC00
          [0x10000 + (high << 10) + low].pack('U')
        elsif match[3]
          code = match[3].hex
          code = 0xFFFD if (0xD800..0xDFFF).cover?(code)
          [code].pack('U')
        else
          SIMPLE_ESCAPES.fetch(match[4]) { match[0] }
        end
      end
    end

    # Reads a number at the start of +string+ (grammar from okjson).
    #
    # Plain integers and integers with a non-negative exponent decode to
    # Integer; anything with a fraction or a negative exponent decodes to
    # Float.
    #
    # @param string [String] remaining input
    # @return [Array] [:value, lexeme, number], or [] if no number starts here
    def tokenize_number(string)
      m = NUMBER_PATTERN.match(string)
      return [] unless m

      lexeme, mantissa, fraction, exponent = m[0], m[1], m[2], m[3]
      value =
        if fraction
          Float(lexeme)
        elsif exponent.nil?
          Integer(lexeme)
        else
          power = exponent[1..-1].to_i(10)
          # 10**negative is a Rational in Ruby, so use Float for that case.
          # NOTE(review): a very large positive exponent builds a huge
          # Integer; consider capping it before decoding untrusted input.
          power < 0 ? Float(lexeme) : Integer(mantissa) * (10**power)
        end
      [:value, lexeme, value]
    end

    private

    # Matches one of the bare literals (null / true / false) at the start of
    # +string+ and returns its token, or raises ParseError.
    def tokenize_keyword(string, word, value)
      unless string.start_with?(word)
        raise ParseError, "unrecognized token at: #{string[0,10].inspect}"
      end
      [:value, word, value]
    end
  end

  # Raised for every kind of malformed input.
  class ParseError < StandardError; end
end
require 'minitest/autorun'
# Behavioural tests for PJ::Parser.decode: well-formed arrays and objects
# decode to the matching Ruby values, malformed input raises PJ::ParseError.
class TestParser < Minitest::Test
  # --- arrays ---------------------------------------------------------

  def test_empty_array
    assert_equal [], parse('[]')
  end

  def test_simple_array
    assert_equal [1], parse('[1]')
  end

  def test_simple_array_2
    assert_equal ['foo'], parse('["foo"]')
  end

  def test_array_of_multi_items
    assert_equal [1, 'foo'], parse('[1, "foo"]')
  end

  # Spaces, tabs, newlines and carriage returns between tokens are ignored.
  def test_space
    assert_equal ['foo'], parse("[ \"foo\" \t \n \r ]")
  end

  # --- literals -------------------------------------------------------

  def test_null
    assert_equal [nil], parse('[ null ]')
  end

  # A quoted "null" is a string, not the null literal.
  def test_null_2
    assert_equal ['null'], parse('[ "null" ]')
  end

  def test_true
    assert_equal [true], parse('[ true ]')
  end

  def test_false
    assert_equal [false], parse('[ false ]')
  end

  # --- objects --------------------------------------------------------

  def test_empty_object
    assert_equal({}, parse('{}'))
  end

  def test_simple_object
    assert_equal({ 'foo' => 'bar' }, parse('{ "foo" : "bar" }'))
  end

  def test_object_of_multi_pairs
    expected = { 'foo' => nil, 'bar' => false }
    assert_equal expected, parse('{ "foo" : null, "bar" : false }')
  end

  # Single-quoted strings are not accepted.
  def test_single_quote
    assert_parse_error " { \"foo\" : 'bar' } "
  end

  # --- nesting --------------------------------------------------------

  def test_simple_nesting
    assert_equal [{}], parse('[{}]')
  end

  def test_simple_nesting_2
    assert_equal({ 'foo' => [] }, parse('{ "foo" : [] }'))
  end

  # --- malformed input ------------------------------------------------

  def test_bad_object_key
    assert_parse_error '{ 1 : "bar" }'
  end

  def test_not_closed_array
    assert_parse_error '['
  end

  def test_not_closed_array_2
    assert_parse_error '[['
  end

  def test_not_closed_object
    assert_parse_error '{'
  end

  def test_not_closed_object_2
    assert_parse_error '{{'
  end

  private

  # Shorthand for the call every test exercises.
  def parse(json)
    PJ::Parser.decode(json)
  end

  # Asserts that decoding +json+ raises PJ::ParseError.
  def assert_parse_error(json)
    assert_raises(PJ::ParseError) { parse(json) }
  end
end
以上是关于"ruby 一个简单而不完整的JSON解析器,受到okjson.rb的启发"的主要内容。如果未能解决你的问题,请参考以下文章。