DIY 一个 JSON解析器。
Posted bywayboy
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了DIY 一个 JSON解析器。相关的知识,希望对你有一定的参考价值。
最早使用的是 C-JSON,性能没什么问题,缺点是最大只支持 32 层嵌套(不过可以通过修改宏来增加嵌套层数)。最近使用 Lua 开发服务器业务部分,找了几个 JSON 库,要么对标准的支持不尽人意,要么是用 Lua 实现的,性能无法满足要求。与其漫无目的地在网络上到处寻找,不如自己动手写一个,反正这东西也不复杂。于是定了几个简单的设计要求:
- 使用C语言开发,编译为LUA模块.
- 支持 UTF-8:JSON 中的非 ASCII 字符一般写成 \uxxxx 这类转义编码,解码时需要转换为 UTF-8.
- 无限级对象数组嵌套.
- 支持object array string number 类型.
- 提供2个最简单的接口 json_encode json_decode
下面是代码
lua-json.h
#ifndef _LUA_JSON_PARSER_H
#define _LUA_JSON_PARSER_H
/*
License: MIT
Author: bywayboy<bywayboy@gmail.com>
Date: 2014-11-21
*/
/*
 * Lua C function: decode the JSON text given as argument 1.
 * Returns two Lua values: `true, value` when parsing succeeds, or
 * `false, message` when the parser reports an error.
 */
int lua_json_decode(lua_State * L);
/*
 * Lua C function: encode argument 1 as JSON text and return it as one string.
 * An optional argument 2 is read as a boolean; when true, bytes >= 0x80 in
 * strings are copied through instead of being written as \uXXXX escapes.
 */
int lua_json_encode(lua_State * L);
#endif
lua-json.c
/*
License: MIT
Author: bywayboy<bywayboy@gmail.com>
Date: 2014-11-21
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lua.h"
#include "lauxlib.h"
#include "lua-json.h"
#include "automem.h"
/* Error codes stored in jsontok.err and translated by json_errstr(). */
enum {
    json_error_ok = 0,
    json_error_bad_value,
    json_error_bad_separator,
    json_error_bad_character,
    json_error_bad_string,
    json_error_array_noclose,
    json_error_object_noclose,
};
/* States of the string, array and object parsing state machines. */
enum {
    lt_string,                  /* inside a quoted string */
    lt_string_escape,           /* just after a backslash */
    lt_string_escape_unicode,   /* just after a backslash followed by 'u' */
    lt_array_normal,            /* expecting an array element */
    lt_array_sep,               /* expecting ',' or ']' after an element */
    lt_object_normal,           /* expecting a property name */
    lt_object_sep,              /* expecting ':' after a property name */
    lt_object_sep2,             /* expecting ',' or '}' after a property value */
    lt_object_value,            /* expecting a property value */
};
/* True when x is a blank, tab, carriage return or line feed. */
#define ch_is_space(x) \
    (((x) == ' ') || ((x) == '\t') || ((x) == '\r') || ((x) == '\n'))
/* True when x is an ASCII hexadecimal digit. NOTE: x is evaluated several times. */
#define ch_is_hex(x) \
    (('0' <= (x) && '9' >= (x)) || ('a' <= (x) && 'f' >= (x)) || ('A' <= (x) && 'F' >= (x)))
/* Short type names for the decoder state (struct jsontok) and the encoder state (struct jsonenc). */
typedef struct jsontok jsontok_t;
typedef struct jsonenc jsonenc_t;
struct jsontok
lua_State * L;
char * jstr;
int pos,size;
int row;// line
int col; // charator
int err;
automem_t mem;numberx
;
struct jsonenc
lua_State * L;
automem_t mem;
int unescape_utf;
;
/*
 * Record a line break: bump the line counter and remember the current offset
 * as the start of the new line. Wrapped in do/while(0) so that the macro
 * behaves as a single statement (e.g. as the body of an unbraced `if`).
 */
#define JSONTOK_NEWLINE( tok ) do { (tok)->row++; (tok)->col = (tok)->pos; } while (0)
/* Forward declaration: the array and object parsers call the value parser recursively. */
static int _lua_json_parse_value(jsontok_t * tok);
/*
 * Parse an unquoted property name (identifier) starting at tok->pos.
 *
 * The name may contain ASCII letters and underscores anywhere, and digits
 * after the first character. On success the name is pushed onto the Lua stack
 * as a string, tok->pos is advanced past it and 0 is returned. If no valid
 * name character is found, tok->err is set to json_error_bad_character,
 * tok->pos is left on the offending character and -1 is returned.
 */
static int _lua_json_parser_key(jsontok_t *tok)
{
    int len = 0;

    /* Bound the scan by the input size instead of relying on a terminator. */
    while (tok->pos + len < tok->size) {
        char c = tok->jstr[tok->pos + len];
        int is_alpha = ('a' <= c && 'z' >= c) || ('A' <= c && 'Z' >= c);
        int is_digit = ('0' <= c && '9' >= c);

        if (!(is_alpha || '_' == c || (len > 0 && is_digit)))
            break;
        len++;
    }

    if (len > 0) {
        lua_pushlstring(tok->L, &tok->jstr[tok->pos], (size_t)len);
        tok->pos += len;
        return 0;
    }

    tok->err = json_error_bad_character;
    return -1;
}
static int _lua_json_parse_string(jsontok_t * tok, const char quote)
char c;
int ps =tok->pos,s =lt_string;
if(0 == quote)
return _lua_json_parser_key(tok);
while(tok->pos < tok->size)
c = tok->jstr[tok->pos];
switch(s)
case lt_string:
switch(c)
case '\\n': tok->pos++; JSONTOK_NEWLINE(tok); break; // new line.
case '\\\\': tok->pos++; s=lt_string_escape;break; // switch to escape string.
case '\\'':case '"':
if(c != quote)
tok->pos++;automem_append_byte(&tok->mem,c);break;
//TODO: finish the string.
lua_pushlstring(tok->L, (char *)tok->mem.pdata,tok->mem.size);
tok->mem.size=0;
tok->pos++;
return 0;
default:
tok->pos++;
automem_append_byte(&tok->mem, c);
break;
break;
case lt_string_escape:
switch(c)
case 'b': automem_append_byte(&tok->mem, '\\b');tok->pos++;s=lt_string;break;
case 'n': automem_append_byte(&tok->mem, '\\n');tok->pos++;s=lt_string;break;
case 'r': automem_append_byte(&tok->mem, '\\r');tok->pos++;s=lt_string;break;
case 't': automem_append_byte(&tok->mem, '\\t');tok->pos++;s=lt_string;break;
case 'f': automem_append_byte(&tok->mem, '\\f');tok->pos++;s=lt_string;break;
case '\\\\': automem_append_byte(&tok->mem, '\\\\');tok->pos++;s=lt_string;break;
case '/': automem_append_byte(&tok->mem, '/');tok->pos++;s=lt_string;break;
case 'u':
s=lt_string_escape_unicode;
tok->pos++;
break;
default:
automem_append_byte(&tok->mem,'\\\\');automem_append_byte(&tok->mem,c);tok->pos++;s=lt_string;break;
break;
case lt_string_escape_unicode:
//--处理 4个字节的UNICODE
char * pt=&tok->jstr[tok->pos];
if(ch_is_hex(pt[0]) && ch_is_hex(pt[1]) && ch_is_hex(pt[2]) && ch_is_hex(pt[3]))
//是正确的 UNICODE.
unsigned short uni = (hex2byte((unsigned char *)&pt[0]) << 8) | hex2byte((unsigned char *)&pt[2]);
if(0x80 > uni) //1 bytes
automem_append_byte(&tok->mem, uni & 0x7F);
else if(0x800 > uni) //2 bytes
automem_append_byte(&tok->mem, (0xC0 | ((uni >> 6) & 0x3F)));
automem_append_byte(&tok->mem, 0x80 | (0x3F & uni));
else if(0x10000 > uni) //3 bytes
automem_append_byte(&tok->mem, (0xE0 | ((uni >> 12))));
automem_append_byte(&tok->mem, (0xC0 | ((uni >> 6) & 0x3F)));
automem_append_byte(&tok->mem, 0x80 | (0x3F & uni));
else if(0x110000 > uni) // 4 bytes.
automem_append_byte(&tok->mem, (0xF0 | ((uni >> 18) & 0x07)));
automem_append_byte(&tok->mem, (0xE0 | ((uni >> 12) & 0x3F)));
automem_append_byte(&tok->mem, (0xC0 | ((uni >> 6) & 0x3F)));
automem_append_byte(&tok->mem, 0x80 | (0x3F & uni));
tok->pos+=4;
s=lt_string;
break;
tok->pos++;
s=lt_string;
automem_append_voidp(&tok->mem,"\\\\u",2);automem_append_byte(&tok->mem,c);
break;
tok->err=json_error_bad_string;
return -1;
/*
 * Parse an array whose opening '[' has already been consumed.
 *
 * Pushes a new Lua table and stores each element at consecutive integer keys
 * starting from 1. Returns 0 with tok->pos just past the closing ']'. On
 * failure returns a negative value with tok->err set; the Lua stack is left
 * for lua_json_decode() to restore.
 */
static int _lua_json_parse_array(jsontok_t * tok)
{
    char c, s = lt_array_normal;
    int r, idx = 1;

    lua_checkstack(tok->L, 5);
    lua_newtable(tok->L);
    while (tok->pos < tok->size) {
        c = tok->jstr[tok->pos];
        switch (s) {
        case lt_array_normal:   /* expecting an element */
            switch (c) {
            case '\n': tok->pos++; JSONTOK_NEWLINE(tok); break;
            case '\r': case '\t': case ' ': tok->pos++; break;
            case ',':   /* two separators in a row */
                tok->err = json_error_bad_separator;
                return -1;
            case ']':
                /* Empty array or trailing comma: consume the bracket so the
                   caller does not see it again. */
                tok->pos++;
                return 0;
            default:
                /* The value parser handles every element kind, nested arrays included. */
                if (0 > (r = _lua_json_parse_value(tok)))
                    return r;
                s = lt_array_sep;
                lua_rawseti(tok->L, -2, idx++);
                break;
            }
            break;

        case lt_array_sep:      /* expecting ',' or ']' */
            switch (c) {
            case '\n': tok->pos++; JSONTOK_NEWLINE(tok); break;
            case '\r': case '\t': case ' ': tok->pos++; break;
            case ',': tok->pos++; s = lt_array_normal; break;
            case ']': tok->pos++; return 0;
            default:
                tok->err = json_error_bad_character;
                return -1;
            }
            break;
        }
    }
    tok->err = json_error_array_noclose;
    return -1;
}
/*
 * Parse an object whose opening '{' has already been consumed.
 *
 * Pushes a new Lua table and fills it with the key/value pairs found.
 * Property names may be double-quoted, single-quoted or bare identifiers.
 * Returns 0 with tok->pos just past the closing '}'. On failure returns a
 * negative value with tok->err set.
 */
static int _lua_json_parser_object(jsontok_t * tok)
{
    char c, s = lt_object_normal;
    int r;

    lua_checkstack(tok->L, 5);
    lua_newtable(tok->L);
    while (tok->pos < tok->size) {
        c = tok->jstr[tok->pos];
        switch (s) {
        case lt_object_normal:  /* expecting a property name */
            switch (c) {
            case '\n': tok->pos++; JSONTOK_NEWLINE(tok); break;
            case '\r': case '\t': case ' ': tok->pos++; break;
            case '"':
            case '\'':
                tok->pos++;
                if (0 > (r = _lua_json_parse_string(tok, c)))
                    return r;
                s = lt_object_sep;
                break;
            case '}':
                tok->pos++;
                return 0;
            default:
                if ('_' == c || ('a' <= c && 'z' >= c) || ('A' <= c && 'Z' >= c)) {
                    /* Bare identifier: quote 0 selects the unquoted-key parser. */
                    if (0 > (r = _lua_json_parse_string(tok, '\0')))
                        return r;
                    s = lt_object_sep;
                    break;
                }
                tok->err = json_error_bad_character;
                return -1;
            }
            break;

        case lt_object_sep:     /* expecting ':' */
            switch (c) {
            case '\n': tok->pos++; JSONTOK_NEWLINE(tok); break;
            case '\r': case '\t': case ' ': tok->pos++; break;
            case ':':
                tok->pos++;
                s = lt_object_value;
                break;
            default:
                tok->err = json_error_bad_character;
                return -1;
            }
            break;

        case lt_object_value:   /* expecting the property value */
            if (0 > (r = _lua_json_parse_value(tok)))
                return r;
            /* Stack is: table, key, value -> table[key] = value. */
            lua_settable(tok->L, -3);
            s = lt_object_sep2;
            break;

        case lt_object_sep2:    /* expecting ',' or '}' */
            switch (c) {
            case '\n': tok->pos++; JSONTOK_NEWLINE(tok); break;
            case '\r': case '\t': case ' ': tok->pos++; break;
            case ',': tok->pos++; s = lt_object_normal; break;
            case '}': tok->pos++; return 0;
            default:
                tok->err = json_error_bad_character;
                return -1;
            }
            break;
        }
    }
    tok->err = json_error_object_noclose;
    return -1;
}
/*
 * Parse a number starting at tok->pos.
 *
 * Collects the longest run of characters that can appear in a decimal or
 * hexadecimal numeral and lets Lua convert it. On success the number is pushed
 * onto the Lua stack, tok->pos is advanced past it and 0 is returned. If Lua
 * cannot convert the text, tok->err is set to json_error_bad_value and -1 is
 * returned with the stack unchanged.
 */
static int _lua_json_parser_number(jsontok_t * tok)
{
    int end = tok->pos;
    int isnum = 0;
    lua_Number num;

    /* Bound the scan by the input size instead of relying on a terminator. */
    while (end < tok->size) {
        char c = tok->jstr[end];
        int numeric = ('+' == c || '-' == c)
                   || (c >= '0' && c <= '9')
                   || (c >= 'a' && c <= 'f')
                   || (c >= 'A' && c <= 'F')
                   || ('.' == c || 'e' == c || 'E' == c || 'x' == c || 'X' == c);
        if (!numeric)
            break;
        end++;
    }

    lua_pushlstring(tok->L, &tok->jstr[tok->pos], (size_t)(end - tok->pos));
    num = lua_tonumberx(tok->L, -1, &isnum);
    if (0 == isnum) {
        lua_pop(tok->L, 1);     /* drop the temporary string */
        tok->err = json_error_bad_value;
        return -1;
    }
    lua_pushnumber(tok->L, num);
    lua_replace(tok->L, -2);    /* replace the temporary string with the number */
    tok->pos = end;
    return 0;
}
// 解析器入口.
static int _lua_json_parse_value(jsontok_t * tok)
size_t i =0,r;
char c;
while(tok->pos < tok->size)
c = tok->jstr[tok->pos];
switch(c)
case '\\'':
case '"':
tok->pos++;
return _lua_json_parse_string(tok, c);
//puts(tok->jstr[tok->pos]);
break;
case '\\n':
tok->pos++;JSONTOK_NEWLINE(tok);
break;
case '\\r':case '\\t':case ' ': // space character.
tok->pos++;
break;
case '[':
tok->pos++;
return r = _lua_json_parse_array(tok);
break;
case '':
tok->pos++;
return _lua_json_parser_object(tok);
case 't':case 'T':
lua_pushboolean(tok->L,1);
tok->pos +=4;
return 0;
//maby is true.
break;
case 'f':case 'F':
lua_pushboolean(tok->L,0);
tok->pos +=5;
return 0;
// maby is false.
break;
case 'n':case 'N':
//maby is null.
lua_pushnil(tok->L);
tok->pos +=4;
return 0;
case '+':case '-':
case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
return _lua_json_parser_number(tok);
default:
tok->pos++;
break;
tok->err=json_error_bad_character;
return -1;
static const char * json_errstr(int v)
char * errstr = "unknown error";
switch(v)
case json_error_bad_value:
errstr="bad value";
break;
case json_error_bad_separator:
errstr="bad separator";
break;
case json_error_bad_character:
errstr="bad character";
break;
case json_error_bad_string:
errstr="bad string";
break;
case json_error_array_noclose:
errstr="array no close ']'";
break;
case json_error_object_noclose:
errstr="object no close ''";
break;
return errstr;
/*
 * Lua C function: decode the JSON text passed as argument 1.
 *
 * Always returns two Lua values:
 *   true,  value    when parsing succeeds
 *   false, message  when the argument is missing, empty or not a string,
 *                   or when the parser reports an error
 */
int lua_json_decode(lua_State * L)
{
    size_t lstr = 0;
    const char * str = lua_tolstring(L, 1, &lstr);
    jsontok_t tok;
    int top;

    if (NULL == str || 0 == lstr) {
        /* Nothing to parse: still honour the two-value return contract. */
        lua_pushboolean(L, 0);
        lua_pushstring(L, "json error: empty or non-string input");
        return 2;
    }

    tok.pos = tok.row = tok.col = 0;
    tok.jstr = (char *)str;
    tok.size = (int)lstr;
    tok.err = 0;
    tok.L = L;
    top = lua_gettop(L);
    automem_init(&tok.mem, 128);

    if (0 > _lua_json_parse_value(&tok)) {
        /* Discard any partially built values before reporting the error. */
        lua_settop(L, top);
        lua_pushboolean(L, 0);
        lua_pushfstring(L, "json error: %s at line:%d col:%d", json_errstr(tok.err), tok.row, tok.pos - tok.col);
    } else {
        lua_pushboolean(L, 1);
        lua_insert(L, -2);      /* reorder to: true, value */
    }

    automem_uninit(&tok.mem);
    return 2;
}
/* Return 1 when b is a UTF-8 continuation byte (10xxxxxx). */
static int json_is_utf8_cont(unsigned char b)
{
    return 0x80 == (b & 0xC0);
}

/* Append the 16-bit value u to the output as a \uXXXX escape. */
static void json_encode_u16(jsonenc_t * enc, unsigned int u)
{
    automem_append_voidp(&enc->mem, "\\u", 2);
    automem_append_voidp(&enc->mem, byte2hex((u >> 8) & 0xFF), 2);
    automem_append_voidp(&enc->mem, byte2hex(u & 0xFF), 2);
}

/*
 * Append vstr (vlen bytes) to the output buffer as a quoted JSON string.
 *
 * The double quote, the backslash and control characters are always escaped.
 * Unless enc->unescape_utf is set, well-formed multi-byte UTF-8 sequences are
 * written as \uXXXX escapes, using a surrogate pair for code points above
 * U+FFFF. Malformed or truncated sequences are copied byte by byte, so the
 * loop always advances. Returns the output buffer size after appending.
 */
static int json_encode_string(jsonenc_t * enc, char * vstr, int vlen)
{
    const unsigned char * p = (const unsigned char *)vstr;
    int i = 0;

    automem_append_byte(&enc->mem, '"');
    while (i < vlen) {
        unsigned char c = p[i];
        int left = vlen - i;

        switch (c) {
        case '"':  automem_append_voidp(&enc->mem, "\\\"", 2); i++; break;
        case '\\': automem_append_voidp(&enc->mem, "\\\\", 2); i++; break;
        case '\n': automem_append_voidp(&enc->mem, "\\n", 2);  i++; break;
        case '\t': automem_append_voidp(&enc->mem, "\\t", 2);  i++; break;
        case '\r': automem_append_voidp(&enc->mem, "\\r", 2);  i++; break;
        case '\b': automem_append_voidp(&enc->mem, "\\b", 2);  i++; break;
        case '\f': automem_append_voidp(&enc->mem, "\\f", 2);  i++; break;
        default:
            if (c < 0x20) {
                /* Remaining control characters have no short escape form. */
                json_encode_u16(enc, c);
                i++;
            } else if (enc->unescape_utf || c < 0x80) {
                automem_append_byte(&enc->mem, c);
                i++;
            } else if (0xC0 == (c & 0xE0) && left >= 2 && json_is_utf8_cont(p[i + 1])) {
                /* 2-byte sequence */
                json_encode_u16(enc, ((unsigned int)(c & 0x1F) << 6) | (p[i + 1] & 0x3F));
                i += 2;
            } else if (0xE0 == (c & 0xF0) && left >= 3 &&
                       json_is_utf8_cont(p[i + 1]) && json_is_utf8_cont(p[i + 2])) {
                /* 3-byte sequence */
                json_encode_u16(enc, ((unsigned int)(c & 0x0F) << 12)
                                   | ((unsigned int)(p[i + 1] & 0x3F) << 6)
                                   | (p[i + 2] & 0x3F));
                i += 3;
            } else if (0xF0 == (c & 0xF8) && left >= 4 &&
                       json_is_utf8_cont(p[i + 1]) && json_is_utf8_cont(p[i + 2]) &&
                       json_is_utf8_cont(p[i + 3])) {
                /* 4-byte sequence: does not fit 16 bits, emit a surrogate pair. */
                unsigned long cp = ((unsigned long)(c & 0x07) << 18)
                                 | ((unsigned long)(p[i + 1] & 0x3F) << 12)
                                 | ((unsigned long)(p[i + 2] & 0x3F) << 6)
                                 | (unsigned long)(p[i + 3] & 0x3F);
                if (cp >= 0x10000 && cp <= 0x10FFFF) {
                    cp -= 0x10000;
                    json_encode_u16(enc, (unsigned int)(0xD800 | (cp >> 10)));
                    json_encode_u16(enc, (unsigned int)(0xDC00 | (cp & 0x3FF)));
                    i += 4;
                } else {
                    automem_append_byte(&enc->mem, c);
                    i++;
                }
            } else {
                /* Stray continuation byte, invalid lead byte or truncated sequence. */
                automem_append_byte(&enc->mem, c);
                i++;
            }
            break;
        }
    }
    automem_append_byte(&enc->mem, '"');
    return enc->mem.size;
}
/*
 * Append the JSON encoding of the value on top of the Lua stack to enc->mem.
 * The value itself stays on the stack. Always returns 0.
 *
 * A table is written as an object when the first key returned by lua_next()
 * is a string, and as an array otherwise; an empty table is written as "[]".
 * Strings, numbers and booleans map to their JSON forms; every other type is
 * written as null.
 * NOTE(review): tables are walked recursively with no cycle or depth check.
 */
static int json_encode_value(jsonenc_t * enc)
{
    int count = 0, is_object = 0;
    int t = lua_type(enc->L, -1);
    size_t lkey;
    char * key;

    lua_checkstack(enc->L, 5);
    switch (t) {
    case LUA_TTABLE:
        lua_pushnil(enc->L);
        while (lua_next(enc->L, -2)) {
            if (0 == count) {
                /* The first key decides whether the table is an object or an array. */
                is_object = (LUA_TSTRING == lua_type(enc->L, -2));
                automem_append_byte(&enc->mem, is_object ? '{' : '[');
            } else {
                automem_append_byte(&enc->mem, ',');
            }
            if (is_object) {
                /* luaL_tolstring pushes a string copy, so the key that
                   lua_next() relies on is never converted in place. */
                key = (char *)luaL_tolstring(enc->L, -2, &lkey);
                json_encode_string(enc, key, (int)lkey);
                lua_pop(enc->L, 1);     /* drop the string copy */
                automem_append_byte(&enc->mem, ':');
            }
            json_encode_value(enc);
            lua_pop(enc->L, 1);         /* pop the value, keep the key for lua_next */
            count++;
        }
        if (0 == count)
            automem_append_byte(&enc->mem, '[');
        automem_append_byte(&enc->mem, is_object ? '}' : ']');
        break;
    case LUA_TSTRING:
        key = (char *)luaL_tolstring(enc->L, -1, &lkey);
        json_encode_string(enc, key, (int)lkey);
        lua_pop(enc->L, 1);             /* drop the string copy */
        break;
    case LUA_TNUMBER:
        key = (char *)luaL_tolstring(enc->L, -1, &lkey);
        automem_append_voidp(&enc->mem, key, lkey);
        lua_pop(enc->L, 1);             /* drop the string copy */
        break;
    case LUA_TBOOLEAN:
        if (lua_toboolean(enc->L, -1))
            automem_append_voidp(&enc->mem, "true", 4);
        else
            automem_append_voidp(&enc->mem, "false", 5);
        break;
    default:
        automem_append_voidp(&enc->mem, "null", 4);
        break;
    }
    return 0;
}
/*
 * Lua C function: encode argument 1 as JSON text.
 *
 * Argument 2 is optional and read as a boolean; when true, bytes >= 0x80 in
 * strings are copied through instead of being written as \uXXXX escapes.
 * Tables, strings, numbers and booleans are encoded; nil, a missing argument
 * and every other type yield "null". Returns one Lua string.
 */
int lua_json_encode(lua_State * L)
{
    jsonenc_t enc;
    int t = lua_type(L, 1);

    enc.L = L;
    enc.unescape_utf = 0;
    if (lua_gettop(L) > 1)
        enc.unescape_utf = lua_toboolean(L, 2);

    switch (t) {
    case LUA_TTABLE:
    case LUA_TSTRING:
    case LUA_TNUMBER:
    case LUA_TBOOLEAN:
        automem_init(&enc.mem, 128);
        lua_pushvalue(L, 1);    /* json_encode_value() works on the stack top */
        json_encode_value(&enc);
        lua_pushlstring(L, (char *)enc.mem.pdata, enc.mem.size);
        automem_uninit(&enc.mem);
        break;
    default:                    /* nil, none and any other type */
        lua_pushstring(L, "null");
        break;
    }
    return 1;
}
以上是关于DIY 一个 JSON解析器。的主要内容,如果未能解决你的问题,请参考以下文章