URL 解析极简版
Posted 车斗
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了URL 解析极简版相关的知识,希望对你有一定的参考价值。
URL 解析极简版
网上找了很多,要么太复杂,要么有BUG。我写了一个解析URL的代码,从头到尾不分配内存,只有一个头文件:urlcodec.h。
/**
* @file urlcodec.h
* @brief url encode and decode
* https://www.bejson.com/enc/urlencode/
* @author zhang
* @version
* @date 2021
* @note
* @since 2015
*/
#ifndef URL_CODEC_INCLUDED
#define URL_CODEC_INCLUDED
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// #include <assert.h>
#ifdef __cplusplus
extern "C"
#endif
/**
 * Sample url_query_parse() callback: prints one "name=value" pair to stdout.
 *
 * @param paramid   1-based index of the parameter being reported
 * @param cbarg     opaque user argument (unused here)
 * @param namelen   number of bytes in name
 * @param name      parameter name, NOT NUL-terminated
 * @param valuelen  number of bytes in value
 * @param value     parameter value, NOT NUL-terminated
 * @return always 1 (0 would stop the enumeration, 1 continues it)
 */
static int url_param_print(int paramid, void *cbarg, int namelen, const char *name, int valuelen, const char *value)
{
    (void)cbarg;
    /* name/value are slices of the query string, so print them with an explicit precision */
    printf(" :param%d %.*s=%.*s\n", paramid, namelen, name, valuelen, value);
    /* 0: break, 1: continue */
    return 1;
}
/**
 * Percent-encode a NUL-terminated string.
 *
 * ASCII letters, digits and '-', '_', '.', '~' are copied unchanged, a space
 * becomes '+', and every other byte becomes "%XX" (upper-case hex).
 * No memory is allocated.
 *
 * @param url      NUL-terminated input string
 * @param outbuf   destination buffer (may be NULL when bufsize is 0)
 * @param bufsize  capacity of outbuf in bytes
 * @return number of bytes required for the complete encoded string INCLUDING
 *         the terminating NUL. The output is complete when the return value
 *         is <= bufsize; otherwise outbuf holds a NUL-terminated prefix made
 *         of whole encoded units only.
 *
 * Usage:
 *
 *   const char testUrl[] = "https://cn.bing.com/search?q=news&form=QBLH&sp=-1&pq=news&sc=8-4&qs=n&sk=&cvid=AFDF6C399DF44C41977CB0ED4AE87B33";
 *
 *   char encodedUrl[256];
 *   char decodedUrl[256];
 *
 *   size_t outsize = url_encode(testUrl, encodedUrl, sizeof(encodedUrl));
 *   if (outsize <= sizeof(encodedUrl)) {
 *       printf("success url_encode(): %s\n", encodedUrl);
 *
 *       outsize = url_decode(encodedUrl, decodedUrl, sizeof(decodedUrl));
 *       if (outsize <= sizeof(decodedUrl)) {
 *           printf("success url_decode(): %s\n", decodedUrl);
 *
 *           if (memcmp(testUrl, decodedUrl, outsize)) {
 *               printf("fatal error: urlcodec.h has bugs!\n");
 *           }
 *       } else {
 *           printf("failed url_decode: required outbuf at least: %d bytes\n", (int)outsize);
 *       }
 *   } else {
 *       printf("failed url_encode: required outbuf at least: %d bytes\n", (int)outsize);
 *   }
 */
static size_t url_encode(const char *url, char *outbuf, size_t bufsize)
{
    static const char hex_char_table[] = "0123456789ABCDEF";

    /* unsigned char: <ctype.h> functions and the shifts below must not see negative values */
    const unsigned char *p = (const unsigned char *) url;
    size_t used = 0;        /* bytes stored in outbuf, terminator excluded */
    size_t blen = 0;        /* bytes the full encoding needs, terminator excluded */
    int truncated = 0;      /* set once a unit did not fit; nothing is stored afterwards */

    for (; *p; p++) {
        char unit[3];
        size_t unitlen;

        if ((*p < 0x80 && isalnum(*p)) || *p == '-' || *p == '_' || *p == '.' || *p == '~') {
            unit[0] = (char) *p;
            unitlen = 1;
        } else if (*p == ' ') {
            unit[0] = '+';
            unitlen = 1;
        } else {
            unit[0] = '%';
            unit[1] = hex_char_table[(*p >> 4) & 15];
            unit[2] = hex_char_table[*p & 15];
            unitlen = 3;
        }

        blen += unitlen;

        /* strict '<' keeps one byte free for the terminating NUL */
        if (!truncated && used + unitlen < bufsize) {
            memcpy(outbuf + used, unit, unitlen);
            used += unitlen;
        } else {
            /* stop storing so the output stays a clean prefix, but keep counting */
            truncated = 1;
        }
    }

    if (bufsize > 0) {
        outbuf[used] = '\0';
    }

    return blen + 1;
}
/**
 * Value of one hexadecimal digit.
 *
 * @param ch  character code (as unsigned char value)
 * @return 0..15 for [0-9a-fA-F], -1 for anything else (including NUL)
 */
static int url_decode_hex_digit(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return ch - 'a' + 10;
    }
    if (ch >= 'A' && ch <= 'F') {
        return ch - 'A' + 10;
    }
    return -1;
}

/**
 * Decode a percent-encoded, NUL-terminated string.
 *
 * "%XX" with two hex digits becomes the byte 0xXX, '+' becomes a space and
 * every other byte is copied unchanged. A '%' that is not followed by two
 * hex digits is copied literally instead of being dropped or mis-decoded.
 * No memory is allocated.
 *
 * Note: "%00" decodes to a NUL byte, so the output may contain an embedded
 * terminator; use the return value, not strlen(), when that matters.
 *
 * @param encurl   NUL-terminated encoded input
 * @param outbuf   destination buffer (may be NULL when bufsize is 0)
 * @param bufsize  capacity of outbuf in bytes
 * @return number of bytes required for the complete decoded string INCLUDING
 *         the terminating NUL. The output is complete when the return value
 *         is <= bufsize; otherwise outbuf holds a NUL-terminated prefix.
 */
static size_t url_decode(const char *encurl, char *outbuf, size_t bufsize)
{
    const unsigned char *p = (const unsigned char *) encurl;
    size_t used = 0;    /* bytes stored in outbuf, terminator excluded */
    size_t blen = 0;    /* bytes the full decoding needs, terminator excluded */

    while (*p) {
        unsigned char out = *p;
        size_t consumed = 1;

        if (*p == '%') {
            /* p[1] is readable because *p != 0; p[2] is only read when p[1] is a hex digit */
            int hi = url_decode_hex_digit(p[1]);
            int lo = (hi < 0) ? -1 : url_decode_hex_digit(p[2]);

            if (lo >= 0) {
                out = (unsigned char) ((hi << 4) | lo);
                consumed = 3;
            }
        } else if (*p == '+') {
            out = ' ';
        }

        blen++;

        /* strict '<' keeps one byte free for the terminating NUL */
        if (used + 1 < bufsize) {
            outbuf[used++] = (char) out;
        }

        p += consumed;
    }

    if (bufsize > 0) {
        outbuf[used] = '\0';
    }

    return blen + 1;
}
/**
 * Components of a URL of the form
 *
 *   protocol://user:password@host:port/directory/file.extension?query#fragment
 *   https://john:hello@www.aspxfans.com:8080/world/news/index.asp?boardID=5&ID=24618&page=1#name
 *
 * Every pointer refers into the string handed to url_parse(); nothing is
 * copied and no slice is NUL-terminated, so always use the matching length.
 * The pointers are declared non-const for historical reasons but alias the
 * caller's const string and must not be written through.
 */
typedef struct
{
    char *protocol;     /* [mandatory] https (also the start of the whole URL) */
    int protocollen;    /* 5 */

    char *user;         /* <optional> john; points at the start of the authority even when absent */
    int userlen;        /* 4, or 0 when there is no userinfo */

    char *password;     /* <optional> hello; NULL when absent */
    int passwordlen;

    char *host;         /* [mandatory] www.aspxfans.com */
    int hostlen;

    char *port;         /* <optional> 8080; NULL when absent */
    int portlen;

    char *path;         /* <optional> /world/news/ (through the last '/'); NULL when absent */
    int pathlen;

    char *file;         /* <optional> index.asp; NULL when there is no path */
    int filelen;

    char *query;        /* <optional> boardID=5&ID=24618&page=1; NULL when absent */
    int querylen;

    char *fragment;     /* <optional> name; NULL when absent */
    int fragmentlen;
} url_parts_t;

/**
 * Split a URL into its components without allocating or copying.
 *
 * The authority (userinfo, host, port) ends at the first '/', '?' or '#'
 * after "://", so '@', ':' and '/' characters inside the path, query or
 * fragment never influence how the authority or path is split. A host in
 * square brackets (IPv6 literal) may contain ':'.
 *
 * URL Specification: https://blog.csdn.net/fjb2080/article/details/80552213
 *
 * @param url    NUL-terminated URL; must outlive every use of *parts
 * @param parts  receives the components; zeroed first
 * @return 0 on success, -1 when an argument is NULL, when "://" is missing,
 *         or when the protocol or the host is empty
 */
static int url_parse(const char *url, url_parts_t *parts)
{
    char *p;
    char *auth_end;     /* first byte after the authority */
    char *at;           /* last '@' inside the authority, or NULL */
    char *scan;
    char *tail;         /* first byte after the path (or after the authority when there is no path) */

    if (!parts) {
        return -1;
    }
    memset(parts, 0, sizeof(url_parts_t));
    if (!url) {
        return -1;
    }

    /* protocol: everything before the first "://" */
    parts->protocol = (char *) url;
    for (p = parts->protocol; *p; p++) {
        if (p[0] == ':' && p[1] == '/' && p[2] == '/') {
            break;
        }
    }
    if (!*p || p == parts->protocol) {
        return -1;
    }
    parts->protocollen = (int) (p - parts->protocol);
    parts->user = p + 3;
    parts->host = parts->user;

    /* delimit the authority before looking for '@' or ':' */
    auth_end = parts->user;
    while (*auth_end && *auth_end != '/' && *auth_end != '?' && *auth_end != '#') {
        auth_end++;
    }

    /* userinfo: user[:password] in front of the last '@' of the authority */
    at = NULL;
    for (scan = parts->user; scan < auth_end; scan++) {
        if (*scan == '@') {
            at = scan;
        }
    }
    if (at) {
        parts->host = at + 1;
        for (scan = parts->user; scan < at && *scan != ':'; scan++) {
            /* find end of user name */
        }
        parts->userlen = (int) (scan - parts->user);
        if (scan < at) {
            parts->password = scan + 1;
            parts->passwordlen = (int) (at - parts->password);
        }
    }

    /* host[:port]; skip over a bracketed IPv6 literal so its ':' are not taken as the port separator */
    scan = parts->host;
    if (*scan == '[') {
        while (scan < auth_end && *scan != ']') {
            scan++;
        }
    }
    while (scan < auth_end && *scan != ':') {
        scan++;
    }
    parts->hostlen = (int) (scan - parts->host);
    if (scan < auth_end) {
        parts->port = scan + 1;
        parts->portlen = (int) (auth_end - parts->port);
    }
    if (!parts->hostlen) {
        return -1;
    }

    /* path and file: split at the last '/' that precedes any '?' or '#' */
    tail = auth_end;
    if (*auth_end == '/') {
        char *last_slash = auth_end;

        parts->path = auth_end;
        for (; *tail && *tail != '?' && *tail != '#'; tail++) {
            if (*tail == '/') {
                last_slash = tail;
            }
        }
        parts->file = last_slash + 1;
        parts->pathlen = (int) (parts->file - parts->path);
        parts->filelen = (int) (tail - parts->file);
    }

    /* query: between '?' and '#' (or end of string) */
    if (*tail == '?') {
        parts->query = tail + 1;
        for (tail = parts->query; *tail && *tail != '#'; tail++) {
            /* find end of query */
        }
        parts->querylen = (int) (tail - parts->query);
    }

    /* fragment: everything after '#' */
    if (*tail == '#') {
        parts->fragment = tail + 1;
        parts->fragmentlen = (int) strlen(parts->fragment);
    }

    return 0;
}
/**
 * Enumerate the "name=value" pairs of a query string without copying it.
 *
 * Pairs are separated by '&'; the FIRST '=' of a pair separates name from
 * value, so a value may itself contain '='. Pairs with an empty name or an
 * empty value (for example "sk=" or a bare "flag") are skipped and not
 * counted. Names and values are reported as raw slices: they are neither
 * NUL-terminated nor percent-decoded.
 *
 * @param query         query string (need not be NUL-terminated)
 * @param querylen      number of bytes of query to examine
 * @param url_param_cb  called as cb(index, cbarg, namelen, name, valuelen, value)
 *                      for each pair, index starting at 1; return 0 to stop,
 *                      non-zero to continue. May be NULL to only count pairs.
 * @param cbarg         opaque pointer passed through to the callback
 * @return number of pairs reported (including the one on which the callback
 *         stopped the enumeration); 0 for NULL or empty input
 *
 * Usage:
 *
 *   url_parts_t parts;
 *   if (url_parse("https://john:hello@www.aspxfans.com:8080/world/news/index.asp?boardID=5&ID=24618&page=1#name", &parts) == 0) {
 *       // parse success
 *       if (parts.querylen) {
 *           // "boardID=5&ID=24618&page=1"
 *           url_query_parse(parts.query, parts.querylen, url_param_print, NULL);
 *       }
 *   }
 */
static int url_query_parse(const char *query, int querylen, int (*url_param_cb)(int, void *, int, const char *, int, const char *), void *cbarg)
{
    int params = 0;
    const char *p;
    const char *end;
    const char *name;
    const char *value = NULL;   /* NULL until the '=' of the current pair is seen */
    int namelen = 0;

    if (!query || querylen <= 0) {
        return 0;
    }

    end = query + querylen;
    name = query;

    /* the end of input is handled as one more '&' so the last pair takes the same path */
    for (p = query; ; p++) {
        if (p == end || *p == '&') {
            int valuelen = value ? (int) (p - value) : 0;

            if (namelen && valuelen) {
                params++;
                if (url_param_cb && !url_param_cb(params, cbarg, namelen, name, valuelen, value)) {
                    return params;
                }
            }
            if (p == end) {
                break;
            }

            /* start the next pair */
            name = p + 1;
            namelen = 0;
            value = NULL;
        } else if (*p == '=' && !value) {
            /* only the first '=' splits the pair; later ones belong to the value */
            namelen = (int) (p - name);
            value = p + 1;
        }
    }

    return params;
}
static void url_parts_print(const url_parts_t *parts)
printf("url=%s\\n", parts->protocol);
printf(" .%-10s %.*s\\n", "protocol", parts->protocollen, parts->protocol);
printf(" .%-10s %.*s\\n", "user", parts->userlen, parts->user);
printf(" .%-10s %.*s\\n", "password", parts->passwordlen, parts->password);
printf(" .%-10s %.*s\\n", "host", parts->hostlen, parts->host);
printf(" .%-10s %.*s\\n", "port", parts->portlen, parts->port);
printf(" .%-10s %.*s\\n", "path", parts->pathlen, parts->path);
printf(" .%-10s %.*s\\n", "file", parts->filelen, parts->file);
printf(" .%-10s %.*s\\n", "fragment", parts->fragmentlen, parts->fragment);
printf(" .%-10s %.*s\\n", "query", parts->querylen, parts->query);
if (parts->querylen)
url_query_parse(parts->query, parts->querylen, url_param_print, NULL);
#ifdef __cplusplus
#endif
#endif /* URL_CODEC_INCLUDED */
以上是关于 URL 解析极简版的主要内容,如果未能解决你的问题,请参考以下文章。