c++ 高效解析url算法
Posted qianbo_insist
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了c++ 高效解析url算法相关的知识,希望对你有一定的参考价值。
协议解析url
用处
在 http、rtp、rtmp、rtsp 等协议中都需要解析 url。很多语言都有封装好的解析 URL 的工具类库;在 c++ 写的 server 中如果需要解析 url,就需要自己写一个高效率的解析封装方法。这里使用 c++,仅仅使用 c++ STL 的 string 类以及 c++ 的封装特性,解析部分尽量使用 c 的方式,以便于改进。
比较和查找
在url中查找字符串,如?,&,等等
/**
 * Compare the first clen bytes of two buffers.
 *
 * The length is taken by value (it is an input only), so callers may pass
 * literals, constants or temporaries as well as variables.
 *
 * @param pos      first buffer; at least clen bytes must be readable unless
 *                 a mismatch occurs earlier
 * @param compare  second buffer, same requirement
 * @param clen     number of bytes to compare; 0 always compares equal
 * @return 0 when all clen bytes match, -1 at the first differing byte
 */
static inline int judge_equeal(const char *pos, const char *compare, size_t clen)
{
    for (size_t i = 0; i < clen; i++)
    {
        if (pos[i] != compare[i])
            return -1;
    }
    return 0;
}
/**
 * Find the first occurrence of compare inside u.
 *
 * Uses strstr so that every start position is examined, including a match
 * that ends exactly at the end of u (e.g. "def" in "abcdef", or u equal to
 * compare).
 *
 * @param u        NUL-terminated string to search in
 * @param compare  NUL-terminated string to search for; an empty string
 *                 matches at offset 0
 * @return zero-based offset of the first match, or -1 when there is no match
 *         or either argument is NULL
 */
static inline int string_find(const char *u, const char *compare)
{
    if (u == NULL || compare == NULL)
        return -1;

    const char *hit = strstr(u, compare);
    if (hit == NULL)
        return -1;
    return (int)(hit - u);
}
/**
 * Find the first occurrence of compare inside u and return a pointer to it.
 *
 * Example: u = "abcdef", compare = "def"  ->  returns u + 3.
 *
 * Uses strstr so that every start position is examined, including a match
 * that ends exactly at the end of u.
 *
 * @param u        NUL-terminated string to search in
 * @param compare  NUL-terminated string to search for; an empty string
 *                 matches at u itself
 * @return pointer into u at the first match, or NULL when there is no match
 *         or either argument is NULL
 */
static inline const char* string_find_pos(const char *u, const char *compare)
{
    if (u == NULL || compare == NULL)
        return NULL;

    const char *hit = strstr(u, compare);
    return hit;
}
数据结构定义
数据结构中分别为协议,主机字符串,主机端口号,以及uri,如果需要param参数,如
http://aaa.com:8080/?x=123
求取x的值,需要调用GetParam函数,具体请看后面main函数sample
// Components of a parsed URL. A freshly constructed or cleared value has
// empty strings and port 80.
typedef struct UrlParam
{
    string protocol;            // text before "://"
    string host;                // host name or address
    unsigned short port = 80;   // port number, 80 when none was parsed
    string uri;                 // remainder of the URL after the host part

    // Put every field back to its initial state.
    void clear()
    {
        port = 80;
        uri.clear();
        host.clear();
        protocol.clear();
    }
} TUrlParam;
解析类封装
class TParseUrl
{
protected:
static int parse_domain(const char *pos, const char *posend, TUrlParam & param)
{
int point = string_find(pos, ":");
if(point>=0)
{
param.host = string(pos, point);
pos += point + 1;
string tmp = string(pos, posend - pos) ;
if(IsNumber(tmp.c_str()))
param.port = atoi(tmp.c_str());
return 0;
}
return -1;
}
static bool IsNumber(const char * num)
{
int length = (int)strlen(num);
for (int i = 0; i < length; i++)
{
if (i == 0 && (num[i] == '+' || num[i] == '-'))
{
if (length > 1)
continue;
return false;
}
if (!isdigit(num[i]))
return false;
}
return true;
}
public:
TParseUrl(const char * url) {
ParseUrl(url, v_param);
}
virtual ~TParseUrl() {};
TUrlParam v_param;
#define POS_JUDGE if(pos>=posend) return -1
#define POS_JUDGE_OK if(pos>=posend) return 0
static int ParseUrl(const char *url, TUrlParam ¶m)
{
//memset(¶m, 0, sizeof(param));
const char * posend = url + strlen(url) - 1;
param.uri = url;
const char * pos = url;
int point = 0;
if ((point = string_find(pos, "://")) >= 0)
{
param.protocol = string(url, point);
}
else
return -1;
pos += point + 3; //strlen("://")
POS_JUDGE;
if ((point = string_find(pos, "/")) >= 0)
{
param.host = string(pos, point);
const char *end = pos + point;
parse_domain(pos, end, param);
param.uri = string(pos + point + 1);
}
else
{
//the left all is domain
int hlen = (int)(posend - pos + 1);
param.host = string(pos,hlen);
const char *end = pos + hlen - 1;
parse_domain(pos, end, param);
param.uri = "/";
return 0;
}
return 0;
}
string GetParam(const char *param)
{
int point = -1;
const char *ustart = v_param.uri.c_str();
const char * start = string_find_pos(ustart, "?");
if (start != NULL)
{
++start;
//?a=abc&b=ddd
string par = param;
par +="=";
start = string_find_pos(start, par.c_str());
if (start != NULL )
{
const char * j = start - 1;
char c = *j;
if (c == '&' || c == '?')
{
start += par.length();
const char * end = string_find_pos(start, "&");
if (end != NULL)
{
return string(start, end);
}
return string(start);
}
}
}
return "";
}
void SetUrl(const char *url)
{
v_param.clear();
ParseUrl(url, v_param);
}
};
调用
在GetParam的时候,如果没有
if (c == '&' || c == '?')
的判断是不行的。为了加快判断,我们直接做模式匹配;但是对于 "abc=" 这样的参数,如果查找 "c=" 时匹配到了 "abc=" 的末尾就去取值,结果是错误的,因此判断匹配位置的前一个字节是 ? 或者 & 是明智的做法。
int main()
{
cout << "======================================================"<<endl;
const char * url = "rtp://234.5.6.7:8000/live/1000/s1?a=abc&b=ddd";
cout << url << endl;
TParseUrl parse(url);
cout << parse.v_param.protocol << endl;
cout << parse.v_param.host<< endl;
cout << parse.v_param.port << endl;
cout << parse.v_param.uri << endl;
cout<< parse.GetParam("a")<<endl;
cout << parse.GetParam("b") << endl;
cout << endl << endl;
cout << "======================================================"<<endl;
url = "https://127.0.0.1:9001/abc/qianbo/ss?abc=qianbo&abcd=test";
cout << url << endl;
parse.SetUrl(url);
cout << parse.v_param.protocol << endl;
cout << parse.v_param.host << endl;
cout << parse.v_param.port << endl;
cout << parse.v_param.uri << endl;
cout << parse.GetParam("c") << endl;
cout << parse.GetParam("d") << endl;
cout << parse.GetParam("abc") << endl;
cout << parse.GetParam("abcd") << endl;
}
测试
花了一小段时间写这个解析,希望有使用的人提出bug,留言。
全部代码,复制可用
/*
Author:钱波
email: 418511899@qq.com
wei: 18091589062
func :类
time: 2021年6月9日
*/
#pragma once
#include <ctype.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
using namespace std;
/**
 * Compare the first clen bytes of two buffers.
 *
 * The length is taken by value (it is an input only), so callers may pass
 * literals, constants or temporaries as well as variables.
 *
 * @param pos      first buffer; at least clen bytes must be readable unless
 *                 a mismatch occurs earlier
 * @param compare  second buffer, same requirement
 * @param clen     number of bytes to compare; 0 always compares equal
 * @return 0 when all clen bytes match, -1 at the first differing byte
 */
static inline int judge_equeal(const char *pos, const char *compare, size_t clen)
{
    for (size_t i = 0; i < clen; i++)
    {
        if (pos[i] != compare[i])
            return -1;
    }
    return 0;
}
/**
 * Find the first occurrence of compare inside u.
 *
 * Uses strstr so that every start position is examined, including a match
 * that ends exactly at the end of u (e.g. "def" in "abcdef", or u equal to
 * compare).
 *
 * @param u        NUL-terminated string to search in
 * @param compare  NUL-terminated string to search for; an empty string
 *                 matches at offset 0
 * @return zero-based offset of the first match, or -1 when there is no match
 *         or either argument is NULL
 */
static inline int string_find(const char *u, const char *compare)
{
    if (u == NULL || compare == NULL)
        return -1;

    const char *hit = strstr(u, compare);
    if (hit == NULL)
        return -1;
    return (int)(hit - u);
}
/**
 * Find the first occurrence of compare inside u and return a pointer to it.
 *
 * Example: u = "abcdef", compare = "def"  ->  returns u + 3.
 *
 * Uses strstr so that every start position is examined, including a match
 * that ends exactly at the end of u.
 *
 * @param u        NUL-terminated string to search in
 * @param compare  NUL-terminated string to search for; an empty string
 *                 matches at u itself
 * @return pointer into u at the first match, or NULL when there is no match
 *         or either argument is NULL
 */
static inline const char* string_find_pos(const char *u, const char *compare)
{
    if (u == NULL || compare == NULL)
        return NULL;

    const char *hit = strstr(u, compare);
    return hit;
}
// Components of a parsed URL. A freshly constructed or cleared value has
// empty strings and port 80.
typedef struct UrlParam
{
    string protocol;            // text before "://"
    string host;                // host name or address
    unsigned short port = 80;   // port number, 80 when none was parsed
    string uri;                 // remainder of the URL after the host part

    // Put every field back to its initial state.
    void clear()
    {
        port = 80;
        uri.clear();
        host.clear();
        protocol.clear();
    }
} TUrlParam;
class TParseUrl
{
protected:
static int parse_domain(const char *pos, const char *posend, TUrlParam & param)
{
int point = string_find(pos, ":");
if(point>=0)
{
param.host = string(pos, point);
pos += point + 1;
string tmp = string(pos, posend - pos) ;
if(IsNumber(tmp.c_str()))
param.port = atoi(tmp.c_str());
return 0;
}
return -1;
}
static bool IsNumber(const char * num)
{
int length = (int)strlen(num);
for (int i = 0; i < length; i++)
{
if (i == 0 && (num[i] == '+' || num[i] == '-'))
{
if (length > 1)
continue;
return false;
}
if (!isdigit(num[i]))
return false;
}
return true;
}
public:
TParseUrl(const char * url) {
ParseUrl(url, v_param);
}
virtual ~TParseUrl() {};
TUrlParam v_param;
#define POS_JUDGE if(pos>=posend) return -1
#define POS_JUDGE_OK if(pos>=posend) return 0
static int ParseUrl(const char *url, TUrlParam ¶m)
{
//memset(¶m, 0, sizeof(param));
const char * posend = url + strlen(url) - 1;
param.uri = url;
const char * pos = url;
int point = 0;
if ((point = string_find(pos, "://")) >= 0)
{
param.protocol = string(url, point);
}
else
return -1;
pos += point + 3; //strlen("://")
POS_JUDGE;
if ((point = string_find(pos, "/")) >= 0)
{
param.host = string(pos, point);
const char *end = pos + point;
parse_domain(pos, end, param);
param.uri以上是关于c++ 高效解析url算法的主要内容,如果未能解决你的问题,请参考以下文章
华为OD机试 - 热点网络统计(JavaScript) | 机试题+算法思路+考点+代码解析 2023