使用 Winsock 接收分块的 HTTP 数据
Posted
技术标签:
【中文标题】使用 Winsock 接收分块的 HTTP 数据【英文标题】:Receiving Chunked HTTP Data With Winsock 【发布时间】:2011-11-06 04:05:56 【问题描述】:我在使用 winsock 读取分块（chunked）的 HTTP 响应数据时遇到了问题。请求可以正常发出，并且收到了如下响应：
HTTP/1.1 200 OK
Server: LMAX/1.0
Content-Type: text/xml; charset=utf-8
Transfer-Encoding: chunked
Date: Mon, 29 Aug 2011 16:22:19 GMT
以上响应是通过 winsock 的 recv 收到的。然而在这之后，程序就一直挂起：我让接收循环无限运行，却再也收不到任何数据。
我认为这是 C++ 代码的问题，但也可能与我通过 stunnel 转发连接、将其封装为 HTTPS 有关。我有一个使用 C# 库编写的测试程序，它通过 stunnel 运行完全正常。我不明白为什么我的 C++ 循环在首次接收之后就收不到分块数据了。
这是有问题的循环...在上面的分块 ok 响应之后调用...
// Receive loop from the question: repeatedly reads from ConnectSocket and
// prints whatever arrived. The only exit visible in this snippet is the
// break that follows the SOCKET_ERROR handling.
while(true)
// A fresh zero-filled buffer is allocated on every iteration; no matching
// free() appears in this snippet.
recvBuf= (char*)calloc(DEFAULT_BUFLEN, sizeof(char));
// recv is allowed to write up to DEFAULT_BUFLEN bytes, i.e. the whole buffer.
iRes = recv(ConnectSocket, recvBuf, DEFAULT_BUFLEN, 0);
// Printed before iRes is checked, so this runs after successful reads as well.
cout << WSAGetLastError() << endl;
// NOTE(review): streams recvBuf as a C string; a read that fills all
// DEFAULT_BUFLEN bytes would leave no terminating zero in the buffer.
cout << "Recv: " << recvBuf << endl;
// NOTE(review): only SOCKET_ERROR is tested; a return value of 0 (connection
// closed by the peer, per the Winsock recv contract) is not handled here.
if (iRes==SOCKET_ERROR)
cout << recvBuf << endl;
err = WSAGetLastError();
wprintf(L"WSARecv failed with error: %d\n", err);
break;
有什么想法吗?
【问题讨论】:
我建议您不要在循环内分配缓冲区，否则每次迭代都会泄漏 DEFAULT_BUFLEN 字节的内存。另外，循环的终止条件是什么？有没有可能在执行到 recv 之前，数据就已经被读走了？
是的,我知道它正在泄漏内存,但我现在对此并不太在意。我可以轻松地将其切换到 memset。每次接收后我都会进行打印,这表明数据永远不会到达。
如果您把这段之前的代码也贴出来可能会有帮助，这样可以检查数据是否被无意中提前读取了。另请注意，如果 recv 返回错误，recvBuf 根本不会被修改，因此再用 cout 输出它毫无意义。
【参考方案1】:
您需要修改读取数据的代码。像您这样用固定长度的缓冲区直接读取 chunked 数据是行不通的。数据以长度可变的块发送，每个块前面都有一个块头，以字节为单位指定该块的实际长度，最后一个块的长度为 0。您必须读取并解析块头，才能正确处理各个块。请阅读 RFC 2616 第 3.6.1 节。您的逻辑应该更接近下面的伪代码：
// Pseudocode (not compilable C++): reads one complete HTTP response.
// Order of work: status line, header lines up to the blank line, then the
// body using whichever framing the headers announce (chunked, Content-Length,
// multipart boundary, or read-until-disconnect), and finally the decision
// whether the connection is kept open.
send request;
status = recv() a line of text until CRLF;
parse status as needed;
response-code = extract response-code from status;
response-version = extract response-version from status;
// Header section: one line per header, terminated by a blank line.
do
line = recv() a line of text until CRLF;
if (line is blank)
break;
store line in headers list;
while (true);
parse headers list as needed;
// These response codes and HEAD requests carry no body, so body reading is skipped.
if ((response-code is not in [1xx, 204, 304]) and (request was not "HEAD"))
if (Transfer-Encoding header is present and not "identity")
// Chunked body: each chunk is a size line, then that many bytes, then CRLF.
// A size of 0 ends the chunk loop.
do
line = recv a line of text until CRLF;
length = extract length from line;
extensions = extract extensions from line;
process extensions as needed; // optional
if (length == 0)
break;
recv() length number of bytes into destination buffer;
recv() and discard bytes until CRLF;
while (true);
// Header lines that follow the last chunk, again terminated by a blank line.
do
line = recv a line of text until CRLF;
if (line is blank)
break;
store line in headers list as needed;
while (true);
re-parse headers list as needed;
else if (Content-Length header is present)
recv() Content-Length number of bytes into destination buffer;
else if (Content-Type header starts with "multipart/")
boundary = extract boundary from Content-Type's "boundary" attribute;
recv() data into destination buffer until MIME termination boundary is reached;
else
recv() data into destination buffer until disconnected;
if (not disconnected)
if (response-version is "HTTP/1.1")
if (Connection header is "close")
close connection;
else
if (Connection header is not "keep-alive")
close connection;
check response-code for errors;
process destination buffer, per info in headers list;
【讨论】:
【参考方案2】:没错，您并不是按“块”来接收数据的，只是内容采用了分块编码。您需要自己设想一下，每次收到的缓冲区可能是什么样子。并不是每次 recv 恰好收到一个完整的块：有时缓冲区里先是上一个块的部分数据，接着是表示新块大小的那一行，后面再跟着一些新块的数据；有时只收到一小段块数据；还有时收到的是一些块数据加上表示新块大小的那一行的一部分，等等。把各种最坏情况都考虑进去，这并不简单。请阅读:http://www.jmarshall.com/easy/http/
在使用下面的代码之前，请先接收完所有响应头，直到遇到空行为止。内容在缓冲区中的起始位置记为 nContentStart。这段代码用到了一些我无法公开的内部类，但您应该能看懂大意 ;) 就我的测试而言，它工作正常，也不会泄漏内存。不过这件事并不简单，所以我不能完全保证！
// De-chunks an HTTP body: walks buf (nDataLen valid bytes, body starting at
// nContentStart) and appends the payload of each chunk to pData, receiving
// more bytes from m_socket whenever the current buffer is used up.
// Every failure path empties pData, calls Close() and leaves via ASSERTRETURN(false).
// NOTE(review): lpszLineBreak is presumably "\r\n" - confirm at its definition.
if (bChunked)
int nOffset = nContentStart;
// Bytes of the current chunk that still have to be copied to pData.
int nChunkLen = 0;
int nCopyLen;
while (true)
// Nothing left to parse at nOffset: give up with an empty result.
if (nOffset >= nDataLen)
pData->SetSize(0); Close(); ASSERTRETURN(false);
// copy data of previous chunk to caller's buffer
if (nChunkLen > 0)
// Copy no more than what is actually present in buf after nOffset.
nCopyLen = min(nChunkLen, nDataLen - nOffset);
n = pData->GetSize();
pData->SetSize(n + nCopyLen);
memcpy(pData->GetPtr() + n, buf.GetPtr() + nOffset, nCopyLen);
nChunkLen -= nCopyLen;
ASSERT(nChunkLen >= 0);
nOffset += nCopyLen;
// Chunk finished: step over the line break that follows the chunk data.
// NOTE(review): this assumes the trailing line break is already in buf; if it
// only arrives with a later Receive, nOffset can pass nDataLen - verify.
if (nChunkLen == 0)
nOffset += strlen(lpszLineBreak);
ASSERT(nOffset <= nDataLen);
// when previous chunk is copied completely, process new chunk
if (nChunkLen == 0 && nOffset < nDataLen)
// chunk length is specified on first line
p1 = buf.GetPtr() + nOffset;
// strstr needs zero-terminated data; the buf[nDataLen] = 0 writes below provide
// that after each Receive (the initial fill happens outside this snippet).
p2 = strstr(p1, lpszLineBreak);
while (!p2) // if we can't find the line break receive more data until we do
// Resize buf so RECEIVE_BUFFER_SIZE more bytes plus a terminator fit after the
// nDataLen bytes already held (assumes SetSize keeps current contents - confirm).
buf.SetSize(nDataLen + RECEIVE_BUFFER_SIZE + 1);
nReceived = m_socket.Receive((BYTE*)buf.GetPtr() + nDataLen, RECEIVE_BUFFER_SIZE);
if (nReceived == -1)
pData->SetSize(0); Close(); ASSERTRETURN(false); // connection error
if (nReceived == 0)
pData->SetSize(0); Close(); ASSERTRETURN(false); // all data already received but did not find line break
nDataLen += nReceived;
buf[nDataLen] = 0;
p1 = buf.GetPtr() + nOffset; // address of buffer likely changed
p2 = strstr(p1, lpszLineBreak);
// Terminate the size line in place, then move p2 to the first byte after it.
*p2 = 0;
p2 += strlen(lpszLineBreak);
// Cut the size line at the first ';' so only the part before it is parsed.
p3 = strchr(p1, ';');
if (p3)
*p3 = 0;
// The chunk size is read as a hexadecimal number.
if (sscanf(p1, "%X", &nChunkLen) != 1)
pData->SetSize(0); Close(); ASSERTRETURN(false);
if (nChunkLen < 0)
pData->SetSize(0); Close(); ASSERTRETURN(false);
if (nChunkLen == 0)
break; // last chunk received
// copy the following chunk data to caller's buffer
// Limited to the bytes that lie between p2 and the end of the valid data.
nCopyLen = min(nChunkLen, buf.GetPtr() + nDataLen - p2);
n = pData->GetSize();
pData->SetSize(n + nCopyLen);
memcpy(pData->GetPtr() + n, p2, nCopyLen);
nChunkLen -= nCopyLen;
ASSERT(nChunkLen >= 0);
nOffset = (p2 - buf.GetPtr()) + nCopyLen;
// Chunk finished inside this buffer: step over its trailing line break.
if (nChunkLen == 0)
nOffset += strlen(lpszLineBreak);
if (nChunkLen == 0 && nOffset < nDataLen)
continue; // a new chunk starts in this buffer at nOffset, no need to receive more data
// receive more data
// The buffer is refilled from its start, so earlier contents are not kept
// and parsing restarts at offset 0.
// NOTE(review): resetting nOffset to 0 discards a line-break skip that ran
// past the old buffer end; the next size line may then begin with that line
// break - verify.
buf.SetSize(RECEIVE_BUFFER_SIZE + 1);
nDataLen = m_socket.Receive((BYTE*)buf.GetPtr(), RECEIVE_BUFFER_SIZE);
if (nDataLen == -1)
pData->SetSize(0); Close(); ASSERTRETURN(false);
if (nDataLen == 0)
pData->SetSize(0); Close(); ASSERTRETURN(false);
buf[nDataLen] = 0;
nOffset = 0;
// TODO: receive optional footers and add them to m_headers
【讨论】:
以上是关于使用 Winsock 接收分块的 HTTP 数据的主要内容,如果未能解决你的问题,请参考以下文章
#WEB安全基础 : HTTP协议 | 0x11 HTTP的分块传输模块