PHP/Curl:在下载正文之前检查响应标头
Posted
技术标签:
【中文标题】PHP/Curl:在下载正文之前检查响应标头【英文标题】:PHP/Curl: inspecting response headers before downloading body 【发布时间】:2013-06-27 17:43:55 【问题描述】:在 php 中使用 Curl,有没有办法在 下载正文之前检查 HTTP 响应标头?
假设我向某个 URI 发出 GET 请求,并且我只想在 Content-type
是 text/html
时获取内容。我知道我可以先发出 HEAD 请求,然后再决定是否使用 GET,但是否可以只使用一个请求?
我基本上是在寻找以下 C# 代码的等效项:
// Issue a single GET; GetResponse() returns once the headers have arrived,
// so the Content-Type can be inspected before any of the body is read.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://some.uri");
string body = null;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    if (response.ContentType.Equals("text/html"))
    {
        // Only now is the response stream actually consumed.
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            body = reader.ReadToEnd();
        }
    }
    // else: do nothing - disposing the response drops it without reading the body
}
有可能吗?
【问题讨论】:
你试过CURLOPT_HEADER
和CURLOPT_NOBODY
选项吗?
@bystwn22 CURLOPT_NOBODY
使 curl 发送 HEAD 请求。我知道可以使用 HEAD + 条件 GET 来完成;我正在寻找的是一种在一个请求中执行此操作的方法。
嗯。是的,明白了,请看下面我的回答,希望它能起作用 :) 如果不行,请见谅,我是新手 :)
虽然默认情况下未启用,但http extension 更像 java/c#,如果您愿意的话。
【参考方案1】:
是的,我知道了,在这种情况下,您可以使用 CURLOPT_WRITEFUNCTION
。
为了让事情变得更快,我在所有项目中都使用了我自己的 curl
类。
让我也发布一下。
请参阅下面的代码。
<?php
require_once("curl.class.php");

/** Everything the write callback receives (headers first, then body) is appended here. **/
$response_body = null;

/** Total size of the response headers, as reported by cURL from inside the callback. **/
$header_size = null;

/** Initialize the curl object. **/
$curl = new ipCurl("http://example.com/images/someimage.jpg");

/** Register the callback that inspects the Content-Type while data is arriving. **/
$curl->setReadCallback("my_read_callback");

/** Include the headers in the stream handed to the callback. **/
$curl->includeHeader();

/** Run the transfer. **/
$curl->createCurl();

if ($curl->getError() !== 0) {
    /** Something went wrong: print the error code and the error message. **/
    echo $curl->getError().":".$curl->getErrorMessage();
    if ($curl->getError() === 23) {
        // Error 23 is what the aborted callback produces: it is not an image
        // file ("Failed writing body ...").
    }
} else {
    /** Everything went fine: strip the leading headers and keep only the body. **/
    // The casts keep substr() well-defined when the callback never ran and the
    // two globals are still null.
    $response_body = substr((string)$response_body, (int)$header_size);
    if (file_put_contents("image.jpg", $response_body) === false) {
        echo "Failed to write image.jpg";
    }
}
/**
 * cURL write callback: buffers the incoming data, but aborts the transfer as
 * soon as the response turns out not to be a JPEG image.
 *
 * Results are published through the globals $response_body (accumulated raw
 * data) and $header_size (header length reported by cURL).
 *
 * @param mixed  $ch   The cURL handle the data belongs to. Taken by value:
 *                     a by-reference parameter is not needed to call
 *                     curl_getinfo() on it.
 * @param string $data The chunk that has just been received.
 *
 * @return int strlen($data) to continue, or -1 to abort. Any value that differs
 *             from the chunk length makes cURL stop; the driver script sees
 *             this as error 23.
 */
function my_read_callback($ch, $data)
{
    global $response_body, $header_size;

    /** Get the content type; it stays empty until the header has been parsed. **/
    $content_type = trim((string)curl_getinfo($ch, CURLINFO_CONTENT_TYPE));

    if ($content_type !== "") {
        // Compare only the media type so that values such as
        // "image/jpeg; charset=binary" or "Image/JPEG" are still accepted.
        $parts = explode(";", $content_type, 2);
        $media_type = strtolower(trim($parts[0]));

        /** Stop executing curl if it is not an image. **/
        if ($media_type !== "image/jpeg") {
            // -1 can never equal a chunk length, unlike false (which is 0 and
            // would match an empty chunk).
            return -1;
        }

        /** Size of the header, needed later to cut it off the buffered data. **/
        $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    }

    /** Append the received chunk to $response_body. **/
    $response_body .= $data;

    /** Report the whole chunk as consumed. **/
    return strlen($data);
}
?>
curl.class.php
文件内容
<?php
/**
 * Small fluent wrapper around one cURL handle.
 *
 * The constructor creates the handle and applies a set of defaults; every
 * setter returns $this so calls can be chained. createCurl() performs the
 * transfer and the get*() methods expose its outcome.
 */
class ipCurl
{
    /** cURL handle created by curl_init() in the constructor. */
    private $ch = null;
    /** URL given to the constructor; used when createCurl() gets none. */
    private $url = null;
    /** curl_errno() of the last transfer. */
    private $_error = 0;
    /** curl_error() of the last transfer, or the parsed HTTP status text. */
    private $_errmsg = null;
    /** Full curl_getinfo() array of the last transfer. */
    private $_header = null;
    /** Return value of curl_exec() for the last transfer. */
    private $_webpage = null;
    /** HTTP status code of the last transfer. */
    private $_status = 0;
    /** Stream opened by setFile(); kept so it can be closed again. */
    private $_fp = null;

    /**
     * Create the handle and apply the default options.
     *
     * @param string|null $url Default URL for createCurl().
     *
     * @throws Exception When the cURL functions are not available.
     */
    public function __construct($url = null)
    {
        $this->url = $url;
        if (!function_exists("curl_init")) {
            throw new Exception("Fatal Error: Module 'Curl' is not installed properly");
        }

        $this->ch = curl_init();
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->ch, CURLOPT_FRESH_CONNECT, false);
        curl_setopt($this->ch, CURLOPT_FORBID_REUSE, false);

        $this->setTimout(40);
        $this->setConnTimout(30);
        $this->followLocation();
        $this->setMaxRedirects(4);
        $this->excludeHeader();
        $this->includeBody();
        $this->verifySSL();
        $this->setBinaryTransfer();
        $this->setReferer($this->defaultReferer());
        $this->setUserAgent();
    }

    /**
     * Release the cURL handle and any stream opened by setFile().
     */
    public function __destruct()
    {
        // The handle is still null when the constructor threw before curl_init().
        if ($this->ch) {
            curl_close($this->ch);
        }
        $this->ch = null;

        if (is_resource($this->_fp)) {
            fclose($this->_fp);
        }
        $this->_fp = null;
    }

    /**
     * Register the callback cURL invokes for received data
     * (CURLOPT_WRITEFUNCTION).
     *
     * @param callable|null $callback
     *
     * @return $this
     */
    public function setReadCallback($callback = null)
    {
        curl_setopt($this->ch, CURLOPT_WRITEFUNCTION, $callback);
        return $this;
    }

    /**
     * Enable progress reporting and register the progress callback.
     *
     * @param callable|null $callback
     * @param int           $buffer   Value for CURLOPT_BUFFERSIZE.
     *
     * @return $this
     */
    public function setProgressCallback($callback = null, $buffer = 128)
    {
        curl_setopt($this->ch, CURLOPT_NOPROGRESS, false);
        curl_setopt($this->ch, CURLOPT_PROGRESSFUNCTION, $callback);
        curl_setopt($this->ch, CURLOPT_BUFFERSIZE, $buffer);
        return $this;
    }

    /**
     * Turn CURLOPT_HEADER on.
     *
     * @return $this
     */
    public function includeHeader()
    {
        curl_setopt($this->ch, CURLOPT_HEADER, true);
        return $this;
    }

    /**
     * Turn CURLOPT_HEADER off.
     *
     * @return $this
     */
    public function excludeHeader()
    {
        curl_setopt($this->ch, CURLOPT_HEADER, false);
        return $this;
    }

    /**
     * Turn CURLOPT_NOBODY off.
     *
     * @return $this
     */
    public function includeBody()
    {
        curl_setopt($this->ch, CURLOPT_NOBODY, false);
        return $this;
    }

    /**
     * Turn CURLOPT_NOBODY on.
     *
     * @return $this
     */
    public function excludeBody()
    {
        curl_setopt($this->ch, CURLOPT_NOBODY, true);
        return $this;
    }

    /**
     * Limit the number of redirects; left untouched when is_safe_mode() reports
     * a restricted environment.
     *
     * @param int $redirects
     *
     * @return $this
     */
    public function setMaxRedirects($redirects = 4)
    {
        if ($this->is_safe_mode()) {
            return $this;
        }
        curl_setopt($this->ch, CURLOPT_MAXREDIRS, $redirects);
        return $this;
    }

    /**
     * Follow redirects; falls back to unfollowLocation() when is_safe_mode()
     * reports a restricted environment.
     *
     * @return $this
     */
    public function followLocation()
    {
        if ($this->is_safe_mode()) {
            return $this->unfollowLocation();
        }
        curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, true);
        return $this;
    }

    /**
     * Do not follow redirects.
     *
     * @return $this
     */
    public function unfollowLocation()
    {
        curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, false);
        return $this;
    }

    /**
     * Set the Referer header.
     *
     * @param string|null $referer
     *
     * @return $this
     */
    public function setReferer($referer = null)
    {
        curl_setopt($this->ch, CURLOPT_REFERER, $referer);
        return $this;
    }

    /**
     * Set CURLOPT_BINARYTRANSFER.
     *
     * @param bool $binary
     *
     * @return $this
     */
    public function setBinaryTransfer($binary = false)
    {
        curl_setopt($this->ch, CURLOPT_BINARYTRANSFER, $binary);
        return $this;
    }

    /**
     * Set the overall transfer timeout (CURLOPT_TIMEOUT).
     *
     * @param int $timeout
     *
     * @return $this
     */
    public function setTimout($timeout)
    {
        curl_setopt($this->ch, CURLOPT_TIMEOUT, $timeout);
        return $this;
    }

    /**
     * Set the connection timeout (CURLOPT_CONNECTTIMEOUT).
     *
     * @param int $timeout
     *
     * @return $this
     */
    public function setConnTimout($timeout)
    {
        curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        return $this;
    }

    /**
     * Set the User-Agent header; a falsy value selects the built-in
     * browser-like default.
     *
     * @param string|null $userAgent
     *
     * @return $this
     */
    public function setUserAgent($userAgent = null)
    {
        if (!$userAgent) {
            $userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";
        }
        curl_setopt($this->ch, CURLOPT_USERAGENT, $userAgent);
        return $this;
    }

    /**
     * Route the request through a proxy using basic proxy authentication.
     *
     * @param string|null $url      Proxy host.
     * @param int         $port     Proxy port; ignored unless greater than 0.
     * @param string|null $username Credentials are only sent when this is truthy.
     * @param string|null $password
     *
     * @return $this
     */
    public function setProxy($url = null, $port = 0, $username = null, $password = null)
    {
        curl_setopt($this->ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
        curl_setopt($this->ch, CURLOPT_PROXY, $url.(($port) > 0 ? ":".$port : null));
        if ($port > 0) {
            curl_setopt($this->ch, CURLOPT_PROXYPORT, $port);
        }
        if ($username) {
            curl_setopt($this->ch, CURLOPT_PROXYUSERPWD, $username.":".$password);
        }
        return $this;
    }

    /**
     * Set the credentials for the target server (CURLOPT_USERPWD).
     *
     * @param string|null $username
     * @param string|null $password
     *
     * @return $this
     */
    public function setAuth($username = null, $password = null)
    {
        curl_setopt($this->ch, CURLOPT_USERPWD, $username.':'.$password);
        return $this;
    }

    /**
     * Use one file as both cookie jar and cookie source; a falsy name is ignored.
     *
     * @param string $file
     *
     * @return $this
     */
    public function setCookiFile($file = "cookie.txt")
    {
        if (!$file) {
            return $this;
        }
        curl_setopt($this->ch, CURLOPT_COOKIEJAR, $file);
        curl_setopt($this->ch, CURLOPT_COOKIEFILE, $file);
        return $this;
    }

    /**
     * Switch certificate verification on or off.
     *
     * NOTE(review): the default ($ssl = false), which the constructor also
     * uses, disables peer verification. Callers talking to HTTPS endpoints
     * should call verifySSL(true).
     *
     * @param bool $ssl
     *
     * @return $this
     */
    public function verifySSL($ssl = false)
    {
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, $ssl ? true : false);
        // Host name checking stays at 2 in both modes.
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 2);
        return $this;
    }

    /**
     * Turn the request into a POST.
     *
     * @param array|string|null $postFields Arrays are URL-encoded with
     *                                      http_build_query() unless
     *                                      $keep_array is true.
     * @param bool              $keep_array Pass an array through unchanged.
     *
     * @return $this
     */
    public function setPost($postFields = null, $keep_array = false)
    {
        if (is_array($postFields) && !$keep_array) {
            $postFields = http_build_query($postFields);
        }
        curl_setopt($this->ch, CURLOPT_POST, true);
        curl_setopt($this->ch, CURLOPT_POSTFIELDS, $postFields);
        return $this;
    }

    /**
     * Write the response into a file (CURLOPT_FILE) instead of returning it.
     *
     * The file is opened in "w" mode, so an existing file is truncated. The
     * call is silently ignored when no name is given or the target cannot be
     * written; a missing file is created when its directory is writable.
     *
     * @param string|null $file
     *
     * @return $this
     */
    public function setFile($file = null)
    {
        if ($file === null || $file === "") {
            return $this;
        }

        // The stream is opened for writing, so writability is what matters.
        // A file that does not exist yet is fine if its directory is writable.
        if (file_exists($file)) {
            $writable = is_file($file) && is_writable($file);
        } else {
            $directory = dirname($file);
            $writable = is_dir($directory) && is_writable($directory);
        }
        if (!$writable) {
            return $this;
        }

        $fp = fopen($file, "w");
        if ($fp === false) {
            return $this;
        }

        curl_setopt($this->ch, CURLOPT_FILE, $fp);

        // Close a stream left over from an earlier call, then remember the new
        // one so the destructor can close it.
        if (is_resource($this->_fp)) {
            fclose($this->_fp);
        }
        $this->_fp = $fp;

        return $this;
    }

    /**
     * Set custom request headers (CURLOPT_HTTPHEADER).
     *
     * @param array $header
     *
     * @return $this
     */
    public function setHeader($header = array( "Expect:" ))
    {
        curl_setopt($this->ch, CURLOPT_HTTPHEADER, $header);
        return $this;
    }

    /**
     * Execute the transfer and record its result, status, error and info.
     *
     * @param string|null $url Overrides the URL given to the constructor.
     *
     * @return $this
     *
     * @throws Exception When neither this call nor the constructor got a URL.
     */
    public function createCurl($url = null)
    {
        $url = ($url) ? trim($url) : trim((string)$this->url);
        if (!$url) {
            throw new Exception("Fatal Error: you must provide a valid url before calling 'createCurl'");
        }

        curl_setopt($this->ch, CURLOPT_URL, $url);
        $this->_webpage = curl_exec($this->ch);
        $this->_status = (int)curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
        $this->_error = (int)curl_errno($this->ch);
        $this->_errmsg = curl_error($this->ch);
        $this->_header = curl_getinfo($this->ch);

        // Without a cURL error message, fall back to a description of the HTTP status.
        if (!$this->_errmsg) {
            $this->_errmsg = $this->parse_http_code($this->_status);
        }
        return $this;
    }

    /**
     * Look up a description for an HTTP status code via ipStatusCodes::info().
     *
     * @param int $code
     *
     * @return mixed|null Null when the ipStatusCodes class is not loaded.
     */
    private function parse_http_code($code = 404)
    {
        $code = (int)$code;
        if (!class_exists("ipStatusCodes")) {
            return null;
        }
        return ipStatusCodes::info($code);
    }

    /**
     * Report whether open_basedir is set or safe_mode is enabled; the redirect
     * options are skipped in that case.
     *
     * @return bool
     */
    private function is_safe_mode()
    {
        if ((string)ini_get('open_basedir') !== '') {
            return true;
        }
        // Parse the setting as a boolean instead of comparing it with the
        // literal 'Off'; an unavailable setting (ini_get() === false) counts
        // as disabled.
        return filter_var(ini_get('safe_mode'), FILTER_VALIDATE_BOOLEAN) === true;
    }

    /**
     * Build the default Referer from the current request, tolerating contexts
     * (such as the command line) where the server variables are not set.
     *
     * @return string
     */
    private function defaultReferer()
    {
        $host = isset($_SERVER["SERVER_NAME"]) ? $_SERVER["SERVER_NAME"] : "";
        $uri = isset($_SERVER["REQUEST_URI"]) ? $_SERVER["REQUEST_URI"] : "";
        return $host.$uri;
    }

    /**
     * @return int HTTP status code of the last transfer.
     */
    public function getStatus()
    {
        return $this->_status;
    }

    /**
     * @return mixed Whatever curl_exec() returned for the last transfer.
     */
    public function getResponse()
    {
        return $this->_webpage;
    }

    /**
     * @return array|null The curl_getinfo() array of the last transfer.
     */
    public function getHeader()
    {
        return $this->_header;
    }

    /**
     * @return int cURL error number of the last transfer (0 means no error).
     */
    public function getError()
    {
        return $this->_error;
    }

    /**
     * @return string|null Error message of the last transfer.
     */
    public function getErrorMessage()
    {
        return $this->_errmsg;
    }
}
?>
我不是强迫你使用我的课程,你也可以像这样简单地使用它......
<?php
/** Register the callback on a plain cURL handle; $ch is assumed to be an existing handle. **/
curl_setopt($ch, CURLOPT_WRITEFUNCTION, "my_callback");

/**
 * Write callback skeleton: put the same logic here as in the
 * "my_read_callback" function shown earlier.
 *
 * @param mixed  $ch   The cURL handle.
 * @param string $data The chunk that has just been received.
 *
 * @return int Number of bytes handled; it must equal strlen($data) for the
 *             transfer to continue.
 */
function my_callback($ch = null, $data = "")
{
    // same as the "my_read_callback" function in the code above
    return strlen($data);
}
?>
【讨论】:
缺少的关键信息是:让回调返回 -1,这会触发错误并中止传输。
@chris 即使 CURLINFO_CONTENT_LENGTH_DOWNLOAD 返回 -1 作为长度,它也能正常工作,我刚刚检查过 :) 没有错误,成功获得了响应正文 :)
回调选项(CURLOPT_HEADERFUNCTION
、CURLOPT_READFUNCTION
、CURLOPT_WRITEFUNCTION
和 CURLOPT_PROGRESSFUNCTION
)正是我所需要的!我不知道他们所以+1并接受。谢谢!
在你的函数 my_read_callback( &$ch, $data ) 中,变量 $ch 前面的 & 是什么意思?
【参考方案2】:
使用 curl 可以在执行请求后单独读取标头。通过执行以下代码
// Create the handle and request the response as a string, headers included,
// with verbose output enabled.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_VERBOSE        => 1,
    CURLOPT_HEADER         => 1,
));
// Then, after your curl_exec call:
// cURL reports how many leading bytes of $response are headers; cut there.
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$body = substr($response, $header_size);
$header = substr($response, 0, $header_size);
【讨论】:
这不是我要问的。难道下载一个 2GB 的 avi 文件,才是得知其 Content-type 是 video/avi 的唯一方法吗?并不是。
按照 curl 目前的工作方式,这就是您能用的办法;否则请改用其他方法,例如打开原始套接字并自行读取标头。
以上是关于PHP/Curl:在下载正文之前检查响应标头的主要内容,如果未能解决你的问题,请参考以下文章
我可以使用 tcpdump 获取 HTTP 请求、响应标头和响应正文吗?
JSON Web 令牌 (JWT):我应该使用响应标头还是正文进行令牌传输?
如何记录 spring-webflux WebClient 请求 + 响应详细信息(正文、标头、elasped_time)?