ThinkPHP Http工具类(用于远程采集、远程下载);phpSimpleHtmlDom采集类库(jQuery筛选方式);使用phpQuery轻松采集网页内容
Posted caigan的技术博客
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“ThinkPHP Http工具类(用于远程采集、远程下载)”、“phpSimpleHtmlDom采集类库(jQuery筛选方式)”以及“使用phpQuery轻松采集网页内容”的相关知识,希望对你有一定的参考价值。
[php]代码库
<?php |
// +---------------------------------------------------------------------- |
// | ThinkPHP [ WE CAN DO IT JUST THINK IT ] |
// +---------------------------------------------------------------------- |
// | Copyright (c) 2009 http://thinkphp.cn All rights reserved. |
// +---------------------------------------------------------------------- |
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 ) |
// +---------------------------------------------------------------------- |
// | Author: liu21st <[email protected]> |
// +---------------------------------------------------------------------- |
/** |
* Http 工具类 |
* 提供一系列的Http方法 |
* @category ORG |
* @package ORG |
* @subpackage Net |
* @author liu21st <[email protected]> |
*/ |
class Http { |
/**
 * Fetch a remote file with cURL and save it to a local file.
 * @access public
 * @param string $remote Remote file name (URL)
 * @param string $local  Local file name to save to
 * @return mixed
 */
static public function curlDownload($remote, $local) {
    // Download $remote with cURL, streaming the response body straight into $local.
    // Returns true when the transfer succeeded and false when the local file could
    // not be opened, the cURL handle could not be created, or the transfer failed.

    // Open the destination first so that an unwritable path fails before any
    // network traffic happens. "wb" keeps binary payloads intact on Windows.
    $fp = fopen($local, 'wb');
    if ($fp === false) {
        return false;
    }

    $cp = curl_init($remote);
    if ($cp === false) {
        fclose($fp);
        return false;
    }

    curl_setopt($cp, CURLOPT_FILE, $fp);   // write the body to the file handle
    curl_setopt($cp, CURLOPT_HEADER, 0);   // do not include response headers in the file

    // With CURLOPT_FILE set, curl_exec() returns a boolean success flag.
    $ok = curl_exec($cp);

    curl_close($cp);
    fclose($fp);

    return $ok !== false;
}
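/**
 * Fetch a remote file directly over HTTP using fsockopen.
 * Consider using this when the host or server does not have the cURL extension enabled.
 * fsockopen is slightly slower than cURL, but its performance is stable.
 * @static
 * @access public
 * @param string $url  Remote URL
 * @param array  $conf Additional settings:
 *   int    limit   number of characters to read (the body is read in chunks)
 *   string post    POST content, a string or an array, in key=value& form
 *   string cookie  cookie content to send with the request
 *   string ip      if given, the connection is made to this IP instead of the host in $url
 *   int    timeout fetch timeout
 *   bool   block   whether to use blocking access, defaults to true
 * @return mixed
 */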
static public function fsockopenDownload($url, $conf = array()) {
    // Issue an HTTP/1.0 GET (or POST when $conf['post'] is set) over a raw socket
    // and return the response body as a string. Returns '' when $conf is not an
    // array or when the connection cannot be opened.
    $return = '';
    if (!is_array($conf)) {
        return $return;
    }

    // parse_url() returns false for seriously malformed URLs; treat that as "no parts".
    $parts = parse_url($url);
    if (!is_array($parts)) {
        $parts = array();
    }
    $host = isset($parts['host']) ? $parts['host'] : '';
    $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
    // Keep the query string even when the URL has no explicit path
    // (e.g. http://example.com?x=1 must request "/?x=1", not "/").
    if (isset($parts['query']) && $parts['query'] !== '') {
        $path .= '?' . $parts['query'];
    }
    $port = !empty($parts['port']) ? $parts['port'] : 80;

    // Only the documented option keys are honoured. Reading them explicitly
    // (instead of creating variables from arbitrary keys) prevents a caller-supplied
    // key such as "host" or "return" from clobbering local state.
    $defaults = array(
        'limit'   => 0,
        'post'    => '',
        'cookie'  => '',
        'ip'      => '',
        'timeout' => 15,
        'block'   => true,
    );
    $options = array_merge($defaults, array_intersect_key($conf, $defaults));
    $limit   = $options['limit'];
    $post    = $options['post'];
    $cookie  = $options['cookie'];
    $ip      = $options['ip'];
    $timeout = $options['timeout'];
    $block   = $options['block'];

    // HTTP_USER_AGENT is absent under CLI/cron; fall back to no User-Agent header.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

    // Build the request head as a list of lines, joined with CRLF below.
    $body  = '';
    $lines = array();
    if ($post) {
        if (is_array($post)) {
            $post = http_build_query($post);
        }
        $body    = $post;
        $lines[] = "POST $path HTTP/1.0";
        $lines[] = 'Accept: */*';
        $lines[] = 'Accept-Language: zh-cn';
        $lines[] = 'Content-Type: application/x-www-form-urlencoded';
        if ($userAgent !== '') {
            $lines[] = 'User-Agent: ' . $userAgent;
        }
        $lines[] = "Host: $host";
        $lines[] = 'Content-Length: ' . strlen($post);
        $lines[] = 'Connection: Close';
        $lines[] = 'Cache-Control: no-cache';
    } else {
        $lines[] = "GET $path HTTP/1.0";
        $lines[] = 'Accept: */*';
        $lines[] = 'Accept-Language: zh-cn';
        if ($userAgent !== '') {
            $lines[] = 'User-Agent: ' . $userAgent;
        }
        $lines[] = "Host: $host";
        $lines[] = 'Connection: Close';
    }
    // Send a Cookie header only when there is something to send.
    if ($cookie !== '' && $cookie !== null) {
        $lines[] = 'Cookie: ' . $cookie;
    }
    $out = implode("\r\n", $lines) . "\r\n\r\n" . $body;

    // Connect to the explicit IP when one is given, otherwise to the URL's host.
    $fp = @fsockopen(($ip ? $ip : $host), $port, $errno, $errstr, $timeout);
    if (!$fp) {
        return '';
    }

    stream_set_blocking($fp, $block);
    stream_set_timeout($fp, $timeout);
    @fwrite($fp, $out);

    $status = stream_get_meta_data($fp);
    if (!$status['timed_out']) {
        // Skip the response headers: read lines until the blank separator line.
        while (!feof($fp)) {
            $line = @fgets($fp);
            if ($line === "\r\n" || $line === "\n") {
                break;
            }
            // feof() stays false after a read timeout, so check the flag
            // explicitly to avoid spinning forever on a stalled connection.
            $status = stream_get_meta_data($fp);
            if ($status['timed_out']) {
                break;
            }
        }

        // Read the body in chunks of at most 8192 bytes, honouring $limit when set.
        $stop = false;
        while (!feof($fp) && !$stop) {
            $chunk = ($limit == 0 || $limit > 8192) ? 8192 : $limit;
            $data  = fread($fp, $chunk);
            if ($data === false) {
                break;
            }
            $return .= $data;
            if ($limit) {
                $limit -= strlen($data);
                $stop = $limit <= 0;
            }
            $status = stream_get_meta_data($fp);
            if ($status['timed_out']) {
                break;
            }
        }
    }

    @fclose($fp);
    return $return;
}
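/**
 * Download a file.
 * The file name shown to the user can be specified, and the matching headers are sent automatically.
 * If the content parameter is given, that content is downloaded instead.
 * @static
 * @access public
 * @param string  $filename Name of the file to download
 * @param string  $showname File name shown in the download dialog
 * @param string  $content  Content to download
 * @param integer $expire   Browser cache lifetime for the download
 * @return void
 */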
static public function download($filename, $showname = '', $content = '', $expire = 180) {
    // Send download headers and then either the given $content or the file
    // $filename (looked up as-is, then under UPLOAD_PATH), and terminate the script.
    $hasContent = ($content != '');
    $isFile     = false;

    if (is_file($filename)) {
        $isFile = true;
    } elseif (defined('UPLOAD_PATH') && is_file(UPLOAD_PATH . $filename)) {
        $filename = UPLOAD_PATH . $filename;
        $isFile   = true;
    } elseif (!$hasContent) {
        throw_exception($filename . L('下载文件不存在!'));
        return; // nothing to send if throw_exception() ever returns
    }

    // The body that is actually sent decides the length: explicit content wins
    // over the file, so Content-Length must describe the content in that case.
    $length = $hasContent ? strlen($content) : filesize($filename);

    if (empty($showname)) {
        $showname = $filename;
    }
    $showname = basename($showname);

    // Only sniff the MIME type when there is a real file to inspect;
    // fall back to a generic binary type otherwise or when detection fails.
    $type = 'application/octet-stream';
    if ($isFile && function_exists('mime_content_type')) {
        $detected = mime_content_type($filename);
        if (is_string($detected) && $detected !== '') {
            $type = $detected;
        }
    }

    // Send the HTTP headers and start the download.
    header('Pragma: public');
    header('Cache-control: max-age=' . $expire);
    // HTTP dates need a space before the "GMT" zone designator.
    header('Expires: ' . gmdate('D, d M Y H:i:s', time() + $expire) . ' GMT');
    header('Last-Modified: ' . gmdate('D, d M Y H:i:s', time()) . ' GMT');
    // Quote the file name so that names containing spaces or semicolons survive.
    header('Content-Disposition: attachment; filename="' . addcslashes($showname, "\"\\") . '"');
    header('Content-Length: ' . $length);
    header('Content-type: ' . $type);
    header('Content-Encoding: none');
    header('Content-Transfer-Encoding: binary');

    if ($hasContent) {
        echo($content);
    } else {
        readfile($filename);
    }
    exit();
}
/**
 * Show HTTP header information.
 * @return string
 */
static public function getHeaderInfo($header = '', $echo = true) {
    // Format the current request's headers as "Name:value\n" lines.
    // With $header set, only that header is reported (empty value when absent).
    // With $echo true the text is printed through nl2br() and nothing is
    // returned; otherwise the plain text is returned.

    if (function_exists('getallheaders')) {
        $headers = getallheaders();
    } else {
        // getallheaders() is not provided by every SAPI; rebuild the list from
        // the HTTP_* entries of $_SERVER (HTTP_ACCEPT_LANGUAGE -> Accept-Language).
        $headers = array();
        foreach ($_SERVER as $name => $value) {
            $name = (string) $name;
            if (strncmp($name, 'HTTP_', 5) === 0) {
                $words = ucwords(strtolower(str_replace('_', ' ', substr($name, 5))));
                $headers[str_replace(' ', '-', $words)] = $value;
            }
        }
    }
    if (!is_array($headers)) {
        $headers = array();
    }

    if (!empty($header)) {
        $info = '';
        if (isset($headers[$header])) {
            $info = $headers[$header];
        } else {
            // Header names are case-insensitive, so fall back to a loose match.
            foreach ($headers as $name => $value) {
                if (strcasecmp((string) $name, $header) === 0) {
                    $info = $value;
                    break;
                }
            }
        }
        $output = $header . ':' . $info . "\n";
    } else {
        $output = '';
        foreach ($headers as $key => $val) {
            $output .= "$key:$val\n";
        }
    }

    if ($echo) {
        echo(nl2br($output));
    } else {
        return $output;
    }
}
/** |
* HTTP Protocol defined status codes |