ThinkPHP Http工具类(用于远程采集 远程下载) phpSimpleHtmlDom采集类库_Jquery筛选方式 使用phpQuery轻松采集网页内容

Posted caigan的技术博客

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ThinkPHP Http工具类(用于远程采集 远程下载) phpSimpleHtmlDom采集类库_Jquery筛选方式 使用phpQuery轻松采集网页内容相关的知识,希望对你有一定的参考价值。

[php]代码库

<?php
// +----------------------------------------------------------------------
// | ThinkPHP [ WE CAN DO IT JUST THINK IT ]
// +----------------------------------------------------------------------
// | Copyright (c) 2009 http://thinkphp.cn All rights reserved.
// +----------------------------------------------------------------------
// +----------------------------------------------------------------------
// | Author: liu21st <liu21st@gmail.com>
// +----------------------------------------------------------------------
 
/**
 * Http 工具类
 * 提供一系列的Http方法
 * @category   ORG
 * @package  ORG
 * @subpackage  Net
 * @author    liu21st <liu21st@gmail.com>
 */
class Http {
 
    /**
     * 采集远程文件
     * @access public
     * @param string $remote 远程文件名
     * @param string $local 本地保存文件名
     * @return mixed
     */
    static public function curlDownload($remote,$local) {
        $cp = curl_init($remote);
        $fp = fopen($local,"w");
        curl_setopt($cp, CURLOPT_FILE, $fp);
        curl_setopt($cp, CURLOPT_HEADER, 0);
        curl_exec($cp);
        curl_close($cp);
        fclose($fp);
    }
 
   /**
    * 使用 fsockopen 通过 HTTP 协议直接访问(采集)远程文件
    * 如果主机或服务器没有开启 CURL 扩展可考虑使用
    * fsockopen 比 CURL 稍慢,但性能稳定
    * @static
    * @access public
    * @param string $url 远程URL
    * @param array $conf 其他配置信息
    *        int   limit 分段读取字符个数
    *        string post  post的内容,字符串或数组,key=value&形式
    *        string cookie 携带cookie访问,该参数是cookie内容
    *        string ip    如果该参数传入,$url将不被使用,ip访问优先
    *        int    timeout 采集超时时间
    *        bool   block 是否阻塞访问,默认为true
    * @return mixed
    */
    static public function fsockopenDownload($url, $conf = array()) {
        $return = ‘‘;
        if(!is_array($conf)) return $return;
 
        $matches = parse_url($url);
        !isset($matches[‘host‘])    && $matches[‘host‘]     = ‘‘;
        !isset($matches[‘path‘])    && $matches[‘path‘]     = ‘‘;
        !isset($matches[‘query‘])   && $matches[‘query‘]    = ‘‘;
        !isset($matches[‘port‘])    && $matches[‘port‘]     = ‘‘;
        $host = $matches[‘host‘];
        $path = $matches[‘path‘] ? $matches[‘path‘].($matches[‘query‘] ? ‘?‘.$matches[‘query‘] : ‘‘) : ‘/‘;
        $port = !empty($matches[‘port‘]) ? $matches[‘port‘] : 80;
 
        $conf_arr = array(
            ‘limit‘     =>   0,
            ‘post‘      =>   ‘‘,
            ‘cookie‘    =>   ‘‘,
            ‘ip‘        =>   ‘‘,
            ‘timeout‘   =>   15,
            ‘block‘     =>   TRUE,
            );
 
        foreach (array_merge($conf_arr, $conf) as $k=>$v) ${$k} = $v;
 
        if($post) {
            if(is_array($post))
            {
                $post = http_build_query($post);
            }
            $out  = "POST $path HTTP/1.0\r\n";
            $out .= "Accept: */*\r\n";
            //$out .= "Referer: $boardurl\r\n";
            $out .= "Accept-Language: zh-cn\r\n";
            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $out .= "User-Agent: $_SERVER[HTTP_USER_AGENT]\r\n";
            $out .= "Host: $host\r\n";
            $out .= ‘Content-Length: ‘.strlen($post)."\r\n";
            $out .= "Connection: Close\r\n";
            $out .= "Cache-Control: no-cache\r\n";
            $out .= "Cookie: $cookie\r\n\r\n";
            $out .= $post;
        } else {
            $out  = "GET $path HTTP/1.0\r\n";
            $out .= "Accept: */*\r\n";
            //$out .= "Referer: $boardurl\r\n";
            $out .= "Accept-Language: zh-cn\r\n";
            $out .= "User-Agent: $_SERVER[HTTP_USER_AGENT]\r\n";
            $out .= "Host: $host\r\n";
            $out .= "Connection: Close\r\n";
            $out .= "Cookie: $cookie\r\n\r\n";
        }
        $fp = @fsockopen(($ip ? $ip : $host), $port, $errno, $errstr, $timeout);
        if(!$fp) {
            return ‘‘;
        } else {
            stream_set_blocking($fp, $block);
            stream_set_timeout($fp, $timeout);
            @fwrite($fp, $out);
            $status = stream_get_meta_data($fp);
            if(!$status[‘timed_out‘]) {
                while (!feof($fp)) {
                    if(($header = @fgets($fp)) && ($header == "\r\n" ||  $header == "\n")) {
                        break;
                    }
                }
 
                $stop = false;
                while(!feof($fp) && !$stop) {
                    $data = fread($fp, ($limit == 0 || $limit > 8192 ? 8192 : $limit));
                    $return .= $data;
                    if($limit) {
                        $limit -= strlen($data);
                        $stop = $limit <= 0;
                    }
                }
            }
            @fclose($fp);
            return $return;
        }
    }
 
    /**
     * 下载文件
     * 可以指定下载显示的文件名,并自动发送相应的Header信息
     * 如果指定了content参数,则下载该参数的内容
     * @static
     * @access public
     * @param string $filename 下载文件名
     * @param string $showname 下载显示的文件名
     * @param string $content  下载的内容
     * @param integer $expire  下载内容浏览器缓存时间
     * @return void
     */
    static public function download ($filename, $showname=‘‘,$content=‘‘,$expire=180) {
        if(is_file($filename)) {
            $length = filesize($filename);
        }elseif(is_file(UPLOAD_PATH.$filename)) {
            $filename = UPLOAD_PATH.$filename;
            $length = filesize($filename);
        }elseif($content != ‘‘) {
            $length = strlen($content);
        }else {
            throw_exception($filename.L(‘下载文件不存在!‘));
        }
        if(empty($showname)) {
            $showname = $filename;
        }
        $showname = basename($showname);
        if(!empty($filename)) {
            $type = mime_content_type($filename);
        }else{
            $type    =   "application/octet-stream";
        }
        //发送Http Header信息 开始下载
        header("Pragma: public");
        header("Cache-control: max-age=".$expire);
        //header(‘Cache-Control: no-store, no-cache, must-revalidate‘);
        header("Expires: " . gmdate("D, d M Y H:i:s",time()+$expire) . "GMT");
        header("Last-Modified: " . gmdate("D, d M Y H:i:s",time()) . "GMT");
        header("Content-Disposition: attachment; filename=".$showname);
        header("Content-Length: ".$length);
        header("Content-type: ".$type);
        header(‘Content-Encoding: none‘);
        header("Content-Transfer-Encoding: binary" );
        if($content == ‘‘ ) {
            readfile($filename);
        }else {
            echo($content);
        }
        exit();
    }
 
    /**
     * 显示HTTP Header 信息
     * @return string
     */
    static function getHeaderInfo($header=‘‘,$echo=true) {
        ob_start();
        $headers    = getallheaders();
        if(!empty($header)) {
            $info   = $headers[$header];
            echo($header.‘:‘.$info."\n"); ;
        }else {
            foreach($headers as $key=>$val) {
                echo("$key:$val\n");
            }
        }
        $output     = ob_get_clean();
        if ($echo) {
            echo (nl2br($output));
        }else {
            return $output;
        }
 
    }
 
    /**
     * HTTP Protocol defined status codes

以上是关于ThinkPHP Http工具类(用于远程采集 远程下载) phpSimpleHtmlDom采集类库_Jquery筛选方式 使用phpQuery轻松采集网页内容的主要内容,如果未能解决你的问题,请参考以下文章

使用ThinkPHP自带的Http类下载远程图片到本地的实现代码

完虐ThinkPHP 5.* 远不止这些

Java模拟http请求远程调用接口工具类

ThinkPHP - 验证码

UNIX 通信工具

扩展thinkphp5的redis类方法

(c)2006-2024 SYSTEM All Rights Reserved IT常识