CURL多线程采集
Posted by zhangfu
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了CURL多线程采集相关的知识,希望对你有一定的参考价值。
// Back-fill script for missing product images.
//
// Scans the descriptions of one store's goods for <img> tags, collects every
// site-relative image path that does not yet exist under the document root,
// downloads those images concurrently from the source site via Curl_http(),
// and stores each one under the document root at the same relative path.

set_time_limit(0); // the crawl may run for a long time; lift the execution limit

$dbConfig = array(
    'dbHost' => 'localhost',
    'dbName' => 'ymall',
    'dbUser' => 'root',
    'dbPass' => 'cinternet_245.',
);
$store_id = 327;

// Local base directory, always with exactly one trailing slash.
$ducpath = rtrim(isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '', '/\\') . '/';

// Remote site the missing images are fetched from.
$caij = "http://www.yimengbuy.com/";

// The legacy mysql_* API was removed in PHP 7, and its 4th connect argument is
// "new_link", not the database name; use mysqli and select the schema explicitly.
$conn = mysqli_connect($dbConfig['dbHost'], $dbConfig['dbUser'], $dbConfig['dbPass']) or die('数据库连接失败');
mysqli_select_db($conn, $dbConfig['dbName']) or die('数据库选择失败');
mysqli_set_charset($conn, 'utf8');

$sql = "SELECT description FROM ecm_goods WHERE store_id=" . (int)$store_id
     . " AND description LIKE '%<img%'";
$query = mysqli_query($conn, $sql);

// Map of relative path => absolute source URL for every image to download.
$imgarray = array();
while ($query && ($description = mysqli_fetch_assoc($query))) {
    if (!preg_match_all('/<img(.*?)src="(.*?)"(.*?)>/i', $description['description'], $conimg)) {
        continue;
    }
    foreach ($conimg[2] as $valimg) {
        // Only site-relative sources are candidates.
        if (stristr($valimg, 'http://')) {
            continue;
        }

        // Normalise Windows-style separators and drop a single leading slash
        // so the value is a clean path relative to the document root.
        $valimg = str_replace('\\', '/', $valimg);
        if ($valimg !== '' && $valimg[0] == '/') {
            $valimg = (string)substr($valimg, 1);
        }

        // Keep only paths beginning with "d" (same filter as before); the
        // empty-string guard avoids an invalid string-offset read.
        if ($valimg === '' || $valimg[0] != 'd') {
            continue;
        }

        // The path comes from stored HTML; never let it climb out of the root.
        if (strpos($valimg, '..') !== false) {
            continue;
        }

        // Nothing to do when the file is already present locally.
        if (is_file($ducpath . $valimg)) {
            continue;
        }

        $imgarray[$valimg] = $caij . $valimg;
    }
}
if ($query) {
    mysqli_free_result($query);
}
mysqli_close($conn);

$dataimg = Curl_http($imgarray, '20');

// Discard buffered output, but only if a buffer is actually active.
if (ob_get_level() > 0) {
    ob_clean();
}

foreach ((array)$dataimg as $kk => $vv) {
    // Skip failed or empty downloads.
    if ((string)$vv === '') {
        continue;
    }

    // Write to the same location the existence check above looked at, and
    // create the directory tree first so file_put_contents() cannot fail on
    // a missing parent directory.
    $target = $ducpath . $kk;
    $dir = dirname($target);
    if (!is_dir($dir) && !mkdir($dir, 0755, true) && !is_dir($dir)) {
        continue;
    }
    file_put_contents($target, $vv);
}
/**
 * Fetch several URLs concurrently with the curl_multi interface.
 *
 * @param array      $array   Map of key => URL. Keys are preserved in the result.
 * @param int|string $timeout Per-transfer timeout in seconds (default 15).
 *
 * @return array Map of key => response body for every transfer that completed
 *               without a cURL error. Failed transfers are omitted. An empty
 *               array is returned when there is nothing to fetch.
 */
function Curl_http($array, $timeout = '15')
{
    $data = array();
    if (empty($array)) {
        return $data;
    }

    $mh = curl_multi_init(); // multi handle that drives all easy handles
    $handles = array();      // key => easy handle
    foreach ((array)$array as $k => $url) {
        $ch = curl_init($url);
        if ($ch === false) {
            continue;
        }
        curl_setopt($ch, CURLOPT_TIMEOUT, (int)$timeout); // overall timeout per transfer
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)');
        curl_setopt($ch, CURLOPT_MAXREDIRS, 7);           // follow at most 7 redirects
        curl_setopt($ch, CURLOPT_HEADER, false);          // body only, no response headers
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);      // follow 3xx redirects
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);      // return the body instead of printing it
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_multi_add_handle($mh, $ch);
        $handles[$k] = $ch;
    }

    // Drive all transfers until none is active or the multi stack reports an error.
    $active = 0;
    do {
        do {
            $mrc = curl_multi_exec($mh, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);

        // Wait for socket activity. When select() reports -1, sleep briefly
        // instead of spinning at full CPU.
        if ($active && $mrc == CURLM_OK && curl_multi_select($mh, 1.0) == -1) {
            usleep(100000);
        }
    } while ($active && $mrc == CURLM_OK);

    // Per-transfer results are only available through curl_multi_info_read();
    // record which handles finished successfully.
    $succeeded = array();
    while (($info = curl_multi_info_read($mh)) !== false) {
        if ($info['result'] !== CURLE_OK) {
            continue;
        }
        foreach ($handles as $k => $ch) {
            if ($ch === $info['handle']) {
                $succeeded[$k] = true;
                break;
            }
        }
    }

    // Collect bodies, then release every handle: detach from the multi handle
    // first, close afterwards, for failed transfers as well.
    foreach ($handles as $k => $ch) {
        if (isset($succeeded[$k])) {
            $data[$k] = curl_multi_getcontent($ch);
        }
        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }
    curl_multi_close($mh);

    return $data;
}
以上是关于CURL多线程采集的主要内容,如果未能解决你的问题,请参考以下文章