|
本帖最后由 syzdh 于 2012-4-2 17:59 编辑
方维团购导航或方维分享,想用火车头采集可以吗?现有的采集无法采集商区数据!哪位高手可以帮忙?请加我Q:307041
<?php
/*
* api_spider.php
* Table prefix: fanwe_
* by 39473700
*/
// ---------------------------------------------------------------------------
// Group-buy API spider: for every enabled site in `fanwe_site`, download its
// XML feed, extract the deals according to the per-site rules stored in
// `fanwe_site_rules`, and insert/update them in the `fanwe_goods*` tables.
// ---------------------------------------------------------------------------

// Keep running after the triggering client disconnects. The argument is
// required: called without one, ignore_user_abort() only reports the current
// setting and changes nothing.
ignore_user_abort(true);
set_time_limit(0);
date_default_timezone_set("PRC");

$timefile = 'api_spider.time'; // holds the timestamp of the previous run
$logfile  = 'api_spider.log';  // log file, recreated on every run

// Throttle: exit when the previous run started less than the minimum interval ago.
// NOTE(review): the original comment said "6 hours", but the value below is
// 3 seconds (3*1) -- confirm which one is intended before changing it.
$now = time();
$lasttime = (int)@file_get_contents($timefile);
($now - $lasttime) < (3*1) && exit;
file_put_contents($timefile, $now);

// The change time of the old log is reported as the time of the last crawl,
// then the old log is discarded and a fresh one is started.
$lasttime = date('m-d H:i', @filectime($logfile));
@unlink($logfile);
mklog($logfile, '上次采集时间: '.$lasttime);

// Database connection (settings are returned by the included config file).
$db = include "public/db_config.php";
$conn = mysql_connect($db['DB_HOST'], $db['DB_USER'], $db['DB_PWD']) or die(mysql_error());
mysql_query("set names 'utf8'");
mysql_select_db($db['DB_NAME']) or die(mysql_error());

// Load the keyword list of every category: category id => comma separated keywords.
$cats = array();
$sql = mysql_query("select id,keywords from `fanwe_goods_cate` order by sort asc");
while ($rs = mysql_fetch_assoc($sql)) {
    $cats[$rs['id']] = $rs['keywords'];
}

// Walk through every site whose collection is not switched off.
$sql = mysql_query("select id,sort,name from `fanwe_site` where close_collect=0 order by sort asc");
while ($rs = mysql_fetch_assoc($sql)) {
    $site_id = $rs['id'];
    $sort    = $rs['sort'];
    $site    = $rs['name'];

    // Read the collection rules of this site: rule name => rule value.
    $rsql = mysql_query("select name,rule from `fanwe_site_rules` where id='$site_id'");
    $rules = array();
    while ($rs2 = mysql_fetch_assoc($rsql)) {
        $rules[$rs2['name']] = $rs2['rule'];
    }

    $url = $rules['collect_url'];
    if (!$url) {
        mklog($logfile, $site.' api格式错误: 采集地址为空');
        continue;
    }
    $strXml = @file_get_contents($url);
    if (!$strXml) {
        mklog($logfile, $site.' 采集地址无法打开: '.$url);
        continue;
    }
    $Xmlarray = @simplexml_load_string($strXml, 'SimpleXMLElement', LIBXML_NOCDATA);
    if (!$Xmlarray) {
        mklog($logfile, $site.' api读取失败: '.$url);
        continue;
    }

    // Convert the XML tree to nested arrays and locate the list of deals.
    // The first path segment of the rule is dropped here, and getvalue()
    // drops one more segment itself.
    $data0 = get_object_vars_final($Xmlarray);
    $dpost = explode('.', $rules['group_object_api']);
    unset($dpost[0]);
    $sign = implode('.', $dpost);
    $data = getvalue($data0, $sign);

    // No deals at that path but a city rule exists: treat the document as a
    // city list and fetch one feed per city.
    if (!$data && $rules['city_array']) {
        $dpost = explode('.', $rules['city_array']);
        unset($dpost[0]);
        $citys = getvalue($data0, implode('.', $dpost));
        if (!$citys) {
            mklog($logfile, $site.' 城市列表解析错误: '.$url);
            continue;
        } else {
            mklog($logfile, $site.' 城市列表解析成功');
        }
        $data = array();
        // URL template of the form "<prefix>+<path>": the part after '+' is
        // a getvalue() path into the city entry and is replaced by its value.
        $apiurl = str_replace('"', '', stripslashes($rules['city_url_api']));
        $sis = explode('+', $apiurl);
        $si = $sis[1];
        foreach ($citys as $c) {
            $cityid = getvalue($c, $si);
            $url = str_replace('+'.$si, $cityid, $apiurl);
            $strXml = @file_get_contents($url);
            $Xmlarray = @simplexml_load_string($strXml, 'SimpleXMLElement', LIBXML_NOCDATA);
            $data2 = get_object_vars_final($Xmlarray);
            if (!$data2) {
                // NOTE(review): the first city feed that fails stops the whole
                // city loop; the remaining cities are skipped -- confirm intent.
                break;
            }
            $data2 = getvalue($data2, $sign);
            if ($data2) {
                foreach ($data2 as $d) {
                    $data[] = $d;
                }
            }
        }
    }

    if (!$data) {
        mklog($logfile, $site.' api解析错误: '.$url);
        continue;
    }

    foreach ($data as $v) {
        $name = getvalue($v, $rules['group_name_api']); // deal title
        // Titles shorter than 16 bytes are skipped.
        if (!$name || strlen($name) < 16) {
            continue;
        }
        $url = getvalue($v, $rules['group_url_api']); // deal URL
        if (!$url) {
            mklog($logfile, $site.' 商品地址错误');
            continue;
        }
        $beginTime = getvalue($v, $rules['group_begin_time_api']); // start time
        $endTime   = getvalue($v, $rules['group_end_time_api']);   // end time
        // A value containing '-' is taken to be a formatted date and parsed.
        if (@strpos($endTime, '-')) {
            $endTime = strtotime($endTime);
        }
        // Time correction: shift back by 8 hours.
        // NOTE(review): presumably compensates for UTC+8 -- confirm.
        $endTime = $endTime - 8*3600;
        $cate_id = getcate($cats, $name);                          // classify by title keywords
        $city    = getvalue($v, $rules['group_city_api']);         // city
        $bought  = getvalue($v, $rules['group_bought_api']);       // number sold
        $value   = getvalue($v, $rules['group_market_price_api']); // original price
        $price   = getvalue($v, $rules['group_shop_price_api']);   // current price
        // If the "current" price is above the "original" one, swap the two.
        if ($price > $value) {
            $price = $value;
            $value = getvalue($v, $rules['group_shop_price_api']);
        }
        $image       = getvalue($v, $rules['group_image_api']);   // image URL
        $collect_key = getvalue($v, $rules['group_key_api']);     // NOTE(review): meaning unclear in the original ("?")
        $brief       = getvalue($v, $rules['group_brief_api']);   // short hint text
        $content     = getvalue($v, $rules['group_content_api']); // description
        $now = time();
        if (!$content) {
            $content = '';
        }

        // Everything above comes from a remote feed (or free-text DB fields)
        // and is placed inside SQL string literals below, so it must be
        // escaped; otherwise a single apostrophe in a title breaks the
        // statement or allows SQL injection.
        $raw = array(
            'site' => $site, 'name' => $name, 'city' => $city, 'url' => $url,
            'price' => $price, 'value' => $value, 'beginTime' => $beginTime,
            'endTime' => $endTime, 'brief' => $brief, 'image' => $image,
            'bought' => $bought, 'collect_key' => $collect_key, 'content' => $content,
        );
        $q = array();
        foreach ($raw as $field => $text) {
            $q[$field] = mysql_real_escape_string((string)$text, $conn);
        }

        // Duplicate check: look the deal up by its URL first.
        $check = mysql_query("select id,end_time from `fanwe_goods` where url='{$q['url']}'");
        $n = @mysql_num_rows($check);
        if ($n > 0) {
            $rs3 = mysql_fetch_assoc($check);
            // Known deal: refresh it only when its end time has changed.
            if ($endTime != $rs3['end_time']) {
                $id = $rs3['id'];
                // Only a subset of the fields is updated here.
                @mysql_query("update `fanwe_goods` set name='{$q['name']}',cate_id='$cate_id',end_time='{$q['endTime']}',bought='{$q['bought']}' where id='$id'");
                @mysql_query("update `fanwe_goods_key` set cate_id='$cate_id',end_time='{$q['endTime']}' where id='$id'");
                @mysql_query("update `fanwe_goods_now` set name='{$q['name']}',cate_id='$cate_id',end_time='{$q['endTime']}',bought='{$q['bought']}' where id='$id'");
                @mysql_query("update `fanwe_goods_now_key` set cate_id='$cate_id',end_time='{$q['endTime']}' where id='$id'");
            }
        } else {
            // New deal: insert it into the main table ...
            $insert = "insert into `fanwe_goods`
(site_id,site_name,name,city,cate_id,url,shop_price,market_price,begin_time,end_time,brief,sort,small_img,big_img,
add_time,update_time,bought,collect_key,collect_url,collect_name,content) values
('$site_id','{$q['site']}','{$q['name']}','{$q['city']}','$cate_id','{$q['url']}','{$q['price']}','{$q['value']}','{$q['beginTime']}','{$q['endTime']}','{$q['brief']}','$sort','{$q['image']}','{$q['image']}','$now','$now',
'{$q['bought']}','{$q['collect_key']}','{$q['url']}','{$q['name']}','{$q['content']}')";
            mysql_query($insert);
            $id = mysql_insert_id();
            // ... and, when that succeeded, mirror it into the companion tables
            // under the same id.
            if ($id) {
                $insert = "insert into `fanwe_goods_key`
(id,site_id,city,cate_id,sort,r_sort,cr_sort,d_sort,dc_sort,e_sort,ec_sort,end_time) values
('$id','$site_id','{$q['city']}','$cate_id','$sort',0,0,0,0,0,0,'{$q['endTime']}')";
                @mysql_query($insert);
                $insert = "insert into `fanwe_goods_now`
(id,site_id,site_name,name,city,cate_id,url,shop_price,market_price,begin_time,end_time,brief,sort,small_img,big_img,
add_time,bought) values
('$id','$site_id','{$q['site']}','{$q['name']}','{$q['city']}','$cate_id','{$q['url']}','{$q['price']}','{$q['value']}','{$q['beginTime']}','{$q['endTime']}','{$q['brief']}','$sort','{$q['image']}','{$q['image']}','$now',
'{$q['bought']}')";
                @mysql_query($insert);
                $insert = "insert into `fanwe_goods_now_key`
(id,site_id,city,cate_id,sort,r_sort,cr_sort,d_sort,dc_sort,e_sort,ec_sort,end_time) values
('$id','$site_id','{$q['city']}','$cate_id','$sort',0,0,0,0,0,0,'{$q['endTime']}')";
                @mysql_query($insert);
            }
        }
    }
    mklog($logfile, $site.' 采集成功');
}
mysql_close($conn);
mklog($logfile, '本次采集结束');
/**
 * Recursively turn an object tree into plain nested arrays.
 *
 * Objects are replaced by the array that get_object_vars() returns for them,
 * arrays are walked element by element (keys are kept), and every other
 * value is returned unchanged.
 *
 * @param mixed $obj Object, array or scalar to convert.
 * @return mixed The converted value.
 */
function get_object_vars_final($obj){
    $plain = is_object($obj) ? get_object_vars($obj) : $obj;
    if(!is_array($plain)){
        return $plain;
    }
    // array_map() with a single array keeps the original keys.
    return array_map('get_object_vars_final', $plain);
}
/**
 * Look up a nested value by a dot separated path.
 *
 * The FIRST segment of $pos is always discarded, so "item.title" reads
 * $data['title'] and "root.a.b" reads $data['a']['b']. A path with a single
 * segment (or an empty path) returns $data itself.
 *
 * @param mixed  $data Nested array to read from.
 * @param string $pos  Dot separated path whose first segment is ignored.
 * @return mixed The value found, or null when any remaining segment is
 *               missing or an intermediate value is not an array.
 */
function getvalue($data=array(),$pos=''){
    $nav = explode('.', (string)$pos);
    unset($nav[0]);
    $value = $data;
    foreach($nav as $key){
        // Stop instead of indexing into a scalar or a missing key, which
        // would raise notices or string-offset errors.
        if(!is_array($value) || !array_key_exists($key, $value)){
            return null;
        }
        $value = $value[$key];
    }
    return $value;
}
/**
 * Pick the category whose keyword list matches a deal title.
 *
 * Categories are tried in the order given; the first one having a keyword
 * that occurs anywhere in $name wins. When nothing matches, the key of the
 * last category is returned as the fallback.
 *
 * @param array  $cats Category id => comma separated keyword list.
 * @param string $name Title to classify.
 * @return mixed Matching category id, the last category id when nothing
 *               matches, or null when $cats is empty.
 */
function getcate($cats=array(),$name=''){
    $fallback = null;
    foreach($cats as $id=>$keywords){
        $fallback = $id;
        foreach(explode(',', (string)$keywords) as $keyword){
            // Compare against false explicitly: a keyword at the very start
            // of the title sits at offset 0, which is falsy. Empty keywords
            // are skipped so they can never match.
            if($keyword !== '' && strpos($name, $keyword) !== false){
                return $id;
            }
        }
    }
    return $fallback;
}
/**
 * Append one timestamped line ("mm-dd HH:ii -> message") to a log file.
 *
 * The file is created when it does not exist. A write failure is reported
 * through error_log() rather than cascading into errors on an invalid handle.
 *
 * @param string $fname Path of the log file.
 * @param string $str   Message to record.
 * @return void
 */
function mklog($fname,$str){
    $line = date('m-d H:i')." -> ".$str."\n";
    if(file_put_contents($fname, $line, FILE_APPEND) === false){
        error_log('mklog: unable to write to '.$fname);
    }
}
?> |
|