一个多页采集的棘手问题,打开多页网址就进入默认页了,这样怎么能采集数据呢?
一个P2P平台的页面有一个导航栏上面有这几项选择:借款详情个人资料账户详情还款信用 资料审核 投标记录
默认打开时进入的是“借款详情”,网址是:http://www.xinlongct.com/invest/detail.html?borrowid=2515#jkxq 。我要采集“投标记录”里的数据,问题是只能手动点击才能进入“投标记录”(http://www.xinlongct.com/invest/detail.html?borrowid=2515#tbinfo );直接输入该网址后,实际进入的仍是“借款详情”页面。我在采集器里打开查看源文件,看到的也是默认页的内容;即使手动点击“投标记录”,刷新浏览器后显示的还是默认页面“借款详情”的内容,在“投标记录”上右键选择查看源文件,看到的也是默认页面的内容。这样采集器应该采集不到“投标记录”里的数据吧?能不能模拟点击“投标记录”的动作进入?初学数据采集,请高手指点。
用抓包工具抓包分析到真地址再采集 303718 发表于 2014-2-25 16:21 static/image/common/back.gif
用抓包工具抓包分析到真地址再采集
你好,谢谢回复。地址是有的,问题是在浏览器输入“投标记录”的地址 http://www.xinlongct.com/invest/detail.html?borrowid=2515#tbinfo 后,显示的是“借款详情”页面 http://www.xinlongct.com/invest/detail.html?borrowid=2515#jkxq 的内容;手动点击“投标记录”可以显示投标记录,但查看源文件时,里面没有那些记录文字。页面显示内容 查看源文件得到的部分源码如下,这个要怎样采集投标记录呢?
<!--投标记录-->
<div class="list-tab-con tab-contentbox second-tabcontent hide" id="tbinfo">
<style>
.list-page{padding-top:25px;padding-left:48px;}
.list-page a{ width:20px; height:18px; line-height:18px; text-align:center; border:2px solid #666666;
margin:3px 5px; display:inline-block;*zoom:1;*display:inline;cursor:pointer;}
.list-page a.prev,.list-page a.next{ width:40px; height:18px; line-height:18px;}
.list-page a.current{ border:2px solid #d23434;}
</style>
<ul class="clearfix" id="tbjlbox">
<li class="clearfix">
<dl>
<dd class="d1"><strong>序号</strong></dd>
<dd class="d3"><strong>投标人</strong></dd>
<dd class="d3"><strong>投标金额</strong>
<dd class="d3"><strong>有效金额</strong></dd>
<dd class="d1"><strong>奖励</strong></dd>
<dd class="d5"><strong>投标时间</strong></dd>
</dl>
</li>
</ul>
<div class="list-page"></div><!--分页-->
<script type="text/javascript">
/**
 * Format this date according to a pattern string.
 *
 * Recognised tokens (each one is substituted once, at its first occurrence):
 *   y+  year; a run of N "y" keeps the last N digits (yyyy -> 2014, yy -> 14)
 *   M+  month (1-12)     d+  day of month     h+  hour (0-23)
 *   m+  minute           s+  second           q+  quarter (1-4)
 *   S   millisecond (never padded)
 * A single-letter token inserts the raw number; a longer run zero-pads it to
 * two digits.
 *
 * @param {string} format pattern, e.g. 'yyyy-MM-dd hh:mm:ss'
 * @returns {string} the pattern with the recognised tokens substituted
 */
Date.prototype.format = function(format){
    var date = this;
    var o = {
        "M+" : date.getMonth()+1, //month
        "d+" : date.getDate(), //day
        "h+" : date.getHours(), //hour
        "m+" : date.getMinutes(), //minute
        "s+" : date.getSeconds(), //second
        "q+" : Math.floor((date.getMonth()+3)/3), //quarter
        "S" : date.getMilliseconds() //millisecond
    };

    // Replace the first run matching `token` with `value`, padded as described above.
    function substitute(token, value){
        format = format.replace(new RegExp("(" + token + ")"), function(run){
            var text = "" + value;
            return run.length == 1 ? text : ("00" + text).slice(text.length);
        });
    }

    // Year is handled separately because it is truncated rather than padded.
    format = format.replace(/(y+)/, function(run){
        return (date.getFullYear() + "").slice(4 - run.length);
    });

    for(var k in o){
        // Look up the value for THIS key; substituting the whole map would
        // print "[object Object]" for every token.
        if(Object.prototype.hasOwnProperty.call(o, k)){
            substitute(k, o[k]);
        }
    }
    return format;
};
/**
 * Render a number (or numeric string in plain decimal notation) with a fixed
 * number of decimal places, rounding half up on the decimal digits.
 *
 * @param {number|string} pnumber value to format, e.g. "366.666667"
 * @param {number} decimals number of digits to keep after the decimal point
 * @returns {string|number} the formatted text, e.g. "366.67"; the NUMBER 0 when
 *     pnumber is not numeric, is the empty string, or loosely equals ''
 *     (which includes numeric 0). With decimals == 0 the result keeps a
 *     trailing "." (e.g. "367.").
 */
function format(pnumber,decimals){
    if (isNaN(pnumber)) { return 0; }
    // Loose comparison on purpose: it also short-circuits numeric 0 and false.
    if (pnumber == '') { return 0; }

    var text = String(pnumber);
    // Handle the sign separately so the fraction is not added to a negative
    // whole part (which would move the value towards zero).
    var negative = text.charAt(0) === '-';
    if (negative) { text = text.substring(1); }

    var sec = text.split('.');
    var whole = parseFloat(sec[0]) || 0;          // integer part; ".5" counts as 0
    var fraction = sec.length > 1 ? sec[1] : '';  // digits after the point

    // Scale the fractional digits to an integer number of 10^-decimals units.
    var scaledFraction = 0;
    if (fraction !== '') {
        var shift = fraction.length - decimals;
        scaledFraction = shift > 0
            ? Math.round(parseFloat(fraction) / Math.pow(10, shift))
            : parseFloat(fraction) * Math.pow(10, -shift);
    }

    // Work in scaled integers so no binary-float artefacts reach the output.
    var digits = String(whole * Math.pow(10, decimals) + scaledFraction);
    while (digits.length <= decimals) { digits = '0' + digits; }

    var cut = digits.length - decimals;
    var result = digits.substring(0, cut) + '.' + digits.substring(cut);
    // Do not print "-0.00" when the value rounds to zero.
    return (negative && parseFloat(result) !== 0) ? '-' + result : result;
}
// Registers $.detailTenderPage(options): a small wrapper that issues a GET
// request for one page of tender records.
//   options.param       query parameters; `page` is set from currentPage
//   options.currentPage page number to request (defaults to 1)
//   options.url         request URL (defaults to "")
//   options.callback    passed to $.get as its success callback
jQuery.extend({
    detailTenderPage: function(options) {
        // $.extend merges `options` into the defaults object and returns it.
        var settings = $.extend({
            param: {},
            currentPage: 1,
            url: "",
            callback: {}
        }, options);

        settings.param.page = settings.currentPage;
        $.get(settings.url, settings.param, settings.callback);
    }
});
// Total number of result pages; -1 until the first response has been received.
var pageTotal = -1;
// Header row that is placed above the tender records each time they are rendered.
var title = [
    '<li class="clearfix"><dl>',
    '<dd class="d1"><strong>序号</strong></dd>',
    '<dd class="d1"><strong>投标人</strong></dd>',
    '<dd class="d1"><strong>投标金额</strong>',
    '<dd class="d1"><strong>有效金额</strong></dd>',
    '<dd class="d1"><strong>奖励</strong></dd>',
    '<dd class="d3"><strong>投标类型</strong></dd>',
    '<dd class="d3"><strong>投标时间</strong></dd>',
    '</dl></li>'
].join('');
/**
 * Request one page of tender (bid) records and render it into #tbjlbox.
 *
 * The request is sent through $.detailTenderPage. When the response arrives,
 * the list is rebuilt as the shared header row (`title`) followed by one row
 * per record. On the first response (while `pageTotal` is still negative) the
 * page links inside .list-page are created from result.data.page.pages.
 *
 * @param {number} pagenum 1-based number of the page to load
 */
function showTenderList(pagenum){
    // Display label for a record's is_auto_tender code; empty for unknown codes
    // so that the table never shows "undefined".
    function isAuto(type){
        if(type==0){ return "网站投标"; }
        if(type==1){ return "自动投标"; }
        if(type==2){ return "手机投标"; }
        return "";
    }

    // Mask a username: keep at most the first three characters, star the rest.
    function hideUsername(text){
        var name = text == null ? "" : String(text);
        var len = name.length;
        if(len<2){ return "*"; }
        if(len<3){ return name.substring(0,1)+"**"; }
        if(len<5){ return name.substring(0,2)+"**"; }
        return name.substring(0,3)+"***";
    }

    // Usernames are user-supplied text, so escape them before building HTML.
    function escapeHtml(text){
        return text.replace(/&/g,"&amp;")
                   .replace(/</g,"&lt;")
                   .replace(/>/g,"&gt;")
                   .replace(/"/g,"&quot;")
                   .replace(/'/g,"&#39;");
    }

    // Highlight exactly one page link and clear the highlight on the others.
    function markCurrent(links, index){
        links.eq(index).addClass("current").siblings().removeClass("current");
    }

    $.detailTenderPage({
        param:{borrowid:2515},
        currentPage:pagenum,
        // randID varies per request; presumably a cache buster — TODO confirm
        url:'detailTenderForJson.html?randID='+ escape(new Date()),
        callback:function(result){
            // Leave the list untouched when the response has no usable payload.
            if(!result || !result.data || !result.data.list || !result.data.page){
                return;
            }
            var list = result.data.list;
            var page = result.data.page;

            if(pageTotal<0){
                pageTotal = page.pages; // number of pages
                var nav="";
                for(var p=1;p<=pageTotal;p++){
                    // No inline onclick: the handler bound below is the only
                    // trigger, so one click sends one request.
                    nav += "<a>"+p+"</a>";
                }
                $(".list-page").html(nav);
                var oA = $(".list-page a");
                markCurrent(oA, pagenum-1);
                oA.click(function(){
                    var clicked = oA.index(this);
                    markCurrent(oA, clicked);
                    showTenderList(clicked+1);
                });
            }

            var strs="";
            for(var i=0;i<list.length;i++){
                var item = list[i];
                strs += "<li><dl>"+
                    // running number across pages
                    "<dd class='d1'>"+(page.pernum*(page.currentPage-1)+i+1)+"</dd>"+
                    "<dd class='d1'>"+escapeHtml(hideUsername(item.username))+"</dd>"+
                    "<dd class='d1'>"+format(item.money,2)+"</dd>"+
                    "<dd class='d1'>"+format(item.account,2)+"</dd>"+
                    "<dd class='d1'>"+"0.8%"+"</dd>"+
                    // v1.6.6.1 RDPROJECT-102 zza 2013-09-25 start
                    "<dd class='d3'>"+isAuto(item.is_auto_tender)+"</dd>"+
                    // v1.6.6.1 RDPROJECT-102 zza 2013-09-25 end
                    // addtime is multiplied by 1000 before being handed to Date
                    "<dd class='d3'>"+new Date(item.addtime*1000).format('yyyy-MM-dd hh:mm:ss')+"</dd>"+
                    "</dl></li>";
            }
            $("#tbjlbox").html(title+strs);
        }
    });
}
// Load the first page of tender records as soon as this script runs.
showTenderList(1);
http://www.xinlongct.com/invest/detailTenderForJson.html?randID=Thu%20Feb%2027%202014%2010%3A27%3A22%20GMT+0800%20%28%u4E2D%u56FD%u6807%u51C6%u65F6%u95F4%29&borrowid=2515&page=1 你要的内容在这个页面地址里面 {"data":{"list":[{"account":"20000.000000","addtime":"1393300876","auto_repurchase":1,"award_after_push":0,"borrow_account":0,"borrow_id":2515,"id":18525,"interest":"366.666667","is_auto_tender":0,"money":"20000.000000","repay_account":"20366.666667","repay_time":"1395720076","repayment_account":"20366.666667","repayment_yesaccount":"0.000000","site_id":0,"status":1,"user_id":4207,"username":"dbacddbacd","wait_account":"20366.666667","wait_interest":"366.666667"},{"account":"14000.000000","addtime":"1393301765","auto_repurchase":1,"award_after_push":0,"borrow_account":0,"borrow_id":2515,"id":18529,"interest":"256.666667","is_auto_tender":0,"money":"14000.000000","repay_account":"14256.666667","repay_time":"1395720965","repayment_account":"14256.666667","repayment_yesaccount":"0.000000","site_id":0,"status":1,"user_id":4105,"username":"BingoZhang","wait_account":"14256.666667","wait_interest":"256.666667"},{"account":"38500.000000","addtime":"1393304122","auto_repurchase":1,"award_after_push":0,"borrow_account":0,"borrow_id":2515,"id":18538,"interest":"705.833333","is_auto_tender":0,"money":"38500.000000","repay_account":"39205.833333","repay_time":"1395723322","repayment_account":"39205.833333","repayment_yesaccount":"0.000000","site_id":0,"status":1,"user_id":2286,"username":"lucifer","wait_account":"39205.833333","wait_interest":"705.833333"},{"account":"300.000000","addtime":"1393304204","auto_repurchase":1,"award_after_push":0,"borrow_account":0,"borrow_id":2515,"id":18539,"interest":"5.500000","is_auto_tender":0,"money":"300.000000","repay_account":"305.500000","repay_time":"1395723404","repayment_account":"305.500000","repayment_yesaccount":"0.000000","site_id":0,"status":1,"user_id":2286,"username":"lucifer","wait_account":"305.500000","wait_interest":"5.500000"},{"acco
unt":"27200.000000","addtime":"1393304960","auto_repurchase":1,"award_after_push":0,"borrow_account":0,"borrow_id":2515,"id":18542,"interest":"498.666667","is_auto_tender":0,"money":"27200.000000","repay_account":"27698.666667","repay_time":"1395724160","repayment_account":"27698.666667","repayment_yesaccount":"0.000000","site_id":0,"status":1,"user_id":2517,"username":"kekexlct","wait_account":"27698.666667","wait_interest":"498.666667"}],"page":{"currentPage":1,"end":5,"pages":1,"pernum":10,"start":0,"total":5},"type":0},"msg":"success"} 其中 username 对应的不就是名称吗
解决办法参考教程http://faq.locoy.com/q-755.html 都是查找页面能看到 但是页面源代码却看不到的内容 leejunji 发表于 2014-2-27 10:33 static/image/common/back.gif
其中 username 对应的不就是名称吗
解决办法参考教程http://faq.locoy.com/q-755.html 都是查找页面能看到 ...
谢谢了ali82ls。初次接触数据采集,在本论坛受益匪浅。 一个P2P平台的页面有一个导航栏上面有这几项选择:
借款详情个人资料账户详情还款信用 资料审核 投标记录
默认打开时进入的是“借款详情”,网址是:http://www.xinlongct.com/invest/detail.html?borrowid=2515#jkxq 。我要采集“投标记录”里的数据,问题是只能手动点击才能进入“投标记录”(http://www.xinlongct.com/invest/detail.html?borrowid=2515#tbinfo );直接输入该网址后,实际进入的仍是“借款详情”页面。我在采集器里打开查看源文件,看到的也是默认页的内容;即使手动点击“投标记录”,刷新浏览器后显示的还是默认页面“借款详情”的内容,在“投标记录”上右键选择查看源文件,看到的也是默认页面的内容。这样采集器应该采集不到“投标记录”里的数据吧?能不能模拟点击“投标记录”的动作进入?初学数据采集,请高手指点。
页:
[1]