|
研究了几天都没有结果,实在有点难。不知道这种情况是不是需要写一个PHP插件页来获取JS处理后的页面内容,再用火车头来采集,求解答啊{:soso_e134:}
链接: http://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.1.1XxF39&id=7763866752&user_id=531378844&is_b=1&cat_id=50045000&q=%BB%AF%D7%B1%CB%A2&rn=9d1ecc5e3a69fc22e09e72eba217ae45
需要提取的分页地址是: http://ald.taobao.com/recommend.htm?appId=03054&itemID=7763866752&categoryId=50019251&sellerId=531378844&shopId=62927585&brandId=73566151&refer=&showTitle=2048&callback=ald378&1370229230730
源代码中对应处理链接的JS代码是(不知道是不是哈):
<script>
/**
 * Page bootstrap, run once as soon as the script is parsed.
 *
 * 1. Publishes the page configuration object as `w.g_config`.
 * 2. On screens at least 1260px wide, appends the " w990" class to <html>
 *    when bit 8 of `bizTag` is set, or when the query string contains
 *    "standard=1" and bit 16 of `bizTag` is set.
 * 3. Creates one async <script> element per entry of `u` and inserts it as
 *    the first child of <head>.
 *
 * @param {Window} w - the global window object
 * @param {Document} d - the current document
 * @param {string[]} u - URLs of the scripts to inject
 */
(function(w, d, u) {
    var byTag = 'getElementsByTagName';
    var headEl = d[byTag]('head')[0];
    var scriptEl;
    var idx;
    var bizTag;
    var htmlEl;

    w.g_config = {
        startTime: +new Date(),
        bizTag: 2057,
        ver: "4.0bp",
        t: "20130532",
        appId: 1,
        itemId: "7763866752",
        assetsHost: "http://l.tbcdn.cn",
        p: 1,
        type: "b",
        isEC: true,
        toolbar: false,
        webww: true,
        pageType: ""
    };

    // Nothing below the width check is evaluated on narrower screens.
    if (w.screen.width >= 1260) {
        bizTag = w.g_config.bizTag;
        if (bizTag & 8 || (/standard=1/.test(w.location.search) && bizTag & 16)) {
            htmlEl = d[byTag]('html')[0];
            htmlEl['className'] = htmlEl['className'] + " w990";
        }
    }

    // Each script is inserted before the current first child of <head>, so
    // later entries of `u` end up ahead of earlier ones in the DOM.
    idx = 0;
    while (idx < u.length) {
        scriptEl = d.createElement("script");
        scriptEl.async = true;
        scriptEl.src = u[idx];
        headEl.insertBefore(scriptEl, headEl.firstChild);
        idx++;
    }
})(window, document, ['http://dsc.taobaocdn.com/i2/770/380/7763866752/T1xcWCXAxXXXcWeqbX.desc%7Cvar%5Edesc%3Bsign%5Ec898b8d5bed5bc286402f7dec33f5ec7%3Blang%5Egbk%3Bt%5E1369063050']);
</script>
|
|