Excel精英培训网

 找回密码
 注册
数据透视表40+个常用小技巧,让你一次学会!
查看: 2394|回复: 4

[已解决]请教网页数据抓取问题

[复制链接]
发表于 2016-4-2 17:04 | 显示全部楼层 |阅读模式
前一贴可能没说清楚,另发一贴请教。
网页http://www.ygqq.com/goods/goods831-54.html,要取“第54期”中的“所有参与记录”选项卡中共237页的“时间,昵称,参与人次”3个字段的数据,“昵称”中的照片不要,“参与人次”中只要前面的数字,隐藏其中的“查看夺宝码”几个字及后面的夺宝码都不要。谢谢!
最佳答案
2016-4-4 08:08
整理了下,完整代码如下。。。
  1. Sub main()
  2.     Dim Size As String, result()
  3.     Size = Split(Split(getResponsetext("gid=831&pid=54&size=1"), "total"":")(1), ",")(0)
  4.     InvenstDetail = getResponsetext("gid=831&pid=54&size=" & Size) '后续就是对InvestArr的处理。需要的字段为 bytime ,bytimes, nicname,
  5.     tmp = Split(InvenstDetail, "buyTimes"":")
  6.     ReDim result(Size, 1 To 3)
  7.     For i = 1 To UBound(tmp) '参与人次
  8.         result(i, 3) = Split(tmp(i), ",")(0)
  9.     Next
  10.     tmp = Split(InvenstDetail, "buyTime"":")
  11.     For i = 1 To UBound(tmp) '时间
  12.         result(i, 1) = FROM_UNIXTIME(Split(tmp(i), ",")(0))
  13.     Next
  14.     tmp = Split(InvenstDetail, "nickname"":""") '昵称
  15.     InvenstDetail = ""
  16.     For i = 1 To UBound(tmp)
  17.         result(i, 2) = Split(tmp(i), "")(0)
  18.     Next
  19.     Erase tmp
  20.     result(0, 1) = "时间": result(0, 2) = "昵称": result(0, 3) = "参与人次"
  21.     Cells.Clear
  22.     Range("A1").Resize(UBound(result), 3) = result
  23. End Sub

  24. Function getResponsetext(sendStr As String)
  25.     With CreateObject("MSXML2.XMLHTTP")
  26.         .Open "POST", "http://www.ygqq.com/goods/timeline.do?t=0.7006472381931250", False '登录
  27.         .setrequestheader "Referer", "http://www.ygqq.com/goods/goods831-54.html"
  28.         .setrequestheader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
  29.         .setrequestheader "Accept", "application/json, text/javascript, */*; q=0.01"
  30.         .setrequestheader "X-Requested-With", "XMLHttpRequest"
  31.         .setrequestheader "Content-Length", "22"
  32.         .setrequestheader "Host", "www.ygqq.com"
  33.         .setrequestheader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
  34.         .setrequestheader "Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3"
  35.         .setrequestheader "Accept-Encoding", "gzip, deflate"
  36.         .send sendStr '获取记录条数
  37.         getResponsetext = .responsetext
  38.     End With
  39. End Function

  40. Function FROM_UNIXTIME(UNIXTIME) As String
  41.     FROM_UNIXTIME = Format((UNIXTIME / 1000 + 8 * 3600) / 86400 + 70 * 365 + 19, "yyyy-mm-dd hh:mm:ss:") & Right(UNIXTIME, 3)
  42. End Function
复制代码
excel精英培训的微信平台,每天都会发送excel学习教程和资料。扫一扫明天就可以收到新教程
发表于 2016-4-2 20:50 | 显示全部楼层
回复

使用道具 举报

发表于 2016-4-2 23:13 | 显示全部楼层
本帖最后由 xdragon 于 2016-4-4 08:10 编辑

见下楼。。。。
回复

使用道具 举报

发表于 2016-4-4 08:08 | 显示全部楼层    本楼为最佳答案   
整理了下,完整代码如下。。。
  1. Sub main()
  2.     Dim Size As String, result()
  3.     Size = Split(Split(getResponsetext("gid=831&pid=54&size=1"), "total"":")(1), ",")(0)
  4.     InvenstDetail = getResponsetext("gid=831&pid=54&size=" & Size) '后续就是对InvestArr的处理。需要的字段为 bytime ,bytimes, nicname,
  5.     tmp = Split(InvenstDetail, "buyTimes"":")
  6.     ReDim result(Size, 1 To 3)
  7.     For i = 1 To UBound(tmp) '参与人次
  8.         result(i, 3) = Split(tmp(i), ",")(0)
  9.     Next
  10.     tmp = Split(InvenstDetail, "buyTime"":")
  11.     For i = 1 To UBound(tmp) '时间
  12.         result(i, 1) = FROM_UNIXTIME(Split(tmp(i), ",")(0))
  13.     Next
  14.     tmp = Split(InvenstDetail, "nickname"":""") '昵称
  15.     InvenstDetail = ""
  16.     For i = 1 To UBound(tmp)
  17.         result(i, 2) = Split(tmp(i), "")(0)
  18.     Next
  19.     Erase tmp
  20.     result(0, 1) = "时间": result(0, 2) = "昵称": result(0, 3) = "参与人次"
  21.     Cells.Clear
  22.     Range("A1").Resize(UBound(result), 3) = result
  23. End Sub

  24. Function getResponsetext(sendStr As String)
  25.     With CreateObject("MSXML2.XMLHTTP")
  26.         .Open "POST", "http://www.ygqq.com/goods/timeline.do?t=0.7006472381931250", False '登录
  27.         .setrequestheader "Referer", "http://www.ygqq.com/goods/goods831-54.html"
  28.         .setrequestheader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0"
  29.         .setrequestheader "Accept", "application/json, text/javascript, */*; q=0.01"
  30.         .setrequestheader "X-Requested-With", "XMLHttpRequest"
  31.         .setrequestheader "Content-Length", "22"
  32.         .setrequestheader "Host", "www.ygqq.com"
  33.         .setrequestheader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
  34.         .setrequestheader "Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3"
  35.         .setrequestheader "Accept-Encoding", "gzip, deflate"
  36.         .send sendStr '获取记录条数
  37.         getResponsetext = .responsetext
  38.     End With
  39. End Function

  40. Function FROM_UNIXTIME(UNIXTIME) As String
  41.     FROM_UNIXTIME = Format((UNIXTIME / 1000 + 8 * 3600) / 86400 + 70 * 365 + 19, "yyyy-mm-dd hh:mm:ss:") & Right(UNIXTIME, 3)
  42. End Function
复制代码

Book1.zip

202.7 KB, 下载次数: 9

评分

参与人数 2 +15 收起 理由
yizhih + 3 相当的给力
悠悠05 + 12 龙哥好棒

查看全部评分

回复

使用道具 举报

 楼主| 发表于 2016-4-4 20:17 | 显示全部楼层
xdragon 发表于 2016-4-4 08:08
整理了下,完整代码如下。。。

万分感谢,像这样类似的网站太不容易抓取数据了,您的水平相当高,再一次感谢您!
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 注册

本版积分规则

小黑屋|手机版|Archiver|Excel精英培训 ( 豫ICP备11015029号 )

GMT+8, 2024-4-28 01:14 , Processed in 0.482155 second(s), 15 queries , Gzip On, Yac On.

Powered by Discuz! X3.4

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表