如何在vba中抓取web数据
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了如何在vba中抓取web数据相关的知识,希望对你有一定的参考价值。
我已经按照jsotola的建议并记录下面的宏,但遇到错误,我该如何解决?运行时错误91,并突出显示以下代码
Selection.ListObject.TableObject.Refresh
Sub Macro1()
ActiveWorkbook.Queries.Add Name:="1-1-1", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Web.Page(Web.Contents(""http://www.hkjc.com/English/racing/Horse.asp?HorseNo=V099""))," & Chr(13) & "" & Chr(10) & " Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""Column1"", type text}, {""Column2"", type text}, {""Column3"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
ActiveWorkbook.Queries.Add Name:="1-1-2", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Web.Page(Web.Contents(""http://www.hkjc.com/English/racing/Horse.asp?HorseNo=V099""))," & Chr(13) & "" & Chr(10) & " Data1 = Source{1}[Data]," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(Data1,{{""Column1"", type text}, {""Column2"", type text}, {""Column3"", type text}, {""Column4"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
Workbooks("Book1").Connections.Add2 "Query - Table 0", _
"Connection to the 'Table 0' query in the workbook.", _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=Table 0" _
, """Table 0""", 6, True, False
Workbooks("Book1").Connections.Add2 "Query - Table 1", _
"Connection to the 'Table 1' query in the workbook.", _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=Table 1" _
, """Table 1""", 6, True, False
Sheets.Add After:=ActiveSheet
Selection.ListObject.TableObject.Refresh
Sheets.Add After:=ActiveSheet
Selection.ListObject.TableObject.Refresh
End Sub
答案
您可以使用以下脚本。
①我抓住左手边的链接
.getElementsByTagName("table")(3).getElementsByTagName("a")
由于这些返回以“about:”开头的相对路径,我将此部分替换为固定前缀字符串BASESTRING
。这给了absolute path。
②我通过获取table
标签的集合并通过索引选择适当的表来使用主要信息来定位表。
Set hTable = .getElementsByTagName("table")(6)
③此外,由于我使用的方法不支持className的定位,由于我假设的后期绑定html文件),我从包含此信息的元素的innerHTML解析子标题“SMART BOY(V076)”。否则,它可以用.getElementsByClassName("subsubheader")(0)
更清晰地定位
页面上的示例数据:
代码输出示例:
码:
Option Explicit
Public Sub GetTable()
Dim sResponse As String, hTable As Object
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.hkjc.com/english/racing/horse.asp?HorseNo=V076", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "HEAD"))
With CreateObject("htmlFile")
.Write sResponse
Set hTable = .getElementsByTagName("table")(6)
Dim links As Object, title As String
Set links = .getElementsByTagName("table")(3).getElementsByTagName("a")
title = Replace$(Split(Split(.getElementsByTagName("table")(2).innerHTML, "title_eng_text>")(1), "<")(0), " ", vbNullString)
End With
Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, hBody As Object
Set hBody = hTable.getElementsByTagName("tbody")
Const BASESTRING As String = "http://www.hkjc.com/english/racing/"
With ActiveSheet
.Cells(1, 1) = title
r = 2
For Each tSection In hBody 'HTMLTableSection
Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
c = 1
.Cells(r, c) = links(r - 1).innerHTML
.Cells(r, c + 1) = Replace$(links(r - 1), "about:", BASESTRING)
For Each td In tCell 'DispHTMLElementCollection
.Cells(r, c + 2).Value = td.innerText 'HTMLTableCell
c = c + 1
Next td
r = r + 1
Next tr
Next tSection
.UsedRange.Columns.AutoFit
End With
Application.ScreenUpdating = True
End Sub
以上是关于如何在vba中抓取web数据的主要内容,如果未能解决你的问题,请参考以下文章