错误处理时需要对象错误
Posted
技术标签:
【中文标题】错误处理时需要对象错误【英文标题】:Object required Error when error handling 【发布时间】:2022-01-13 10:09:15 【问题描述】:以下代码通过抓取 ECHA 网站检索 A 列中物质的“档案网址”。我正在尝试错误处理无法找到物质 URL 的情况。
我不太明白为什么下面的代码会失败。我用评论突出显示了有问题的行。这在调试中突出显示为需要对象错误,但我看不出哪里出错了。
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim ohtml, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
'PROBLEMATIC CODE
If oHTML.querySelector(".details").getAttribute("href") Is Error Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
要运行此代码,A 列中必须存在值。丙酮和苯可分别用于测试 2 行。测试错误处理输入像 Benzenjaj 这样的东西。
我认为这是一个快速修复。就是看不出来。
更新:
对组成的物质名称进行测试:
前 2 个结果正常,但合成的化学物质导致以下错误:
代码:
Sub PopulateExposures() Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
结束子
公共函数 SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error GoTo 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"
结束函数
函数曝光数据(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
结束函数
【问题讨论】:
Is
运算符比较 2 个对象。 getAttribute 方法返回一个不是 Object 的 String。
如何将此字符串的输出设置为对象以检查 URL 是否存在?
@BrianMStafford 我不明白底部模块中的Info
是一个字符串,但Is
运算符似乎可以工作?
Info
不是字符串。它是一个包含 Object 的 Variant,如 Set Info =
行所示。您只需要对象的 Set 关键字。此外,最好将 Info 定义为正确的类型。类似于Dim Info As IHTMLElement
。
【参考方案1】:
你可以忽略任何错误:
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error Goto 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"
【讨论】:
嗨蒂姆,如果我使用合成物质,这似乎会失败 另外,我希望未找到 URL 的单元格实际上返回一些字符,例如“-” “失败”究竟是什么意思?那看起来像什么?如果querySelector()
失败,那么它应该返回“ExposureData
之前,您需要检查它不等于“您可以在查找特定的注册文档 url 时测试 querySelectorAll 的 .Length。您需要在其他地方修改代码以处理“-”返回 url。在忽略错误方面,我更喜欢 Tim 的解决方案。
Option Explicit
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
If Left$(url, 5) = "https" Then
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Else
rw.Cells(5).Resize(1, 3).Value = url
End If
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
If oHTML.querySelectorAll("[href*=registered-dossier]").Length = 0 Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
【讨论】:
以上是关于错误处理时需要对象错误的主要内容,如果未能解决你的问题,请参考以下文章
使用 pexpect 和多处理时出错?错误“TypEerror:无法序列化 '_io.TextIOWrapper' 对象”