错误处理时需要对象错误

Posted

技术标签:

【中文标题】错误处理时需要对象错误【英文标题】:Object required Error when error handling 【发布时间】:2022-01-13 10:09:15 【问题描述】:

以下代码通过抓取 ECHA 网站检索 A 列中物质的“档案网址”。我正在尝试错误处理无法找到物质 URL 的情况。

我不太明白为什么下面的代码会失败。我用评论突出显示了有问题的行。这在调试中突出显示为需要对象错误,但我看不出哪里出错了。

Sub PopulateExposures()
    Dim url, rw As Range
    
    Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
    Do While Application.CountA(rw) > 0
        url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
        rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
        Set rw = rw.Offset(1, 0) 'next substance
    Loop

End Sub

Public Function SubstanceUrl(SubstanceName, CASNumber) As String
    
    Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
                "p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
                "p_p_state=normal&p_p_mode=view&" & _
                "__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
    
    Dim ohtml, oHttp, MyDict, payload, DictKey, sep
    
    Set oHTML = New HTMLDocument
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    Set MyDict = CreateObject("Scripting.Dictionary")
    
    MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
    MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
    
    payload = ""
    For Each DictKey In MyDict
        payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
        sep = "&"
    Next DictKey
        
    With oHttp
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send payload
        oHTML.body.innerHTML = .responseText
    End With
    
    'PROBLEMATIC CODE
    If oHTML.querySelector(".details").getAttribute("href") Is Error Then
    
    SubstanceUrl = "-"
    Else
    
     'Sometimes output changes despite same input
    SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
    
    End If
    
    
    Debug.Print SubstanceUrl
    
    
    
End Function

Function ExposureData(urlToGet)
    
    Dim XMLReq As New MSXML2.XMLHTTP60
    Dim HTMLDoc As HTMLDocument, dds
    Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
    
    Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
    Route(2) = "sGeneralPopulationHazardViaDermalRoute"
    Route(3) = "sGeneralPopulationHazardViaOralRoute"
    
    XMLReq.Open "Get", urlToGet & "/7/1", False
    XMLReq.send
     
    If XMLReq.Status <> 200 Then
        Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
    Else
        Set HTMLDoc = New HTMLDocument
        HTMLDoc.body.innerHTML = XMLReq.responseText
        For c = 1 To UBound(Route, 1)
            Set Info = HTMLDoc.getElementById(Route(c))
            If Not Info Is Nothing Then
                Set Info = Info.NextSibling.NextSibling.NextSibling
                Set dds = Info.getElementsByTagName("dd")
                If dds.Length > 1 Then
                    Results(c) = dds(1).innerText
                Else
                    Results(c) = "hazard unknown"
                End If
            Else
                Results(c) = "no info"
            End If
        Next c
    End If
    
    ExposureData = Results
    
End Function

要运行此代码,A 列中必须存在值。丙酮和苯可分别用于测试 2 行。测试错误处理输入像 Benzenjaj 这样的东西。

我认为这是一个快速修复。就是看不出来。

更新:

对组成的物质名称进行测试:

前 2 个结果正常,但合成的化学物质导致以下错误:

代码:

Sub PopulateExposures() Dim url, rw As Range

 Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
 Do While Application.CountA(rw) > 0
     url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
     rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
     Set rw = rw.Offset(1, 0) 'next substance
 Loop

结束子

公共函数 SubstanceUrl(SubstanceName, CASNumber) As String

 Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
             "p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
             "p_p_state=normal&p_p_mode=view&" & _
             "__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"

 Dim oHTML, oHttp, MyDict, payload, DictKey, sep

 Set oHTML = New HTMLDocument
 Set oHttp = CreateObject("MSXML2.XMLHTTP")
 Set MyDict = CreateObject("Scripting.Dictionary")

 MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
 MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
 MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
 MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"

 payload = ""
 For Each DictKey In MyDict
     payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
     sep = "&"
 Next DictKey

 With oHttp
     .Open "POST", url, False
     .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
     .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
     .send payload
     oHTML.body.innerHTML = .responseText
 End With

 On Error Resume Next 'ignore error on following line
 SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
 On Error GoTo 0      'stop ignoring errors

 If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"

结束函数

函数曝光数据(urlToGet)

 Dim XMLReq As New MSXML2.XMLHTTP60
 Dim HTMLDoc As HTMLDocument, dds
 Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data

 Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
 Route(2) = "sGeneralPopulationHazardViaDermalRoute"
 Route(3) = "sGeneralPopulationHazardViaOralRoute"

 XMLReq.Open "Get", urlToGet & "/7/1", False
 XMLReq.send

 If XMLReq.Status <> 200 Then
     Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
 Else
     Set HTMLDoc = New HTMLDocument
     HTMLDoc.body.innerHTML = XMLReq.responseText
     For c = 1 To UBound(Route, 1)
         Set Info = HTMLDoc.getElementById(Route(c))
         If Not Info Is Nothing Then
             Set Info = Info.NextSibling.NextSibling.NextSibling
             Set dds = Info.getElementsByTagName("dd")
             If dds.Length > 1 Then
                 Results(c) = dds(1).innerText
             Else
                 Results(c) = "hazard unknown"
             End If
         Else
             Results(c) = "no info"
         End If
     Next c
 End If

 ExposureData = Results

结束函数

【问题讨论】:

Is 运算符比较 2 个对象。 getAttribute 方法返回一个不是 Object 的 String。 如何将此字符串的输出设置为对象以检查 URL 是否存在? @BrianMStafford 我不明白底部模块中的Info 是一个字符串,但Is 运算符似乎可以工作? Info 不是字符串。它是一个包含 Object 的 Variant,如 Set Info = 行所示。您只需要对象的 Set 关键字。此外,最好将 Info 定义为正确的类型。类似于Dim Info As IHTMLElement 【参考方案1】:

你可以忽略任何错误:


    With oHttp
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send payload
        oHTML.body.innerHTML = .responseText
    End With
    
    On Error Resume Next 'ignore error on following line
    SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
    On Error Goto 0      'stop ignoring errors

    If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>" 

【讨论】:

嗨蒂姆,如果我使用合成物质,这似乎会失败 另外,我希望未找到 URL 的单元格实际上返回一些字符,例如“-” “失败”究竟是什么意思?那看起来像什么?如果querySelector() 失败,那么它应该返回“ 请查看问题的更新以查看问题的图像以及我从您的答案中使用的代码。 在将 URL 传递给 ExposureData 之前,您需要检查它不等于“【参考方案2】:

您可以在查找特定的注册文档 url 时测试 querySelectorAll 的 .Length。您需要在其他地方修改代码以处理“-”返回 url。在忽略错误方面,我更喜欢 Tim 的解决方案。

Option Explicit

Sub PopulateExposures()
    Dim url, rw As Range
    
    Set rw = Sheets("data").Range("A2:E2")       'first row with inputs
    Do While Application.CountA(rw) > 0
        url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
        If Left$(url, 5) = "https" Then
            rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
        Else
            rw.Cells(5).Resize(1, 3).Value = url
        End If
        Set rw = rw.Offset(1, 0)                 'next substance
    Loop

End Sub

Public Function SubstanceUrl(SubstanceName, CASNumber) As String
    
    Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
    "p_p_id=dis-s-registeredsubstances_WAR_dis-s-regsubsportlet&p_p_lifecycle=1&" & _
    "p_p_state=normal&p_p_mode=view&" & _
    "__dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
    
    Dim oHTML, oHttp, MyDict, payload, DictKey, sep
    
    Set oHTML = New HTMLDocument
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    Set MyDict = CreateObject("Scripting.Dictionary")
    
    MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_name") = SubstanceName
    MyDict("_dis-s-registeredsubstances_WAR_dis-s-regsubsportlet_disreg_cas-number") = CASNumber
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
    
    payload = ""
    For Each DictKey In MyDict
        payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
        sep = "&"
    Next DictKey
        
    With oHttp
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send payload
        oHTML.body.innerHTML = .responseText
    End With
    
    If oHTML.querySelectorAll("[href*=registered-dossier]").Length = 0 Then
    
        SubstanceUrl = "-"
        
    Else
    
        'Sometimes output changes despite same input
        SubstanceUrl = oHTML.querySelector(".details")
    
    End If
    
    
    Debug.Print SubstanceUrl
      
End Function

Function ExposureData(urlToGet)
    
    Dim XMLReq As New MSXML2.XMLHTTP60
    Dim HTMLDoc As HTMLDocument, dds
    Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
    
    Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
    Route(2) = "sGeneralPopulationHazardViaDermalRoute"
    Route(3) = "sGeneralPopulationHazardViaOralRoute"
    
    XMLReq.Open "Get", urlToGet & "/7/1", False
    XMLReq.send
     
    If XMLReq.Status <> 200 Then
        Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
    Else
        Set HTMLDoc = New HTMLDocument
        HTMLDoc.body.innerHTML = XMLReq.responseText
        For c = 1 To UBound(Route, 1)
            Set Info = HTMLDoc.getElementById(Route(c))
            If Not Info Is Nothing Then
                Set Info = Info.NextSibling.NextSibling.NextSibling
                Set dds = Info.getElementsByTagName("dd")
                If dds.Length > 1 Then
                    Results(c) = dds(1).innerText
                Else
                    Results(c) = "hazard unknown"
                End If
            Else
                Results(c) = "no info"
            End If
        Next c
    End If
    
    ExposureData = Results
    
End Function

【讨论】:

以上是关于错误处理时需要对象错误的主要内容,如果未能解决你的问题,请参考以下文章

错误和异常处理

异常处理

异常处理

使用 pexpect 和多处理时出错?错误“TypEerror:无法序列化 '_io.TextIOWrapper' 对象”

JS中错误处理

扩展Python模块系列----异常和错误处理