从亚马逊提取价格和卖家数据

Posted

技术标签:

【中文标题】从亚马逊提取价格和卖家数据【英文标题】:Extract price and seller data from Amazon 【发布时间】:2016-03-02 23:42:53 【问题描述】:

我正在使用以下代码从亚马逊提取数据。

Sub Macro1()
    ' Creates a web query on the active sheet that imports the Amazon
    ' offer-listing page for one hard-coded product, anchored at cell A1.
    ' The query is configured to import the entire page (xlEntirePage).
    Dim qryOffers As QueryTable

    Set qryOffers = ActiveSheet.QueryTables.Add( _
        Connection:="URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true", _
        Destination:=Range("$A$1"))

    ' General query-table options
    qryOffers.Name = "oldOfferPrice"
    qryOffers.FieldNames = True
    qryOffers.RowNumbers = True
    qryOffers.FillAdjacentFormulas = False
    qryOffers.PreserveFormatting = True
    qryOffers.RefreshOnFileOpen = False
    qryOffers.BackgroundQuery = True
    qryOffers.RefreshStyle = xlInsertDeleteCells
    qryOffers.SavePassword = False
    qryOffers.SaveData = True
    qryOffers.AdjustColumnWidth = True
    qryOffers.RefreshPeriod = 0

    ' Web-import specific options
    qryOffers.WebSelectionType = xlEntirePage
    qryOffers.WebFormatting = xlWebFormattingNone
    qryOffers.WebPreFormattedTextToColumns = True
    qryOffers.WebConsecutiveDelimitersAsOne = True
    qryOffers.WebSingleBlockTextImport = True
    qryOffers.WebDisableDateRecognition = False
    qryOffers.WebDisableRedirections = False

    ' Run the query now, overriding the BackgroundQuery setting for this refresh
    qryOffers.Refresh BackgroundQuery:=False
End Sub

上面的代码会提取整个页面的数据,但我只需要提取价格。页面中的价格采用如下格式:

<div class="a-row a-spacing-mini olpOffer">
                    <div class="a-column a-span2">
       <span class="a-size-large a-color-price olpOfferPrice a-text-bold">                $171.99                </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>

我想提取两个值,即 $171.99 和 Amazon Prime TM。一页中可能有多个价格和卖方值,我想提取所有值。

【问题讨论】:

如果需要对上述查询进一步澄清,请告知 【参考方案1】:

下面的示例展示了如何使用 XHR 和 Split 检索特定 ASIN 的亚马逊报价(offers),并将结果输出到工作表:

Sub TestExtractAmazonOffers()

    ' Demo driver: wipes Sheet1, fetches the offers for one hard-coded ASIN
    ' and writes the resulting table to Sheet1 starting at row 1, column 1.
    Const TARGET_ASIN As String = "B07CR8D2DW"
    Dim varOffers As Variant

    ' start from an empty sheet
    Sheets("Sheet1").Cells.Delete

    ' fetch the offer table for the ASIN
    varOffers = ExtractAmazonOffers(TARGET_ASIN)

    ' write the table to the sheet
    Output Sheets("Sheet1"), 1, 1, varOffers

End Sub

Function ExtractAmazonOffers(strASIN As String)

    ' Retrieves the "new" offer listing for the given ASIN from amazon.com,
    ' following the "next page" link until there is none, and parses each
    ' offer out of the raw HTML with Split.
    '
    ' Parameters:
    '   strASIN - product identifier inserted into the offer-listing URL.
    '
    ' Returns:
    '   A zero-based 2-dimensional Variant array with 4 columns
    '   (Offer Price, Amazon Prime TM, Shipping Price, Seller Name).
    '   Row 0 is the header row; each following row is one offer.
    '
    ' Notes:
    '   The parsing relies on specific class names / ids in the page markup
    '   (olpOfferList, olpOffer, olpSellerName, olpOfferPrice, olpShippingInfo,
    '   olpShippingPrice, a-last). If an expected marker is missing inside an
    '   offer, the indexed Split result raises "Subscript out of range".

    Dim strUrl As String
    Dim strResp As String
    Dim arrTmp() As String
    Dim strTmp As String
    Dim arrItems() As String
    Dim i As Long
    Dim j As Long
    Dim arrCols() As String
    Dim strSellerName As String
    Dim strOfferPrice As String
    Dim strAmazonPrime As String
    Dim strShippingPrice As String
    Dim arrResults() As Variant
    Dim arrCells() As Variant

    ' init: the result list starts with the header row
    arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
    strUrl = "https://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"
    Do
        ' synchronous http get request of the current offer-listing page
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", strUrl, False
            .Send
            strResp = .ResponseText
        End With
        ' everything after the offer list container holds the offers
        arrTmp = Split(strResp, "id=""olpOfferList""", 2)
        If UBound(arrTmp) = 1 Then
            ' element 0 is the markup before the first offer, so start at 1
            arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer""")
            For i = 1 To UBound(arrItems)
                ' get item columns
                arrCols = Split(arrItems(i), "<div class=""a-column", 6)
                ' retrieve seller name from column 4
                strTmp = Split(arrCols(4), "olpSellerName", 2)(1)
                ' The seller is rendered either as an image (name in the alt
                ' attribute) or as a text link. Splitting on an empty
                ' delimiter never yields two parts, so the alt marker must be
                ' given explicitly for the image branch to be reachable.
                arrTmp = Split(strTmp, "alt=""", 2)
                If UBound(arrTmp) = 1 Then ' from image alt
                    strTmp = Split(arrTmp(1), """", 2)(0)
                    strSellerName = Trim(strTmp)
                Else ' from link
                    strTmp = Split(strTmp, "<a", 2)(1)
                    strTmp = Split(strTmp, ">", 2)(1)
                    strTmp = Split(strTmp, "<", 2)(0)
                    strSellerName = Trim(strTmp)
                End If
                ' retrieve offer price from column 1
                strTmp = Split(arrCols(1), "olpOfferPrice", 2)(1)
                strTmp = Split(strTmp, ">", 2)(1)
                strTmp = Split(strTmp, "<", 2)(0)
                strOfferPrice = Trim(strTmp)
                ' retrieve amazon prime: looked up only in the markup that
                ' precedes the shipping info within column 1
                arrTmp = Split(arrCols(1), "olpShippingInfo", 2)
                strAmazonPrime = IIf(InStr(arrTmp(0), "Amazon Prime") > 0, "Amazon Prime", "-")
                ' retrieve shipping info; no explicit shipping price is reported as "Free"
                arrTmp = Split(arrTmp(1), "olpShippingPrice", 2)
                If UBound(arrTmp) = 1 Then
                    strTmp = Split(arrTmp(1), ">", 2)(1)
                    strTmp = Split(strTmp, "<", 2)(0)
                    strShippingPrice = Trim(strTmp)
                Else
                    strShippingPrice = "Free"
                End If
                ' store data
                ReDim Preserve arrResults(UBound(arrResults) + 1)
                arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
            Next
        End If
        ' search for next page link; stop when the page has none
        arrTmp = Split(strResp, "class=""a-last""", 2)
        If UBound(arrTmp) = 0 Then Exit Do
        strTmp = Split(arrTmp(1), "href=""", 2)(1)
        strUrl = Split(strTmp, """", 2)(0)
        ' attribute values are HTML-escaped; decode ampersands before requesting
        strUrl = Replace(strUrl, "&amp;", "&")
        If Left(strUrl, 1) = "/" Then strUrl = "https://www.amazon.com" & strUrl
    Loop
    ' convert nested array to 2-dimensional array
    ReDim arrCells(UBound(arrResults), 3)
    For i = 0 To UBound(arrCells, 1)
        For j = 0 To UBound(arrCells, 2)
            arrCells(i, j) = arrResults(i)(j)
        Next
    Next
    ExtractAmazonOffers = arrCells

End Function

Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)

    ' Writes a 2-dimensional array to objSheet as text, with the array's first
    ' element placed at row lngTop / column lngLeft, then auto-fits the
    ' columns of the written range. The sheet is selected first.
    Dim lngRowCount As Long
    Dim lngColCount As Long
    Dim rngTarget As Range

    ' size of the array in each dimension, independent of its lower bounds
    lngRowCount = UBound(arrCells, 1) - LBound(arrCells, 1) + 1
    lngColCount = UBound(arrCells, 2) - LBound(arrCells, 2) + 1

    objSheet.Select
    Set rngTarget = objSheet.Cells(lngTop, lngLeft).Resize(lngRowCount, lngColCount)

    ' apply the text format before assigning the values
    rngTarget.NumberFormat = "@"
    rngTarget.Value = arrCells
    rngTarget.Columns.AutoFit

End Sub

生成的工作表如下:

【讨论】:

以上是关于从亚马逊提取价格和卖家数据的主要内容,如果未能解决你的问题,请参考以下文章

使用 ASIN 在亚马逊获取商品/产品价格

从亚马逊库存中提取产品/价格/类别/规格

有没有人成功地使用 PHP 从亚马逊卖家中心下载订单?

通过python从HTML中提取特定信息

亚马逊卖家选品idea从哪来?

印度卖家在亚马逊上卖牛粪饼,反响竟然还不错