从亚马逊提取价格和卖家数据
Posted
技术标签:
【中文标题】从亚马逊提取价格和卖家数据【英文标题】:Extract price and seller data from Amazon 【发布时间】:2016-03-02 23:42:53 【问题描述】:我正在使用以下代码从亚马逊提取数据。
Sub Macro1()
    ' Imports the complete Amazon offer-listing page for one product into the
    ' active sheet, starting at cell A1, by means of a web QueryTable.
    Dim qryOffers As QueryTable

    Set qryOffers = ActiveSheet.QueryTables.Add( _
        Connection:="URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true", _
        Destination:=Range("$A$1"))

    With qryOffers
        .Name = "oldOfferPrice"

        ' General query table behaviour
        .FieldNames = True
        .RowNumbers = True
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0

        ' Web import options: the whole page is taken, without any HTML formatting
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = True
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False

        ' Run the query now and wait for it to finish (synchronous refresh)
        .Refresh BackgroundQuery:=False
    End With
End Sub
上面的代码会提取整个页面的数据,但我的需求是只提取价格。页面中的价格采用如下格式。
<div class="a-row a-spacing-mini olpOffer">
<div class="a-column a-span2">
<span class="a-size-large a-color-price olpOfferPrice a-text-bold"> $171.99 </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>
我想提取两个值,即 $171.99 和 Amazon Prime TM。一页中可能有多个价格和卖方值,我想提取所有值。
【问题讨论】:
如果需要对上述问题做进一步澄清,请告知 【参考方案1】:下面的示例展示了如何使用 XHR 和 Split 检索特定 ASIN 的亚马逊报价(offer)列表,并将结果输出到工作表:
Sub TestExtractAmazonOffers()
    ' Demo entry point: wipes "Sheet1", downloads the offers of a sample ASIN
    ' and writes the resulting table to the sheet starting at cell A1.
    Dim wsTarget As Worksheet
    Dim varOffers() As Variant

    Set wsTarget = Sheets("Sheet1")

    ' start from an empty sheet
    wsTarget.Cells.Delete

    ' fetch the offer table for the sample product
    varOffers = ExtractAmazonOffers("B07CR8D2DW")

    ' dump the table at row 1, column 1
    Output wsTarget, 1, 1, varOffers
End Sub
Function ExtractAmazonOffers(strASIN As String)
    ' Downloads the Amazon offer-listing pages for the given ASIN and parses
    ' them with plain string operations (no HTML parser).
    '
    ' Parameters:
    '   strASIN - product identifier inserted into the offer-listing URL.
    '
    ' Returns:
    '   A zero-based 2-dimensional Variant array with 4 columns
    '   (Offer Price, Amazon Prime TM, Shipping Price, Seller Name).
    '   Row 0 is the header row; each following row is one offer.
    '
    ' Notes:
    '   - Follows the "next page" link (class="a-last") until none is found.
    '   - Offers whose markup lacks the expected columns are skipped, and
    '     fields whose markers are missing are left empty or set to "-",
    '     instead of raising "Subscript out of range".
    Const IMG_ALT As String = "<img alt="""
    Const HREF_ATTR As String = "href="""
    Const SHIP_INFO As String = "olpShippingInfo"

    Dim strUrl As String
    Dim strResp As String
    Dim strNextUrl As String
    Dim arrTmp() As String
    Dim strTmp As String
    Dim strShipping As String
    Dim arrItems() As String
    Dim i As Long
    Dim j As Long
    Dim lngPos As Long
    Dim arrCols() As String
    Dim strSellerName As String
    Dim strOfferPrice As String
    Dim strAmazonPrime As String
    Dim strShippingPrice As String
    Dim arrResults() As Variant
    Dim arrCells() As Variant

    ' init: header row plus URL of the first page
    arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
    strUrl = "https://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"

    Do
        ' synchronous HTTP GET of the current offer-listing page
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", strUrl, False
            .Send
            strResp = .ResponseText
        End With

        ' everything after the offer list container holds the offers
        arrTmp = Split(strResp, "id=""olpOfferList""", 2)
        If UBound(arrTmp) = 1 Then
            ' element 0 is the text before the first offer, so start at 1
            arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer""")
            For i = 1 To UBound(arrItems)
                ' get item columns (at most 6 pieces, indexes 0..5)
                arrCols = Split(arrItems(i), "<div class=""a-column", 6)

                ' columns 1 and 4 are required; skip malformed offers
                If UBound(arrCols) >= 4 Then

                    ' retrieve seller name from column 4
                    strSellerName = ""
                    lngPos = InStr(arrCols(4), "olpSellerName")
                    If lngPos > 0 Then
                        strTmp = Mid$(arrCols(4), lngPos + Len("olpSellerName"))
                        ' NOTE(review): the delimiter was an empty string in the
                        ' published listing (apparently stripped as HTML), which
                        ' made this branch unreachable. Reconstructed from the
                        ' "from image alt" intent - confirm against live markup.
                        lngPos = InStr(strTmp, IMG_ALT)
                        If lngPos > 0 Then ' from image alt
                            strTmp = Mid$(strTmp, lngPos + Len(IMG_ALT))
                            lngPos = InStr(strTmp, """")
                            If lngPos > 0 Then strTmp = Left$(strTmp, lngPos - 1)
                            strSellerName = Trim(strTmp)
                        Else ' from link
                            strSellerName = OlpTagText(strTmp, "<a")
                        End If
                    End If

                    ' retrieve offer price from column 1
                    strOfferPrice = OlpTagText(arrCols(1), "olpOfferPrice")

                    ' split column 1 into the part before the shipping info
                    ' (where the Prime badge lives) and the shipping info itself
                    lngPos = InStr(arrCols(1), SHIP_INFO)
                    If lngPos > 0 Then
                        strTmp = Left$(arrCols(1), lngPos - 1)
                        strShipping = Mid$(arrCols(1), lngPos + Len(SHIP_INFO))
                        ' shipping info without an explicit price means free shipping
                        If InStr(strShipping, "olpShippingPrice") > 0 Then
                            strShippingPrice = OlpTagText(strShipping, "olpShippingPrice")
                        Else
                            strShippingPrice = "Free"
                        End If
                    Else
                        ' no shipping info block at all: shipping is unknown
                        strTmp = arrCols(1)
                        strShippingPrice = "-"
                    End If

                    ' retrieve amazon prime
                    strAmazonPrime = IIf(InStr(strTmp, "Amazon Prime") > 0, "Amazon Prime", "-")

                    ' store data
                    ReDim Preserve arrResults(UBound(arrResults) + 1)
                    arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
                End If
            Next
        End If

        ' search for next page link
        arrTmp = Split(strResp, "class=""a-last""", 2)
        If UBound(arrTmp) < 1 Then Exit Do
        lngPos = InStr(arrTmp(1), HREF_ATTR)
        If lngPos = 0 Then Exit Do
        strNextUrl = Mid$(arrTmp(1), lngPos + Len(HREF_ATTR))
        lngPos = InStr(strNextUrl, """")
        If lngPos > 0 Then strNextUrl = Left$(strNextUrl, lngPos - 1)
        ' attribute values are HTML-encoded; restore the query separators
        strNextUrl = Replace(strNextUrl, "&amp;", "&")
        If Left(strNextUrl, 1) = "/" Then strNextUrl = "https://www.amazon.com" & strNextUrl
        ' stop on an empty or self-referencing link to avoid an endless loop
        If Len(strNextUrl) = 0 Or strNextUrl = strUrl Then Exit Do
        strUrl = strNextUrl
    Loop

    ' convert nested array to 2-dimensional array
    ReDim arrCells(UBound(arrResults), 3)
    For i = 0 To UBound(arrCells, 1)
        For j = 0 To UBound(arrCells, 2)
            arrCells(i, j) = arrResults(i)(j)
        Next
    Next
    ExtractAmazonOffers = arrCells
End Function

Private Function OlpTagText(strHtml As String, strMarker As String) As String
    ' Returns the space-trimmed text found between the first ">" that follows
    ' strMarker and the next "<". Returns "" when strMarker or ">" is absent.
    Dim lngPos As Long
    Dim strRest As String

    OlpTagText = ""
    lngPos = InStr(strHtml, strMarker)
    If lngPos = 0 Then Exit Function
    strRest = Mid$(strHtml, lngPos + Len(strMarker))

    lngPos = InStr(strRest, ">")
    If lngPos = 0 Then Exit Function
    strRest = Mid$(strRest, lngPos + 1)

    ' without a following "<" the remaining text is taken as-is
    lngPos = InStr(strRest, "<")
    If lngPos > 0 Then strRest = Left$(strRest, lngPos - 1)
    OlpTagText = Trim(strRest)
End Function
Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)
    ' Writes a 2-dimensional array to objSheet as text, with its top-left
    ' element placed at row lngTop / column lngLeft, then auto-fits the
    ' columns that were written. The sheet is activated first.
    Dim lngRows As Long
    Dim lngCols As Long
    Dim rngTarget As Range

    objSheet.Select

    ' size of the array along each dimension
    lngRows = UBound(arrCells, 1) - LBound(arrCells, 1) + 1
    lngCols = UBound(arrCells, 2) - LBound(arrCells, 2) + 1

    Set rngTarget = objSheet.Cells(lngTop, lngLeft).Resize(lngRows, lngCols)

    ' "@" = text format, so values are stored exactly as passed
    rngTarget.NumberFormat = "@"
    rngTarget.Value = arrCells
    rngTarget.Columns.AutoFit
End Sub
生成的工作表如下:
【讨论】:
以上是关于从亚马逊提取价格和卖家数据的主要内容,如果未能解决你的问题,请参考以下文章