Scraping Some Data
Posted by linqingxuan
The script below crawls the first 25 pages of one category of Dangdang's five-star book ranking (bang.dangdang.com/books/fivestars) and writes each book's title, link, review count, five-star count, and publication date to a CSV file:

import requests
from lxml import etree
import time  # imported but unused here; handy if you want to pause between requests


with open('/Users/wanruo/Desktop/Dang.csv', 'w', encoding='utf-8') as f:

    # Fetch the ranking's landing page and grab the category links in the sidebar;
    # items[0] is page 1 of the currently selected category.
    url = 'http://bang.dangdang.com/books/fivestars'
    data = requests.get(url).text
    s = etree.HTML(data)
    items = s.xpath('//*[@id="sortRanking"]/div/a/@href')

    # The ranking spans 25 pages; the page number is the last segment of the URL.
    for x in range(1, 26):
        page_url = items[0][0:74] + str(x)
        print(page_url)

        r = requests.get(page_url).text
        ss = etree.HTML(r)

        # One <li> per book on the ranking page.
        books = ss.xpath('//ul[@class="bang_list clearfix bang_list_mode"]/li')

        for book in books:
            title = book.xpath('./div[@class="name"]/a/@title')[0]
            book_href = book.xpath('./div[@class="name"]/a/@href')[0]
            pinglun = book.xpath('./div[@class="star"]/a/text()')[0].strip('条评论')    # review count
            wuxing = book.xpath('./div[@class="biaosheng"]/span/text()')[0].strip('次')  # five-star count
            date = book.xpath('./div[@class="publisher_info"]/span/text()')[0]

            # try:
            #     price_e = book.xpath('./div[@class="price"]/p[@class="price_e"]/span/text()')[0]
            # except IndexError:
            #     price_e = 'NA'

            # try:
            #     company = book.xpath('./div[@class="publisher_info"][2]/span/text()')[0]
            # except IndexError:
            #     company = 'NA'

            f.write('{},{},{},{},{}\n'.format(title, book_href, pinglun, wuxing, date))

Running it prints the URL of each page as it is fetched:

http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-1
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-2
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-3
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-4
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-5
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-6
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-7
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-8
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-9
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-10
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-11
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-12
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-13
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-14
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-15
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-16
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-17
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-18
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-19
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-20
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-21
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-22
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-23
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-24
http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-25

(Run under a Python 3 kernel, version 3.7.4.)
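Book titles on these pages can themselves contain commas, which would corrupt the hand-rolled comma-joined rows above. A minimal sketch of the same write step using the standard-library csv module instead (same output path; the header names are made up for illustration):

import csv

# csv.writer quotes any field that contains a comma or a quote character;
# newline='' prevents blank lines between rows on Windows.
with open('/Users/wanruo/Desktop/Dang.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'link', 'review_count', 'five_star_count', 'pub_date'])
    # ...then, inside the per-book loop above:
    # writer.writerow([title, book_href, pinglun, wuxing, date])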
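The cell imports time but never calls it. If Dangdang starts refusing requests, a short pause between pages and a browser-like User-Agent header usually go a long way; here is a small sketch (the fetch helper and the header string are illustrative, not part of the original notebook):

import time
import requests

# Example desktop User-Agent; any realistic browser string works the same way.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/537.36'}

def fetch(url):
    # requests.get accepts a headers dict; timeout keeps a dead connection from hanging forever.
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.raise_for_status()   # surface HTTP errors instead of parsing an error page
    time.sleep(1)             # be polite: roughly one request per second
    return resp.text

# Usage inside the page loop above:
# r = fetch(page_url)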