Python爬虫:爬取京东商品信息
Posted auraro997
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“Python爬虫:爬取京东商品信息”相关的知识,希望对你有一定的参考价值。
# NOTE: all three scripts use the Selenium 3 style API that the article was
# written against (`find_element_by_*`, driver path passed positionally).

# ===========================================================================
# Basic version: search JD and save the goods shown on the first result page.
# ===========================================================================
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name
        good_name = good.find_element_by_css_selector('.p-name em').text
        print(good_name)
        # Product link
        good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
        print(good_url)
        # Product price
        good_price = good.find_element_by_class_name('p-price').text
        print(good_price)
        # Product review count
        good_commit = good.find_element_by_class_name('p-commit').text

        # The template needs one `{}` per value, otherwise `format` silently
        # drops every argument and only the labels are written.
        good_content = (
            '\n'
            'num={}\n'
            '商品名称: {}\n'
            '商品链接: {}\n'
            '商品价格: {}\n'
            '商品的评价条数: {}\n'
            '\n'
        ).format(num, good_name, good_url, good_price, good_commit)
        print(good_content)
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1  # number the records instead of labelling all of them 1
    print('商品信息写入成功!')
finally:
    driver.close()


# ===========================================================================
# Advanced version: scroll down so lazily loaded goods appear, scrape them,
# then click through to the next result page.
# ===========================================================================
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down 5000px. The global object is `window`; `windows` raises a
    # ReferenceError in the browser.
    js_code = 'window.scrollTo(0,5000)'
    driver.execute_script(js_code)
    # Wait 5s for the goods data to load.
    time.sleep(5)

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name
        good_name = good.find_element_by_css_selector('.p-name em').text
        print(good_name)
        # Product link
        good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
        print(good_url)
        # Product price
        good_price = good.find_element_by_class_name('p-price').text
        print(good_price)
        # Product review count
        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = (
            '\n'
            'num={}\n'
            '商品名称: {}\n'
            '商品链接: {}\n'
            '商品价格: {}\n'
            '商品的评价条数: {}\n'
            '\n'
        ).format(num, good_name, good_url, good_price, good_commit)
        print(good_content)
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1
    print('商品信息写入成功!')

    # `click` must be called; a bare `next_tag.click` is a no-op expression.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
    time.sleep(10)
finally:
    driver.close()


# ===========================================================================
# Full version: walk through the result pages and scrape every one of them.
# ===========================================================================
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # keyboard key constants


def get_good(driver, max_pages=None):
    """Scrape the current search result page and every following page.

    Each product's name, link, price and review text is printed and appended
    to ``京东商品信息爬取.txt``. Paging stops when there is no ``pn-next``
    element, when that element looks disabled, or after ``max_pages`` pages.

    The driver is NOT closed here: the caller created it and closes it.

    Args:
        driver: A Selenium WebDriver already showing a JD search result page.
        max_pages: Optional upper bound on the number of pages to scrape;
            ``None`` (the default) means "until the last page".
    """
    js_code = 'window.scrollTo(0,5000)'
    num = 1  # running record number, continues across pages
    page = 0
    while True:
        time.sleep(5)
        # Scroll down 5000px so the lazily loaded goods are rendered.
        driver.execute_script(js_code)
        time.sleep(5)  # wait 5s for the goods data to load

        good_list = driver.find_elements_by_class_name('gl-item')
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            # Product reviews
            good_commit = good.find_element_by_class_name('p-commit').text

            good_content = (
                '\n'
                'num: {}\n'
                '商品名称: {}\n'
                '商品链接: {}\n'
                '商品价格: {}\n'
                '商品评论: {}\n'
                '\n'
            ).format(num, good_name, good_url, good_price, good_commit)
            print(good_content)
            # Append the record to the output file.
            with open('京东商品信息爬取.txt', 'a', encoding='utf-8') as f:
                f.write(good_content)
            num += 1

        page += 1
        if max_pages is not None and page >= max_pages:
            break

        # `find_elements` returns an empty list instead of raising when the
        # "next page" button is missing.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): assumes JD marks the button on the last page with a
        # `disabled` class - confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or ''):
            break
        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('人间失格')
        input_tag.send_keys(Keys.ENTER)
        get_good(driver)
        print('商品信息写入完成')
    finally:
        driver.close()
以上是关于“Python爬虫:爬取京东商品信息”的主要内容。如果未能解决你的问题,请参考以下文章。