数据分析之瓜子二手车车辆信息大全,买卖二手车?别听别人忽悠,自己分析最准确
Posted 数据面对面
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据分析之瓜子二手车车辆信息大全,买卖二手车?别听别人忽悠,自己分析最准确相关的知识,希望对你有一定的参考价值。
这是一篇技术文档,使用当下最火热的Python语言,获取瓜子二手车上广州地区所有的二手车信息,一共获取了8000多辆车的信息。小编已经将二手车价格+原价、车辆基本参数、发动机参数、底盘及制动信息、安全配置、内部和外部配置共66条信息都获取下来,并提供汇总分析。想买车,又不知道什么车最保值、最划算?别再听别人忽悠了,让我们拿数据说话,从数据看问题,找到自己的需求。如需要获取代码的实际使用情况,敬请关注本公众号“数据面对面”,回复“瓜子二手车信息”,即可获得广州所有二手车的信息;如需要其他地区的车辆信息,请留言,小编看到后也会发给你~
下面开始我们的技术之旅:
1、首先导入所需要的包:
import requests
from lxml import etree
import pandas as pd
import time
requests用于请求网站;lxml用于解析网页,解析速度比BeautifulSoup快一些;pandas用于数据的处理和写入;time用于控制请求的时间间隔,防止被封
2、定义函数get_url(),获取列表页中所有二手车详情页面的链接,返回一个包含标题和链接的字典
def get_url(url):
    """Fetch one listing page and collect each car's title and detail link.

    Args:
        url: Address of a listing (search result) page.

    Returns:
        A dict with two equally long lists: "标题" holds the ``title``
        attribute of every listed car's anchor and "链接" holds the
        absolute URL built from its ``href`` attribute.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        IndexError: If the page contains no car-list ``<ul>`` element.
    """
    # NOTE(review): the Cookie is a captured browser session; presumably it
    # has to be refreshed before the site accepts it again -- confirm.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
        "Cookie": "clueSourceCode=10103000412%2300; uuid=37c23ea1-b114-4da1-e911-3d6bad733c40; antipas=7459330585r34392H8c50Z2Oj2B; ganji_uuid=2229921558379361577342; sessionid=f98a3b98-2d8f-4eef-8a51-8ca69394c889; cainfo=%7B%22ca_s%22%3A%22pz_sogou%22%2C%22ca_n%22%3A%22pz_bt%22%2C%22ca_i%22%3A%22-%22%2C%22ca_medium%22%3A%22-%22%2C%22ca_term%22%3A%22-%22%2C%22ca_content%22%3A%22-%22%2C%22ca_campaign%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22keyword%22%3A%22-%22%2C%22ca_keywordid%22%3A%22-%22%2C%22scode%22%3A%2210103000412%22%2C%22ca_transid%22%3Anull%2C%22platform%22%3A%221%22%2C%22version%22%3A1%2C%22ca_b%22%3A%22-%22%2C%22ca_a%22%3A%22-%22%2C%22display_finance_flag%22%3A%22-%22%2C%22client_ab%22%3A%22-%22%2C%22guid%22%3A%2237c23ea1-b114-4da1-e911-3d6bad733c40%22%2C%22sessionid%22%3A%22f98a3b98-2d8f-4eef-8a51-8ca69394c889%22%7D; cityDomain=gz; _gl_tracker=%7B%22ca_source%22%3A%22-%22%2C%22ca_name%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22ca_id%22%3A%22-%22%2C%22ca_s%22%3A%22self%22%2C%22ca_n%22%3A%22-%22%2C%22ca_i%22%3A%22-%22%2C%22sid%22%3A78275118765%7D; preTime=%7B%22last%22%3A1546005025%2C%22this%22%3A1546004506%2C%22pre%22%3A1546004506%7D",
    }
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    page = etree.HTML(response.text)
    car_list = page.xpath("//ul[@class='carlist clearfix js-top']")[0]

    che_titles = []
    che_urls = []
    # Read both attributes from the same <a> element so that a missing
    # attribute can never shift titles and links out of step.
    for anchor in car_list.xpath("./li/a"):
        href = anchor.get("href")
        if href is None:
            continue
        che_titles.append(anchor.get("title", ""))
        che_urls.append("https://www.guazi.com" + href)

    return {
        "标题": che_titles,
        "链接": che_urls,
    }
3、将含有标题和链接的字典写入CSV文件;4、从CSV文件中读取链接
def write_csv(che_dist):
    """Save the titles and links of one listing page as CSV.

    Two files are written in the current working directory, both without a
    header row and without the index column:

    * ``test_save.csv`` is opened in append mode, so it accumulates the rows
      of every call.
    * ``test.csv`` is overwritten, so it only holds the rows of this call.

    Args:
        che_dist: Column-name -> list-of-values mapping accepted by
            ``pandas.DataFrame``.
    """
    frame = pd.DataFrame(che_dist)
    frame.to_csv("test_save.csv", index=False, header=False, mode='a', sep=',', encoding='utf_8_sig')
    frame.to_csv("test.csv", index=False, header=False, sep=',', encoding='utf_8_sig')
    print("写入标题和链接成功...done")
def read_dsv():
    """Load the links stored in the second column of ``test.csv``.

    Returns:
        A list with one single-element list per CSV row, each holding the
        value of the second column, e.g. ``[["https://..."], ...]``.
    """
    # test.csv is written without a header row, so header=None is required;
    # otherwise pandas would use the first record as column names and that
    # record would be missing from the result.
    dataurl = pd.read_csv('test.csv', usecols=[1], header=None, encoding='utf_8_sig')
    url_list = dataurl.values.tolist()
    # Report before returning; placed after the return it could never run.
    print("读取链接成功...get")
    return url_list
5、获取详情页的详细信息,返回一个字典信息
# XPath of the container that holds the parameter tables of a detail page.
_DETAIL_DIV = "//div[@class='detailcontent clearfix js-detailcontent active']"

# (XPath of the lookup root, field names) per parameter section.  The n-th
# field name is read from table row n + 2 (the first row is skipped).
_DETAIL_SECTIONS = (
    # Basic parameters: searched from the container itself, so the first
    # matching row in document order is used.
    (_DETAIL_DIV, (
        "证件品牌型号",
        "产商信息",
        "汽车级别",
        "发动机信息",
        "变速箱",
        "车身结构",
        "长*宽*高(mm)",
        "轴距(mm)",
        "行李箱容积(L)",
        "整备质量(kg)",
    )),
    # Engine parameters.
    (_DETAIL_DIV + "/table[2]", (
        "排量(L)",
        "进气形式",
        "气缸",
        "最大马力(Ps)",
        "最大扭矩(N*m)",
        "燃料类型",
        "燃油标号",
        "供油方式",
        "排放标准",
    )),
    # Chassis and brakes.
    (_DETAIL_DIV + "/table[3]", (
        "驱动方式",
        "助力类型",
        "前悬挂类型",
        "后悬挂类型",
        "前制动类型",
        "后制动类型",
        "驻车制动类型",
        "前轮胎规格",
        "后轮胎规格",
    )),
    # Safety equipment.
    (_DETAIL_DIV + "/table[4]", (
        "主副驾驶安全气囊",
        "前后排侧气囊",
        "前后排头部气囊",
        "胎压检测",
        "车内中控锁",
        "儿童座椅接口",
        "无钥匙启动",
        "防抱死系统(ABS)",
        "车身稳定控制(ESP)",
    )),
    # Exterior equipment.
    (_DETAIL_DIV + "/table[5]", (
        "电动天窗",
        "全景天窗",
        "电动吸合门",
        "感应后备箱",
        "感应雨刷",
        "后雨刷",
        "前后电动车窗",
        "后视镜电动调节",
        "后视镜加热",
    )),
    # Interior equipment.
    (_DETAIL_DIV + "/table[6]", (
        "多功能方向盘",
        "定速巡航",
        "空调",
        "自动空调",
        "GPS导航",
        "倒车雷达",
        "倒车影像系统",
        "真皮座椅",
        "前后排座椅加热",
    )),
)


def _first_text(node, path, index=0):
    """Return ``.text`` of the index-th element that ``path`` matches under ``node``."""
    return node.xpath(path)[index].text


def _table_fields(root, labels):
    """Map each label to a one-element list with its row's second-cell text."""
    return {
        label: [_first_text(root, ".//tr[%d]/td[2]" % row)]
        for row, label in enumerate(labels, start=2)
    }


def get_info_detail(url):
    """Download one car detail page and extract its fields.

    Args:
        url: Address of the car's detail page.

    Returns:
        A dict mapping each field name to a one-element list, ready to be
        turned into a single-row ``pandas.DataFrame``.  All values are
        element texts as returned by lxml, except "车主报价", which is the
        serialized markup of the price element and still needs cleaning.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        IndexError: If an expected element is missing from the page.
    """
    # NOTE(review): the Cookie is a captured browser session; presumably it
    # has to be refreshed before the site accepts it again -- confirm.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
        "Cookie": "clueSourceCode=10103000412%2300; uuid=37c23ea1-b114-4da1-e911-3d6bad733c40; antipas=7459330585r34392H8c50Z2Oj2B; ganji_uuid=2229921558379361577342; sessionid=f98a3b98-2d8f-4eef-8a51-8ca69394c889; cainfo=%7B%22ca_s%22%3A%22pz_sogou%22%2C%22ca_n%22%3A%22pz_bt%22%2C%22ca_i%22%3A%22-%22%2C%22ca_medium%22%3A%22-%22%2C%22ca_term%22%3A%22-%22%2C%22ca_content%22%3A%22-%22%2C%22ca_campaign%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22keyword%22%3A%22-%22%2C%22ca_keywordid%22%3A%22-%22%2C%22scode%22%3A%2210103000412%22%2C%22ca_transid%22%3Anull%2C%22platform%22%3A%221%22%2C%22version%22%3A1%2C%22ca_b%22%3A%22-%22%2C%22ca_a%22%3A%22-%22%2C%22display_finance_flag%22%3A%22-%22%2C%22client_ab%22%3A%22-%22%2C%22guid%22%3A%2237c23ea1-b114-4da1-e911-3d6bad733c40%22%2C%22sessionid%22%3A%22f98a3b98-2d8f-4eef-8a51-8ca69394c889%22%7D; cityDomain=gz; _gl_tracker=%7B%22ca_source%22%3A%22-%22%2C%22ca_name%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22ca_id%22%3A%22-%22%2C%22ca_s%22%3A%22self%22%2C%22ca_n%22%3A%22-%22%2C%22ca_i%22%3A%22-%22%2C%22sid%22%3A78275118765%7D; preTime=%7B%22last%22%3A1546005025%2C%22this%22%3A1546004506%2C%22pre%22%3A1546004506%7D",
    }
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    page = etree.HTML(response.text)

    # Summary list: registration date, mileage, region, displacement, gearbox.
    summary = page.xpath("//ul[@class='assort clearfix']")[0]
    # Price box: the owner's asking price and the new-car price.
    price_box = page.xpath("//div[@class='pricebox js-disprice']")[0]
    owner_price = price_box.xpath("./span[@class='pricestype']")[0]

    info_all = {
        "标题": [_first_text(page, "//h2[@class='titlebox']")],
        "车源号": [_first_text(page, "//div[@class='right-carnumber']")],
        "上牌时间": [_first_text(summary, "./li[@class='one']/span")],
        "表显里程": [_first_text(summary, "./li[@class='two']/span")],
        # Region and displacement are the first and second match of the
        # same ``li[@class='three']/span`` query.
        "上牌地": [_first_text(summary, "./li[@class='three']/span", 0)],
        "排量": [_first_text(summary, "./li[@class='three']/span", 1)],
        "变速箱类型": [_first_text(summary, "./li[@class='last']/span")],
        # Stored as raw markup; the value is cleaned later.
        "车主报价": [etree.tostring(owner_price, encoding='utf-8').decode('utf-8')],
        "原价": [_first_text(price_box, "./span[@class='newcarprice']")],
    }

    # Parameter tables, appended section by section in a fixed key order.
    for root_path, labels in _DETAIL_SECTIONS:
        info_all.update(_table_fields(page.xpath(root_path)[0], labels))

    # Ownership transfers and viewing location.
    basic_info = page.xpath("//ul[@class='basic-eleven clearfix']")[0]
    info_all["过户次数"] = [_first_text(basic_info, "./li[@class='seven']/div")]
    info_all["看车地点"] = [_first_text(basic_info, "./li[@class='eight']/div")]
    return info_all
6、将获取的车辆信息写入CSV文件中
def write_info(info_all):
    """Append one car's details to ``info_all.csv``.

    The file is opened in append mode and rows are written without a header
    row and without the index column, so it grows by the rows of
    ``info_all`` on every call.

    Args:
        info_all: Column-name -> list-of-values mapping accepted by
            ``pandas.DataFrame``.
    """
    pdata = pd.DataFrame(info_all)
    pdata.to_csv("info_all.csv", index=False, header=False, sep=',', mode='a', encoding='utf_8_sig')
7、到此,我们所有的代码都完成了,可以开始获取信息了
# Crawl listing pages 1..222.  For every page: collect the detail links,
# store them, then fetch and store each car's details.
for page in range(1, 223):
    url = "https://www.guazi.com/www/buy/o" + str(page) + "c-1/#bread"
    try:
        # Collect the links of this listing page.
        che_dist = get_url(url)
        # Store them, then read them back from the CSV file.
        write_csv(che_dist)
        url_list = read_dsv()
    except Exception as err:
        # Best effort: report the page and move on.  Catching Exception
        # rather than everything keeps Ctrl-C able to stop the crawl.
        print("获取第" + str(page) + "页的链接失败,已跳过:" + repr(err))
        continue

    num = 0
    for row in url_list:
        # Each row is a one-element list holding the detail-page link.
        detail_url = row[0]
        try:
            info_all = get_info_detail(detail_url)
            write_info(info_all)
        except Exception as err:
            # Skip this car only; the rest of the page is still processed.
            print("获取车辆详情失败,已跳过:" + str(detail_url) + " " + repr(err))
            continue
        num = num + 1
        print("正在获取第" + str(page) + "页的数据,写入第" + str(num) + "个车辆详情信息成功!")
        # Pause between detail requests to avoid being blocked.
        time.sleep(4)

    time.sleep(3)
    print("获取第" + str(page) + "页数据成功,共" + str(num) + "辆车!")
print("done!!!")
代码讲解部分到此结束,这是获取下来的结果
下一篇文章我们即将对结果进行分析,期待和你一起分享分析结果,看看什么牌子的车最保值?什么地方的车最划算?敬请期待
更多信息请关注~
以上是关于数据分析之瓜子二手车车辆信息大全,买卖二手车?别听别人忽悠,自己分析最准确的主要内容,如果未能解决你的问题,请参考以下文章