抓取赶集app数据
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了抓取赶集app数据相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Scrape shop-for-sale listings from the Ganji mobile-app API.

Flow: POST a search query (SearchPostsByJson3) to get a page of posts,
then for each post POST again (GetPostByPuid) to fetch its detail record.
"""
import json
import time

import requests

url = "https://app.ganji.com/datashare/"

# Headers captured from the iOS app; the API dispatches on the
# "interface" header rather than on the URL path.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "userid": "C1ED10776D9B6108D8FEFEE4EA53058A",
    "model": "Generic/iphone",
    "customerid": "705",
    "clientagent": "iPhone 6S Plus#414*736#11.0.3",
    "versionid": "8.3.0",
    "os": "ios",
    "net": "wifi",
    "dv": "iPhone 6S Plus",
    "interface": "SearchPostsByJson3",
    "accept-language": "zh-cn",
}


def req(url, headers, data):
    """POST `data` to `url` and return the decoded JSON body.

    Returns None on any network/decoding failure (the error is printed,
    not raised), so callers must check for None.
    """
    content = None
    try:
        r = requests.post(url, headers=headers, data=data, timeout=5)
        content = r.json()
    except Exception as e:
        print("requests error: ", e, "requests url: ", url)
    return content


def get_ganji_list_data():
    """Fetch one page (20 items) of search results for "商铺出售" (shop for sale).

    Returns the decoded JSON response dict, or None when the request failed.
    """
    # NOTE: the payload is a raw form-encoded string captured from the app
    # (including the doubled "&&"); kept verbatim so the server accepts it.
    data = 't=-576747455&&showType=0&showtype=0&jsonArgs={"pageSize":20,"cityScriptIndex":2300,"majorCategoryScriptIndex":7,"queryFilters":[],"categoryId":7,"andKeywords":[{"name":"title","value":"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE"}],"customerId":"705","sortKeywords":[{"field":"post_at","sort":"desc"}],"pageIndex":1}'
    ganji_data = req(url, headers, data)
    if ganji_data is not None:
        return ganji_data
    return None


def get_article_data():
    """Fetch the detail record for every post in the first results page.

    Returns a list of dicts with the extracted fields (price, title, city,
    description, location, id); an empty list when the listing request
    failed or returned no posts.
    """
    results = []
    ganji_data = get_ganji_list_data()
    if ganji_data is not None:
        data_list = ganji_data["posts"]
        print("count: ", ganji_data["total"])
        for data_ in data_list:
            title, d_sign, puid = data_["title"], data_["d_sign"], data_["puid"]
            print(title, d_sign)
            data_article = "d_sign={0}&cityId=176&post_type_for_maidian=5&categoryId=7&spfy=0".format(d_sign)
            # The detail endpoint is selected via headers: switch the
            # interface name and attach the post's puid.  Use a copy so the
            # shared module-level `headers` stays valid for list requests
            # (the original mutated it in place).
            detail_headers = dict(headers)
            detail_headers["interface"] = "GetPostByPuid"
            detail_headers["puid"] = puid
            content_data = req(url, detail_headers, data_article)
            # req() returns None on failure; the original indexed it
            # unconditionally and crashed with TypeError.
            if content_data is not None and content_data["status"] == 0:
                data = content_data["data"]
                end_data = {}
                end_data["price"] = data["price"]["v"]
                end_data["price_unit"] = data["price"]["u"]
                end_data["title"] = data["title"]
                end_data["city"] = data["city"]
                end_data["description"] = data["description"]
                end_data["district_name"] = data["district_name"]
                end_data["street_name"] = data["street_name"]
                end_data["latlng"] = data["latlng"]
                end_data["id"] = data["id"]
                # The original built end_data and discarded it; collect it.
                results.append(end_data)
            # Throttle between detail requests.  `time` was used but never
            # imported in the original (NameError) — fixed at the top.
            time.sleep(2)
    return results
header 里东西真多，最终测试只需要这几种，累死宝宝了。
以上是关于抓取赶集app数据的主要内容,如果未能解决你的问题,请参考以下文章