Python 爬取搜房网新房数据信息(Python 多线程 + requests + BeautifulSoup)
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 Python 爬取搜房网新房数据信息(Python 多线程 + requests + BeautifulSoup)的相关知识,希望对你有一定的参考价值。
#coding=utf8
import sys
__author__ = 'xohozu'
import Queue
import threading
import traceback
from time import sleep
import bs4
import MySQLdb
import requests
from bs4 import BeautifulSoup
# Python 2 idiom: reload(sys) makes sys.setdefaultencoding() callable again so
# that implicit str <-> unicode conversions in this script use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# Single global lock the worker threads take around console output and
# log-file writes (and around database access in SqlExecute).
LOCK = threading.Lock()
# When True, the worker threads print a progress report for each item they handle.
DEBUG = True
def add_city_to_queue(citys, direction='forward', *args):
"""
:param citys: 即将爬取的城市队列
:param direction: 从城市列表提取的方向(前->后,后->前)
:param args: 包含(posts,payloads,next_urls,pages,datas)等队列,当这些队列均为空时提取下一个城市至citys
"""
# 保存城市信息)
links = []
with open('new_house_cities.txt', 'r') as f:
for line in f.readlines():
if not line.strip():
continue
line = line.strip().split(' ')
pos = line[1].find('/house')
links.append((line[0].decode('utf8'), line[1][:pos], line[1][pos:]))
if not links:
print u'没有任何城市列表信息。'
return
if direction == 'forward':
pass
elif direction == 'backward':
links.reverse()
while True:
for queue in args:
if not queue.empty():
break
else:
# 每次提取1个城市
for _ in range(1):
link = links.pop(0)
citys.put(link)
links.append(link)
sleep(5)
class SqlExecute(threading.Thread):
    """
    Worker thread that moves records from the ``datas`` queue into table ``xf``.

    A record that already exists in the table (all twelve columns equal) is not
    inserted again; it is appended to ``log/failed_record_exists.txt`` instead.
    (Why duplicates show up at all is not known.)

    All statements are parameterized, so quotes or other SQL syntax inside the
    scraped text can neither break the statement nor inject SQL.
    """

    # MySQLdb uses the DB-API 'format' paramstyle: every value is a bare %s
    # placeholder and the driver does the quoting.
    SELECT_SQL = ("select * from xf "
                  "where city = %s AND district = %s AND purpose = %s AND railway = %s "
                  "AND q_price = %s AND unit = %s AND name = %s AND link = %s AND state = %s "
                  "AND tag = %s AND addr = %s AND price = %s;")
    INSERT_SQL = ("insert into xf(city, district, purpose, railway, q_price, "
                  "unit, name, link, state, tag, addr, price) "
                  "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);")

    def __init__(self, datas, con):
        """
        :param datas: queue of 12-tuples in the column order of ``INSERT_SQL``
        :param con: open database connection shared by all SqlExecute threads
        """
        threading.Thread.__init__(self)
        self.datas = datas
        self.con = con

    def run(self):
        """Store queued records one by one; never returns."""
        cur = self._open_cursor()
        while True:
            data = self.datas.get()
            try:
                # The connection is shared between threads, so every
                # select/insert/commit sequence runs under the global lock.
                with LOCK:
                    self._store(cur, data)
            finally:
                self.datas.task_done()

    def _open_cursor(self):
        """Return a cursor on the shared connection, retrying until one is obtained."""
        while True:
            try:
                return self.con.cursor()
            except Exception:
                with LOCK:
                    with open('log/failed_sql_exec.txt', 'a') as f:
                        traceback.print_exc(file=f)
                # Back off instead of spinning while the database is unavailable.
                sleep(1)

    def _store(self, cur, data):
        """Insert ``data`` unless an identical row exists; log duplicates and failures."""
        # Plain unicode values only.  None becomes u'None', which is what the
        # former string-formatted statements stored, so rows written by older
        # runs are still recognised as duplicates.
        params = tuple(unicode(value) for value in data)
        try:
            if cur.execute(self.SELECT_SQL, params):
                with open('log/failed_record_exists.txt', 'a') as f:
                    for d in data:
                        if d:
                            print >> f, d.encode('utf8'),
                    print >> f
            else:
                cur.execute(self.INSERT_SQL, params)
                self.con.commit()
        except Exception:
            self.con.rollback()
            with open('log/failed_sql_exec.txt', 'a') as f:
                traceback.print_exc(file=f)
                print >> f, data, '\n'
class PageParse(threading.Thread):
"""
对含有房源信息的网页进行解析,并存数据库;
探测是否包含‘下一页’标签,加入next_urls队列
"""
def __init__(self, pages, next_urls, payloads, datas):
threading.Thread.__init__(self)
self.pages = pages
self.next_urls = next_urls
self.payloads = payloads
self.datas = datas
def run(self):
while True:
# if self.datas.qsize() > 500:
# sleep(10)
page = self.pages.get()
if not self.extract(page):
global LOCK
LOCK.acquire()
with open('log/failed_get_pages_list.txt', 'a') as f:
del page['html']
print >> f, page
LOCK.release()
else:
global DEBUG
if DEBUG:
LOCK.acquire()
print self.name, self.__class__
print page['strCity'], page['strDistrict'], page['strPurpose'], page['Railway'], page['strPrice']
print page['host'] + page['url']
print u'完成此页面的数据解析。'
print
LOCK.release()
self.pages.task_done()
def extract(self, page):
"""
抽取网页房源信息列表
:param page:
:return: bool
"""
try:
soup = BeautifulSoup(page['html'], 'lxml', from_encoding='gb18030')
# 存在相符的房源
if not soup.find(attrs={'class': 'searchNoInfo'}):
# 查找所需房源信息列表
foods = soup.find_all(attrs={'class': 'sslalone'})
flag = True
if foods:
for food in foods:
flag = self.store_details(food, page) and flag
# 查找'下一页'div元素
url = soup.find(attrs={'class': 'pagearrowright'})
if url:
next_url = page.copy()
del next_url['html']
next_url['url'] = url.a.get('href')
self.next_urls.put(next_url)
return flag
else:
# 该网页没有房源信息
# 不尝试再次请求,直接返回
return False
except:
global LOCK
LOCK.acquire()
traceback.print_exc(file=open('log/failed_get_pages_list.txt', 'a'))
with open('log/failed_get_pages_list.txt', 'a') as f:
print >> f, page['host'], page['url'], '\n'
LOCK.release()
return False
def store_details(self, food, keywords):
"""
解析某一房源网页数据,并保存
:param food:
:param keywords:
"""
try:
sslainfor = food.find(attrs={'class': 'sslainfor'})
li = sslainfor.find_all('li')
name = li[0].strong.string
href = li[0].strong.a.get('href')
tag = li[0].span.string
state = ''
if li[0].img:
# 检查图标类型
if li[0].img.get('src').find('dotzs') != -1:
state = u'在售'
elif li[0].img.get('src').find('dotds') != -1:
state = u'待售'
elif li[0].img.get('src').find('dotzus') != -1:
state = u'租售'
elif li[0].img.get('src').find('dot10') != -1:
state = u'售完'
elif li[0].img.get('src').find('dotzz') != -1:
state = u'在租'
elif li[0].img.get('src').find('zazu-new') != -1:
state = u'在租*新'
elif li[0].img.get('src').find('zashow-new') != -1:
state = u'租售*新'
else:
state = li[0].img.get('src')
# 保存未知的图标信息
with open('log/tubiao.txt', 'a') as f:
print >> f, li[0].img.get('src')
if li[1].font:
addr = li[1].font.get('title')
else:
addr = li[1].span.text + li[1].contents[1]
sslaright = food.find(attrs={'class': 'sslaright'})
price = ''
if sslaright:
# 判断该条目是否含有价格信息
price = sslaright.find(attrs={'class': 'junjia'})
if price:
price = price.text.strip()
self.datas.put((keywords['strCity'], keywords['strDistrict'], keywords['strPurpose'], keywords['Railway'],
keywords['strPrice'], keywords['unit'], name, href, state, tag, addr, price))
return True
except:
global LOCK
LOCK.acquire()
traceback.print_exc(file=open('log/failed_get_pages_list.txt', 'a'))
with open('log/failed_get_pages_list.txt', 'a') as f:
print >> f, self.store_details.__name__, 'failed\n'
LOCK.release()
return False
class PageGet(threading.Thread):
"""
请求从'下一页'标签提取的链接,获取html文本
"""
def __init__(self, pages, next_urls, proxies):
threading.Thread.__init__(self)
self.pages = pages
self.next_urls = next_urls
self.proxies = proxies
def run(self):
while True:
next_url = self.next_urls.get()
global DEBUG
if DEBUG:
global LOCK
LOCK.acquire()
print self.name, self.__class__
print next_url['strCity'], next_url['strDistrict'], next_url['strPurpose'], next_url['Railway'], \
next_url['strPrice']
print next_url['host'] + next_url['url']
print u'正在请求此链接获取网页内容...'
print
LOCK.release()
try:
r = requests.get(next_url['host'] + next_url['url'], proxies=self.proxies)
sleep(1)
page = next_url.copy()
page['html'] = r.content
self.pages.put(page)
except:
LOCK.acquire()
print next_url['host'] + next_url['url'], u'请求失败!'
traceback.print_exc(file=open('log/failed_get_next_pages_list.txt', 'a'))
with open('log/failed_get_next_pages_list.txt', 'a') as f:
print >> f, next_url
LOCK.release()
self.next_urls.task_done()
class ParametersPost(threading.Thread):
"""
POST (区域,类型, 价格)或(地铁,类型, 价格)参数,获取相应的房源数据
"""
def __init__(self, payloads, pages, proxies):
threading.Thread.__init__(self)
self.payloads = payloads
self.pages = pages
self.proxies = proxies
def run(self):
while True:
# if self.pages.qsize() > 200:
# sleep(10)
post_data = self.payloads.get()
global DEBUG
if DEBUG:
global LOCK
LOCK.acquire()
print self.name, self.__class__
print post_data['strCity'], post_data['strDistrict'], post_data['strPurpose'], post_data['Railway'], \
post_data['strPrice']
print post_data['host'] + post_data['url']
print u'正在POST此参数'
print
LOCK.release()
try:
data = {
'strCity': post_data['strCity'].encode('gb18030'),
'strDistrict': post_data['strDistrict'].encode('gb18030'),
'strPrice': post_data['strPrice'].encode('gb18030'),
'Railway': post_data['Railway'].encode('gb18030'),
'strPurpose': post_data['strPurpose'].encode('gb18030'),
'show_type': post_data['show_type'],
}
r = requests.post(post_data['host'] + post_data['url'], data=data, proxies=self.proxies)
sleep(1)
page = post_data.copy()
page['html'] = r.content
self.pages.put(page)
except:
LOCK.acquire()
print post_data['strCity'], post_data['strDistrict'], post_data['strPrice'], post_data['strPurpose'], \
post_data['Railway'], u'POST此参数失败!'
traceback.print_exc(file=open('log/failed_parameters_post_list.txt', 'a'))
with open('log/failed_parameters_post_list.txt', 'a') as f:
print >> f, post_data['strCity'].encode('utf8'), post_data['strDistrict'].encode('utf8'), \
post_data['strPrice'].encode('utf8'), post_data['strPurpose'].encode('utf8'), \
post_data['Railway'].encode('utf8')
LOCK.release()
self.payloads.task_done()
class PostParametersGenerate(threading.Thread):
"""
POST (区域,类型)或(地铁,类型)参数,获取相应的价格参数数据
"""
def __init__(self, posts, payloads, proxies):
threading.Thread.__init__(self)
self.posts = posts
self.payloads = payloads
self.proxies = proxies
def run(self):
while True:
# if self.payloads.qsize() > 100:
# sleep(10)
post_data = self.posts.get()
global DEBUG
if DEBUG:
global LOCK
LOCK.acquire()
print self.name, self.__class__
print post_data['strCity'], post_data['strDistrict'], post_data['strPurpose'], post_data['Railway']
print post_data['host'] + post_data['url']
print u'正在POST获取strPrice'
print
LOCK.release()
try:
data = {
'strCity': post_data['strCity'].encode('gb18030'),
'strDistrict': post_data['strDistrict'].encode('gb18030'),
'Railway': post_data['Railway'].encode('gb18030'),
'strPurpose': post_data['strPurpose'].encode('gb18030'),
'show_type': post_data['show_type'],
}
r = requests.post(post_data['host'] + post_data['url'], data=data, proxies=self.proxies)
sleep(1)
soup = BeautifulSoup(r.content, 'lxml', from_encoding='gb18030')
# 判断未包含价格参数时是否有房源信息
if soup.find(attrs={'class': 'searchNoInfo'}):
self.posts.task_done()
continue
s3 = soup.find(id="sjina_B01_07").find(attrs={'class': 's3'})
price_flag = False
for c in s3.contents[::-1]:
if isinstance(c, bs4.element.Tag):
if c.name == 'span':
unit = c.string
elif c.name == 'a':
payload = post_data.copy()
payload['strPrice'] = c.string
payload['unit'] = unit
self.payloads.put(payload)
# 标记含有价格参数
price_flag = True
# 没有价格信息
if not price_flag:
payload = post_data.copy()
payload['strPrice'] = ''
payload['unit'] = ''
self.payloads.put(payload)
except:
LOCK.acquire()
print post_data['host'] + post_data['url'], u'获取strPrice参数失败!'
traceback.print_exc(file=open('log/failed_generate_strPrice_list.txt', 'a'))
with open('log/failed_generate_strPrice_list.txt', 'a') as f:
print >> f, post_data['strCity'].encode('utf8'), post_data['strDistrict'].encode('utf8'), \
post_data['strPurpose'].encode('utf8'), post_data['Railway'].encode('utf8')
LOCK.release()
self.posts.task_done()
class PostDataGenerate(threading.Thread):
"""
生成城市的区域,类型,地铁参数等post数据
"""
def __init__(self, citys, posts, proxies):
threading.Thread.__init__(self)
self.citys = citys
self.posts = posts
self.proxies = proxies
def run(self):
while True:
city, host, url = self.citys.get()
params = self.get_post_params(host + url)
if not params:
global LOCK
LOCK.acquire()
print city, host, url, u'获取区域,类型,地铁参数失败!'
LOCK.release()
with open('log/failed_city_list.txt', 'a') as f:
print >> f, city.encode('utf8'), url.encode('utf8')
else:
global DEBUG
if DEBUG:
LOCK.acquire()
print self.name, self.__class__
print city, host + url
print u'区域,类型,地铁 参数生成'
print
LOCK.release()
for district in params[0]:
for purpose in params[1]:
self.posts.put({
'strCity': city,
'strDistrict': district,
'Railway': '',
'strPurpose': purpose,
'show_type': '',
'url': url,
'host': host
})
for railway in params[2]:
for purpose in params[1]:
self.posts.put({
'strCity': city,
'strDistrict': '',
'Railway': railway,
'strPurpose': purpose,
'show_type': 'rail',
'url': url,
'host': host
})
self.citys.task_done()
def get_post_params(self, url):
"""
获取post请求需要的参数
:param url:
:return: 返回区域,类型,地铁三元组 | False
"""
def to_name(s):
"""
删除 xxx(xx) 格式字符串的带括号部分
:param s:
:return:
"""
pos = s.find('(')
if pos != -1:
return s[:pos]
else:
pos = s.find(u'(')
if pos != -1:
return s[:pos]
else:
return s
try:
r = requests.get(url, proxies=self.proxies)
sleep(1)
sp = BeautifulSoup(r.content, 'lxml', from_encoding='gb18030')
purpose_div = sp.find(id="sjina_B01_06")
if purpose_div:
purpose = [to_name(a.string.strip())
for a in purpose_div.find(attrs={'class': 's3'}).find_all('a')]
district_div = sp.find(id="sjina_B01_05")
if district_div:
district = [to_name(a.string.strip())
for a in district_div.find(attrs={'class': 's3'}).find_all('a')]
railway_div = sp.find(id="sjina_B01_09")
if railway_div:
railway = [to_name(a.string.strip())
for a in railway_div.find(attrs={'class': 's3'}).find_all('a')]
if district_div and railway_div:
return district, purpose, railway
elif district_div:
return district, purpose, []
else:
return False
else:
global LOCK
LOCK.acquire()
with open('log/failed_get_post_params.txt', 'a') as f:
print >> f, url
print >> f, sp.encode('utf8')
LOCK.release()
except:
LOCK.acquire()
traceback.print_exc(file=open('log/failed_get_post_params.txt', 'a'))
LOCK.release()
return False
class PrintThread(threading.Thread):
def __init__(self, citys, posts, payloads, next_urls, pages, datas):
threading.Thread.__init__(self)
self.citys = citys
self.posts = posts
self.payloads = payloads
self.next_urls = next_urls
self.pages = pages
self.datas = datas
def run(self):
while True:
global LOCK
LOCK.acquire()
print
print 'city_queue ', self.citys.qsize()
print 'post_queue ', self.posts.qsize()
print 'payload_queue', self.payloads.qsize()
print 'next_urls ', self.next_urls.qsize()
print 'pages ', self.pages.qsize()
print 'datas ', self.datas.qsize()
print
# for t in sorted(threading.enumerate()):
# print t
# print
LOCK.release()
sleep(5)
def create_threads(class_name, num, *args):
    """
    Create and start ``num`` daemon threads of type ``class_name``.

    :param class_name: thread class to instantiate
    :param num: number of threads to start; zero or a negative number starts none
    :param args: positional arguments passed to every ``class_name(*args)`` call
    :return: list of the started threads, in creation order
    """
    threads = []
    for _ in range(num):
        t = class_name(*args)
        # Daemon threads do not keep the process alive once the main thread exits.
        # The attribute replaces the deprecated setDaemon() call.
        t.daemon = True
        t.start()
        threads.append(t)
    return threads
import proxy
def main():
# 初始化队列
# citys -> posts
# posts -> payloads
# payloads -> pages
# next_urls -> pages
# pages -> datas
# pages -> next_urls
citys = Queue.Queue()
posts = Queue.Queue()
payloads = Queue.Queue()
next_urls = Queue.Queue()
pages = Queue.Queue(1000)
datas = Queue.Queue(5000)
con = MySQLdb.connect(host='localhost', user='root', passwd='root', db='soufun',
charset='utf8')
proxies = None
proxieslist = {
'http': []
}
create_threads(PostDataGenerate, 1, citys, posts, proxies)
create_threads(PostParametersGenerate, 1, posts, payloads, proxies)
create_threads(ParametersPost, 10, payloads, pages, proxies)
create_threads(PageGet, 10, pages, next_urls, proxies)
create_threads(PageParse, 15, pages, next_urls, payloads, datas)
create_threads(SqlExecute, 20, datas, con)
create_threads(PrintThread, 1, citys, posts, payloads, next_urls, pages, datas)
# 代理更新
proxy.proxy(proxies, proxieslist)
if len(sys.argv) > 2:
print u'运行参数不正确.'
return
add_city_to_queue(citys, sys.argv[1], posts, payloads, next_urls, pages, datas)
if __name__ == '__main__':
main()
# new_house_cities.txt
北京 http://soufun.com/house/web/Search_Result.php
天津 http://newhouse.tj.soufun.com/house/web/Search_Result.php
重庆 http://newhouse.cq.soufun.com/house/web/Search_Result.php
合肥 http://newhouse.hf.soufun.com/house/web/Search_Result.php
芜湖 http://newhouse.wuhu.soufun.com/house/web/Search_Result.php
淮南 http://newhouse.huainan.soufun.com/house/web/Search_Result.php
蚌埠 http://newhouse.bengbu.soufun.com/house/web/Search_Result.php
阜阳 http://newhouse.fuyang.soufun.com/house/web/Search_Result.php
安庆 http://newhouse.anqing.soufun.com/house/web/Search_Result.php
巢湖 http://newhouse.chaohu.soufun.com/house/web/Search_Result.php
滁州 http://newhouse.chuzhou.soufun.com/house/web/Search_Result.php
六安 http://newhouse.luan.soufun.com/house/web/Search_Result.php
铜陵 http://newhouse.tongling.soufun.com/house/web/Search_Result.php
淮北 http://newhouse.huaibei.soufun.com/house/web/Search_Result.php
宣城 http://newhouse.xuancheng.soufun.com/house/web/Search_Result.php
黄山 http://newhouse.huangshan.soufun.com/house/web/Search_Result.php
宿州 http://newhouse.ahsuzhou.soufun.com/house/web/Search_Result.php
亳州 http://newhouse.bozhou.soufun.com/house/web/Search_Result.php
马鞍山 http://newhouse.mas.soufun.com/house/web/Search_Result.php
池州 http://newhouse.chizhou.soufun.com/house/web/Search_Result.php
万州 http://newhouse.wanzhou.soufun.com/house/web/Search_Result.php
涪陵 http://newhouse.fuling.soufun.com/house/web/Search_Result.php
綦江 http://newhouse.qijiang.soufun.com/house/web/Search_Result.php
黔江 http://newhouse.qianjiang.soufun.com/house/web/Search_Result.php
合川 http://newhouse.hechuan.soufun.com/house/web/Search_Result.php
永川 http://newhouse.yongchuan.soufun.com/house/web/Search_Result.php
福州 http://newhouse.fz.soufun.com/house/web/Search_Result.php
厦门 http://newhouse.xm.soufun.com/house/web/Search_Result.php
泉州 http://newhouse.qz.soufun.com/house/web/Search_Result.php
莆田 http://newhouse.putian.soufun.com/house/web/Search_Result.php
漳州 http://newhouse.zhangzhou.soufun.com/house/web/Search_Result.php
龙岩 http://newhouse.longyan.soufun.com/house/web/Search_Result.php
三明 http://newhouse.sanming.soufun.com/house/web/Search_Result.php
南平 http://newhouse.nanping.soufun.com/house/web/Search_Result.php
宁德 http://newhouse.ningde.soufun.com/house/web/Search_Result.php
平潭 http://newhouse.pingtan.soufun.com/house/web/Search_Result.php
广州 http://newhouse.gz.soufun.com/house/web/Search_Result.php
深圳 http://newhouse.sz.soufun.com/house/web/Search_Result.php
东莞 http://newhouse.dg.soufun.com/house/web/Search_Result.php
珠海 http://newhouse.zh.soufun.com/house/web/Search_Result.php
中山 http://newhouse.zs.soufun.com/house/web/Search_Result.php
佛山 http://newhouse.fs.soufun.com/house/web/Search_Result.php
惠州 http://newhouse.huizhou.soufun.com/house/web/Search_Result.php
江门 http://newhouse.jm.soufun.com/house/web/Search_Result.php
汕头 http://newhouse.st.soufun.com/house/web/Search_Result.php
清远 http://newhouse.qingyuan.soufun.com/house/web/Search_Result.php
肇庆 http://newhouse.zhaoqing.soufun.com/house/web/Search_Result.php
阳江 http://newhouse.yangjiang.soufun.com/house/web/Search_Result.php
茂名 http://newhouse.maoming.soufun.com/house/web/Search_Result.php
湛江 http://newhouse.zj.soufun.com/house/web/Search_Result.php
梅州 http://newhouse.meizhou.soufun.com/house/web/Search_Result.php
揭阳 http://newhouse.jieyang.soufun.com/house/web/Search_Result.php
河源 http://newhouse.heyuan.soufun.com/house/web/Search_Result.php
云浮 http://newhouse.yunfu.soufun.com/house/web/Search_Result.php
潮州 http://newhouse.chaozhou.soufun.com/house/web/Search_Result.php
韶关 http://newhouse.shaoguan.soufun.com/house/web/Search_Result.php
南宁 http://newhouse.nn.soufun.com/house/web/Search_Result.php
柳州 http://newhouse.liuzhou.soufun.com/house/web/Search_Result.php
桂林 http://newhouse.guilin.soufun.com/house/web/Search_Result.php
贵港 http://newhouse.guigang.soufun.com/house/web/Search_Result.php
北海 http://newhouse.bh.soufun.com/house/web/Search_Result.php
百色 http://newhouse.baise.soufun.com/house/web/Search_Result.php
梧州 http://newhouse.wuzhou.soufun.com/house/web/Search_Result.php
钦州 http://newhouse.qinzhou.soufun.com/house/web/Search_Result.php
防城港 http://newhouse.fangchenggang.soufun.com/house/web/Search_Result.php
来宾 http://newhouse.laibin.soufun.com/house/web/Search_Result.php
崇左 http://newhouse.chongzuo.soufun.com/house/web/Search_Result.php
贵阳 http://newhouse.gy.soufun.com/house/web/Search_Result.php
遵义 http://newhouse.zunyi.soufun.com/house/web/Search_Result.php
黔东南 http://newhouse.qiandongnan.soufun.com/house/web/Search_Result.php
安顺 http://newhouse.anshun.soufun.com/house/web/Search_Result.php
毕节 http://newhouse.bijie.soufun.com/house/web/Search_Result.php
六盘水 http://newhouse.liupanshui.soufun.com/house/web/Search_Result.php
黔西南 http://newhouse.qianxinan.soufun.com/house/web/Search_Result.php
兰州 http://newhouse.lz.soufun.com/house/web/Search_Result.php
天水 http://newhouse.tianshui.soufun.com/house/web/Search_Result.php
海口 http://newhouse.hn.soufun.com/house/web/Search_Result.php
三亚 http://newhouse.sanya.soufun.com/house/web/Search_Result.php
郑州 http://newhouse.zz.soufun.com/house/web/Search_Result.php
洛阳 http://newhouse.ly.soufun.com/house/web/Search_Result.php
焦作 http://newhouse.jiaozuo.soufun.com/house/web/Search_Result.php
平顶山 http://newhouse.pingdingshan.soufun.com/house/web/Search_Result.php
驻马店 http://newhouse.zhumadian.soufun.com/house/web/Search_Result.php
新乡 http://newhouse.xx.soufun.com/house/web/Search_Result.php
南阳 http://newhouse.nanyang.soufun.com/house/web/Search_Result.php
信阳 http://newhouse.xinyang.soufun.com/house/web/Search_Result.php
开封 http://newhouse.kaifeng.soufun.com/house/web/Search_Result.php
漯河 http://newhouse.luohe.soufun.com/house/web/Search_Result.php
安阳 http://newhouse.anyang.soufun.com/house/web/Search_Result.php
商丘 http://newhouse.shangqiu.soufun.com/house/web/Search_Result.php
周口 http://newhouse.zhoukou.soufun.com/house/web/Search_Result.php
许昌 http://newhouse.xuchang.soufun.com/house/web/Search_Result.php
濮阳 http://newhouse.puyang.soufun.com/house/web/Search_Result.php
鹤壁 http://newhouse.hebi.soufun.com/house/web/Search_Result.php
哈尔滨 http://newhouse.hrb.soufun.com/house/web/Search_Result.php
大庆 http://newhouse.daqing.soufun.com/house/web/Search_Result.php
齐齐哈尔 http://newhouse.qiqihaer.soufun.com/house/web/Search_Result.php
佳木斯 http://newhouse.jiamusi.soufun.com/house/web/Search_Result.php
牡丹江 http://newhouse.mudanjiang.soufun.com/house/web/Search_Result.php
武汉 http://newhouse.wuhan.soufun.com/house/web/Search_Result.php
宜昌 http://newhouse.yc.soufun.com/house/web/Search_Result.php
襄阳 http://newhouse.xiangyang.soufun.com/house/web/Search_Result.php
荆州 http://newhouse.jingzhou.soufun.com/house/web/Search_Result.php
十堰 http://newhouse.shiyan.soufun.com/house/web/Search_Result.php
黄冈 http://newhouse.huanggang.soufun.com/house/web/Search_Result.php
荆门 http://newhouse.jingmen.soufun.com/house/web/Search_Result.php
随州 http://newhouse.suizhou.soufun.com/house/web/Search_Result.php
恩施 http://newhouse.enshi.soufun.com/house/web/Search_Result.php
孝感 http://newhouse.xiaogan.soufun.com/house/web/Search_Result.php
黄石 http://newhouse.huangshi.soufun.com/house/web/Search_Result.php
鄂州 http://newhouse.ezhou.soufun.com/house/web/Search_Result.php
咸宁 http://newhouse.xianning.soufun.com/house/web/Search_Result.php
仙桃 http://newhouse.xiantao.soufun.com/house/web/Search_Result.php
长沙 http://newhouse.cs.soufun.com/house/web/Search_Result.php
株洲 http://newhouse.zhuzhou.soufun.com/house/web/Search_Result.php
衡阳 http://newhouse.hengyang.soufun.com/house/web/Search_Result.php
岳阳 http://newhouse.yueyang.soufun.com/house/web/Search_Result.php
湘潭 http://newhouse.xt.soufun.com/house/web/Search_Result.php
常德 http://newhouse.changde.soufun.com/house/web/Search_Result.php
益阳 http://newhouse.yiyang.soufun.com/house/web/Search_Result.php
永州 http://newhouse.yongzhou.soufun.com/house/web/Search_Result.php
娄底 http://newhouse.loudi.soufun.com/house/web/Search_Result.php
邵阳 http://newhouse.shaoyang.soufun.com/house/web/Search_Result.php
怀化 http://newhouse.huaihua.soufun.com/house/web/Search_Result.php
郴州 http://newhouse.chenzhou.soufun.com/house/web/Search_Result.php
张家界 http://newhouse.zhangjiajie.soufun.com/house/web/Search_Result.php
石家庄 http://newhouse.sjz.soufun.com/house/web/Search_Result.php
唐山 http://newhouse.ts.soufun.com/house/web/Search_Result.php
保定 http://newhouse.bd.soufun.com/house/web/Search_Result.php
沧州 http://newhouse.cangzhou.soufun.com/house/web/Search_Result.php
廊坊 http://newhouse.lf.soufun.com/house/web/Search_Result.php
邢台 http://newhouse.xingtai.soufun.com/house/web/Search_Result.php
邯郸 http://newhouse.hd.soufun.com/house/web/Search_Result.php
秦皇岛 http://newhouse.qhd.soufun.com/house/web/Search_Result.php
衡水 http://newhouse.hs.soufun.com/house/web/Search_Result.php
张家口 http://newhouse.zhangjiakou.soufun.com/house/web/Search_Result.php
承德 http://newhouse.chengde.soufun.com/house/web/Search_Result.php
固安 http://newhouse.guan.soufun.com/house/web/Search_Result.php
燕郊 http://newhouse.yanjiao.soufun.com/house/web/Search_Result.php
涿州 http://newhouse.zhuozhou.soufun.com/house/web/Search_Result.php
香河 http://newhouse.xianghe.soufun.com/house/web/Search_Result.php
南京 http://nanjing.soufun.com/house/web/Search_Result.php
苏州 http://newhouse.suzhou.soufun.com/house/web/Search_Result.php
无锡 http://newhouse.wuxi.soufun.com/house/web/Search_Result.php
徐州 http://newhouse.xz.soufun.com/house/web/Search_Result.php
常州 http://newhouse.cz.soufun.com/house/web/Search_Result.php
淮安 http://newhouse.huaian.soufun.com/house/web/Search_Result.php
扬州 http://newhouse.yz.soufun.com/house/web/Search_Result.php
南通 http://newhouse.nt.soufun.com/house/web/Search_Result.php
盐城 http://newhouse.yancheng.soufun.com/house/web/Search_Result.php
连云港 http://newhouse.lyg.soufun.com/house/web/Search_Result.php
宿迁 http://newhouse.sq.soufun.com/house/web/Search_Result.php
泰州 http://newhouse.taizhou.soufun.com/house/web/Search_Result.php
镇江 http://newhouse.zhenjiang.soufun.com/house/web/Search_Result.php
常熟 http://newhouse.changshu.soufun.com/house/web/Search_Result.php
张家港 http://newhouse.zjg.soufun.com/house/web/Search_Result.php
昆山 http://newhouse.ks.soufun.com/house/web/Search_Result.php
吴江 http://newhouse.wj.soufun.com/house/web/Search_Result.php
太仓 http://newhouse.tc.soufun.com/house/web/Search_Result.php
江阴 http://newhouse.jy.soufun.com/house/web/Search_Result.php
宜兴 http://newhouse.yixing.soufun.com/house/web/Search_Result.php
南昌 http://newhouse.nc.soufun.com/house/web/Search_Result.php
赣州 http://newhouse.ganzhou.soufun.com/house/web/Search_Result.php
九江 http://newhouse.jiujiang.soufun.com/house/web/Search_Result.php
吉安 http://newhouse.jian.soufun.com/house/web/Search_Result.php
上饶 http://newhouse.shangrao.soufun.com/house/web/Search_Result.php
萍乡 http://newhouse.pingxiang.soufun.com/house/web/Search_Result.php
宜春 http://newhouse.yichun.soufun.com/house/web/Search_Result.php
景德镇 http://newhouse.jingdezhen.soufun.com/house/web/Search_Result.php
抚州 http://newhouse.jxfuzhou.soufun.com/house/web/Search_Result.php
新余 http://newhouse.xinyu.soufun.com/house/web/Search_Result.php
长春 http://newhouse.changchun.soufun.com/house/web/Search_Result.php
吉林 http://newhouse.jl.soufun.com/house/web/Search_Result.php
沈阳 http://newhouse.sy.soufun.com/house/web/Search_Result.php
大连 http://newhouse.dl.soufun.com/house/web/Search_Result.php
抚顺 http://newhouse.fushun.soufun.com/house/web/Search_Result.php
锦州 http://newhouse.jinzhou.soufun.com/house/web/Search_Result.php
鞍山 http://newhouse.anshan.soufun.com/house/web/Search_Result.php
葫芦岛 http://newhouse.huludao.soufun.com/house/web/Search_Result.php
阜新 http://newhouse.fuxin.soufun.com/house/web/Search_Result.php
营口 http://newhouse.yk.soufun.com/house/web/Search_Result.php
本溪 http://newhouse.benxi.soufun.com/house/web/Search_Result.php
朝阳 http://newhouse.chaoyang.soufun.com/house/web/Search_Result.php
辽阳 http://newhouse.liaoyang.soufun.com/house/web/Search_Result.php
盘锦 http://newhouse.panjin.soufun.com/house/web/Search_Result.php
丹东 http://newhouse.dandong.soufun.com/house/web/Search_Result.php
银川 http://newhouse.yinchuan.soufun.com/house/web/Search_Result.php
呼和浩特 http://newhouse.nm.soufun.com/house/web/Search_Result.php
包头 http://newhouse.bt.soufun.com/house/web/Search_Result.php
赤峰 http://newhouse.chifeng.soufun.com/house/web/Search_Result.php
鄂尔多斯 http://newhouse.erds.soufun.com/house/web/Search_Result.php
通辽 http://newhouse.tl.soufun.com/house/web/Search_Result.php
乌兰察布 http://newhouse.wlcb.soufun.com/house/web/Search_Result.php
巴彦淖尔 http://newhouse.byne.soufun.com/house/web/Search_Result.php
西宁 http://newhouse.xn.soufun.com/house/web/Search_Result.php
济南 http://newhouse.jn.soufun.com/house/web/Search_Result.php
青岛 http://newhouse.qd.soufun.com/house/web/Search_Result.php
淄博 http://newhouse.zb.soufun.com/house/web/Search_Result.php
潍坊 http://newhouse.wf.soufun.com/house/web/Search_Result.php
烟台 http://newhouse.yt.soufun.com/house/web/Search_Result.php
临沂 http://newhouse.linyi.soufun.com/house/web/Search_Result.php
济宁 http://newhouse.jining.soufun.com/house/web/Search_Result.php
滨州 http://newhouse.binzhou.soufun.com/house/web/Search_Result.php
德州 http://newhouse.dz.soufun.com/house/web/Search_Result.php
东营 http://newhouse.dy.soufun.com/house/web/Search_Result.php
枣庄 http://newhouse.zaozhuang.soufun.com/house/web/Search_Result.php
聊城 http://newhouse.lc.soufun.com/house/web/Search_Result.php
泰安 http://newhouse.taian.soufun.com/house/web/Search_Result.php
日照 http://newhouse.rz.soufun.com/house/web/Search_Result.php
菏泽 http://newhouse.heze.soufun.com/house/web/Search_Result.php
威海 http://newhouse.weihai.soufun.com/house/web/Search_Result.php
莱芜 http://newhouse.laiwu.soufun.com/house/web/Search_Result.php
太原 http://newhouse.taiyuan.soufun.com/house/web/Search_Result.php
长治 http://newhouse.changzhi.soufun.com/house/web/Search_Result.php
运城 http://newhouse.yuncheng.soufun.com/house/web/Search_Result.php
临汾 http://newhouse.linfen.soufun.com/house/web/Search_Result.php
晋中 http://newhouse.jinzhong.soufun.com/house/web/Search_Result.php
大同 http://newhouse.datong.soufun.com/house/web/Search_Result.php
忻州 http://newhouse.xinzhou.soufun.com/house/web/Search_Result.php
吕梁 http://newhouse.lvliang.soufun.com/house/web/Search_Result.php
朔州 http://newhouse.shuozhou.soufun.com/house/web/Search_Result.php
阳泉 http://newhouse.yangquan.soufun.com/house/web/Search_Result.php
晋城 http://newhouse.jc.soufun.com/house/web/Search_Result.php
西安 http://newhouse.xian.soufun.com/house/web/Search_Result.php
宝鸡 http://newhouse.baoji.soufun.com/house/web/Search_Result.php
咸阳 http://newhouse.xianyang.soufun.com/house/web/Search_Result.php
渭南 http://newhouse.weinan.soufun.com/house/web/Search_Result.php
汉中 http://newhouse.hanzhong.soufun.com/house/web/Search_Result.php
安康 http://newhouse.ankang.soufun.com/house/web/Search_Result.php
榆林 http://newhouse.sxyulin.soufun.com/house/web/Search_Result.php
延安 http://newhouse.yanan.soufun.com/house/web/Search_Result.php
铜川 http://newhouse.tongchuan.soufun.com/house/web/Search_Result.php
成都 http://newhouse.cd.soufun.com/house/web/Search_Result.php
绵阳 http://newhouse.mianyang.soufun.com/house/web/Search_Result.php
泸州 http://newhouse.luzhou.soufun.com/house/web/Search_Result.php
南充 http://newhouse.nanchong.soufun.com/house/web/Search_Result.php
德阳 http://newhouse.deyang.soufun.com/house/web/Search_Result.php
乐山 http://newhouse.leshan.soufun.com/house/web/Search_Result.php
达州 http://newhouse.dazhou.soufun.com/house/web/Search_Result.php
广安 http://newhouse.guangan.soufun.com/house/web/Search_Result.php
攀枝花 http://newhouse.panzhihua.soufun.com/house/web/Search_Result.php
遂宁 http://newhouse.suining.soufun.com/house/web/Search_Result.php
自贡 http://newhouse.zigong.soufun.com/house/web/Search_Result.php
内江 http://newhouse.neijiang.soufun.com/house/web/Search_Result.php
巴中 http://newhouse.bazhong.soufun.com/house/web/Search_Result.php
宜宾 http://newhouse.yibin.soufun.com/house/web/Search_Result.php
眉山 http://newhouse.meishan.soufun.com/house/web/Search_Result.php
资阳 http://newhouse.ziyang.soufun.com/house/web/Search_Result.php
乌鲁木齐 http://newhouse.xj.soufun.com/house/web/Search_Result.php
喀什 http://newhouse.kashi.soufun.com/house/web/Search_Result.php
巴州 http://newhouse.bazhou.soufun.com/house/web/Search_Result.php
伊犁 http://newhouse.yili.soufun.com/house/web/Search_Result.php
昆明 http://newhouse.km.soufun.com/house/web/Search_Result.php
红河 http://newhouse.honghe.soufun.com/house/web/Search_Result.php
曲靖 http://newhouse.qujing.soufun.com/house/web/Search_Result.php
玉溪 http://newhouse.yuxi.soufun.com/house/web/Search_Result.php
丽江 http://newhouse.lijiang.soufun.com/house/web/Search_Result.php
大理 http://newhouse.dali.soufun.com/house/web/Search_Result.php
西双版纳 http://newhouse.xishuangbanna.soufun.com/house/web/Search_Result.php
杭州 http://newhouse.hz.soufun.com/house/web/Search_Result.php
宁波 http://newhouse.nb.soufun.com/house/web/Search_Result.php
嘉兴 http://newhouse.jx.soufun.com/house/web/Search_Result.php
温州 http://newhouse.wz.soufun.com/house/web/Search_Result.php
金华 http://newhouse.jh.soufun.com/house/web/Search_Result.php
台州 http://newhouse.tz.soufun.com/house/web/Search_Result.php
绍兴 http://newhouse.sx.soufun.com/house/web/Search_Result.php
湖州 http://newhouse.huzhou.soufun.com/house/web/Search_Result.php
衢州 http://newhouse.quzhou.soufun.com/house/web/Search_Result.php
丽水 http://newhouse.ls.soufun.com/house/web/Search_Result.php
舟山 http://newhouse.zhoushan.soufun.com/house/web/Search_Result.php
长兴 http://newhouse.changxing.soufun.com/house/web/Search_Result.php
德清 http://newhouse.deqing.soufun.com/house/web/Search_Result.php
上海 http://newhouse.sh.soufun.com/house/web/Search_Result.php
以上是关于 Python 爬取搜房网新房数据信息(Python 多线程 + requests + BeautifulSoup)的主要内容,如果未能解决你的问题,请参考以下文章。