# API
import socket
import select
class MySock:
    """Pair a socket with the request description it was opened for.

    Any attribute that is not found on the wrapper itself is looked up on
    the wrapped socket, so an instance can be handed to ``select.select``
    (which needs ``fileno()``) and used for ``send``/``recv`` directly.

    Args:
        sock: The underlying socket object.
        data: The request description associated with this socket.
    """

    def __init__(self, sock, data):
        self.sock = sock
        self.data = data

    def __getattr__(self, item):
        """Delegate attributes missing on the wrapper to the wrapped socket.

        Raises:
            AttributeError: If neither the wrapper nor the socket has *item*.
        """
        # __getattr__ only runs after normal lookup has failed. If "sock"
        # itself is the missing attribute (instance built without __init__,
        # e.g. during copy/unpickle), reading self.sock below would re-enter
        # this method forever; fail with a plain AttributeError instead.
        if item == "sock":
            raise AttributeError(item)
        return getattr(self.sock, item)
class YinBing:
    """Tiny select()-based HTTP fetcher that drives several sockets at once.

    Requests are queued with :meth:`add` and processed by :meth:`run`, which
    sends one GET per socket and passes the first chunk of each response to
    the request's optional callback.
    """

    def __init__(self):
        # Sockets that are still waiting for a response.
        self.r_list = []
        # Sockets whose request has not been sent yet.
        self.w_list = []

    def add(self, req_info):
        """Start a non-blocking connection for one request and queue it.

        Args:
            req_info: Mapping read via the keys ``"host"`` and ``"port"``
                here, and ``"path"``, ``"host"`` and ``"callback"`` in
                :meth:`run`.

        Raises:
            OSError: If the connection attempt fails immediately for a
                reason other than "still in progress".
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setblocking(False)
        try:
            sock.connect((req_info.get("host"), req_info.get("port")))
        except BlockingIOError:
            # Expected for a non-blocking connect: the handshake is still in
            # progress and select() reports the socket writable when done.
            pass
        except OSError:
            # Do not leak the descriptor when the connect fails outright.
            sock.close()
            raise
        s = MySock(sock, req_info)
        self.r_list.append(s)
        self.w_list.append(s)

    def run(self):
        """Send every queued request and dispatch each response once.

        Loops until no socket is waiting for a response; returns immediately
        when nothing is queued. For each socket the request is sent exactly
        once, up to 1024 bytes of the response are read, the socket is
        closed, and ``callback(response)`` is invoked when the request
        supplied a truthy ``"callback"``.
        """
        while self.r_list:
            rl, wl, _ = select.select(self.r_list, self.w_list, [], 0.5)
            for sock in wl:
                request = "GET %s HTTP/1.1\r\nHost:%s\r\n\r\n" % (
                    sock.data["path"],
                    sock.data["host"],
                )
                sock.sendall(request.encode("utf-8"))
                # A request must go out only once; leaving the socket in
                # w_list would re-send it on every pass of the loop.
                self.w_list.remove(sock)
            for sock in rl:
                self.r_list.remove(sock)
                if sock in self.w_list:
                    self.w_list.remove(sock)
                try:
                    response = sock.recv(1024)
                finally:
                    # The socket is finished either way; release it.
                    sock.close()
                callback = sock.data.get("callback")
                if callback:
                    callback(response)
# Usage (调用方法)
from 自定义爬虫框架 import YinBing
def done1(response):
    """Callback: print the response prefixed with the label '处理一'."""
    print('处理一', response)
def done2(response):
    """Callback: print the response as-is."""
    print(response)
# Requests to fetch. Each entry supplies the host/port to connect to, the
# path to GET, and an optional callback (None means the response is dropped).
url_list = [
    {'host': 'www.baidu.com', 'port': 80, 'path': '/', 'callback': done1},
    {'host': 'www.cnblogs.com', 'port': 80, 'path': '/index.html', 'callback': done2},
    {'host': 'www.bing.com', 'port': 80, 'path': '/', 'callback': None},
]
if __name__ == '__main__':
    # Queue every request first so that a single select loop in run()
    # drives all of the connections together.
    y = YinBing()
    for obj in url_list:
        y.add(obj)
    y.run()