第一个爬虫和测试
Posted 叫你一声你敢应吗
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了第一个爬虫和测试相关的知识,希望对你有一定的参考价值。
(1)模拟乒乓球赛函数测试结果:
单打:
# Singles ping-pong simulation: play up to 7 games, first player to 5 game
# wins takes the match.  (Reconstructed from a scrape-collapsed one-liner;
# the only behavior change is narrowing the final bare `except:`.)
from random import random


def printIntro():
    """Print a short description of what the simulation does."""
    print("这个程序模拟两个选手A和B的某种比赛")
    print("程序运行需要A和B的能力值(0到1之间)")


def getInputs():
    """Prompt for the two players' ability values and return them as (a, b).

    NOTE(review): kept for fidelity with the original but never called below
    (the script hard-codes probA = probB = 0.5).  `eval` on raw user input is
    unsafe; prefer float() if this ever gets wired up.
    """
    a = eval(input("请输入选手A的能力值(0-1):"))
    b = eval(input("请输入选手B的能力值(0-1):"))
    return a, b


def simNGames(probA, probB):
    """Simulate up to 7 games; stop early once either side reaches 5 wins.

    Returns (winsA, winsB).
    """
    winsA, winsB = 0, 0
    for _ in range(7):
        scoreA, scoreB = simOneGame(probA, probB)
        if scoreA > scoreB:
            winsA += 1
            if winsA == 5:
                break
        else:
            winsB += 1
            if winsB == 5:
                break
    return winsA, winsB


def gameOver(a, b):
    """A game ends when either player reaches exactly 11 points.

    NOTE(review): real table-tennis rules require a 2-point lead at 10-10;
    the original ignores that, and the behavior is preserved here.
    """
    return a == 11 or b == 11


def simOneGame(probA, probB):
    """Play one game to 11: the server scores with probability prob{A,B},
    otherwise the serve passes to the other player.  Returns (scoreA, scoreB)."""
    scoreA, scoreB = 0, 0
    serving = 'A'
    while not gameOver(scoreA, scoreB):
        if serving == 'A':
            if random() < probA:
                scoreA += 1
            else:
                serving = 'B'
        else:
            if random() < probB:
                scoreB += 1
            else:
                serving = 'A'
    return scoreA, scoreB


def printSummary(winsA, winsB):
    """Print win counts and win percentages for both players."""
    n = winsA + winsB
    print("竞技分析开始,共模拟{}场比赛".format(n))
    print("选手A获胜{}场比赛,占比{:.1%}".format(winsA, winsA / n))
    print("选手B获胜{}场比赛,占比{:.1%}".format(winsB, winsB / n))


try:
    printIntro()
    probA, probB = 0.5, 0.5
    winsA, winsB = simNGames(probA, probB)
    printSummary(winsA, winsB)
    print("043林树锋的结果截图")
except Exception:  # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
    print('Error')
测试结果:
双打:
# Doubles ping-pong simulation: play up to 5 games, first side to 3 game
# wins takes the match.  (Reconstructed from a scrape-collapsed one-liner;
# the only behavior change is narrowing the final bare `except:`.)
from random import random


def printIntro():
    """Print a short description of what the simulation does."""
    print("这个程序模拟两个选手A和B的某种比赛")
    print("程序运行需要A和B的能力值(0到1之间)")


def getInputs():
    """Prompt for the two sides' ability values and return them as (a, b).

    NOTE(review): kept for fidelity with the original but never called below
    (the script hard-codes probA = probB = 0.5).  `eval` on raw user input is
    unsafe; prefer float() if this ever gets wired up.
    """
    a = eval(input("请输入选手A的能力值(0-1):"))
    b = eval(input("请输入选手B的能力值(0-1):"))
    return a, b


def simNGames(probA, probB):
    """Simulate up to 5 games; stop early once either side reaches 3 wins.

    Returns (winsA, winsB).
    """
    winsA, winsB = 0, 0
    for _ in range(5):
        scoreA, scoreB = simOneGame(probA, probB)
        if scoreA > scoreB:
            winsA += 1
            if winsA == 3:
                break
        else:
            winsB += 1
            if winsB == 3:
                break
    return winsA, winsB


def gameOver(a, b):
    """A game ends when either side reaches exactly 11 points.

    NOTE(review): real table-tennis rules require a 2-point lead at 10-10;
    the original ignores that, and the behavior is preserved here.
    """
    return a == 11 or b == 11


def simOneGame(probA, probB):
    """Play one game to 11: the serving side scores with probability
    prob{A,B}, otherwise the serve passes.  Returns (scoreA, scoreB)."""
    scoreA, scoreB = 0, 0
    serving = 'A'
    while not gameOver(scoreA, scoreB):
        if serving == 'A':
            if random() < probA:
                scoreA += 1
            else:
                serving = 'B'
        else:
            if random() < probB:
                scoreB += 1
            else:
                serving = 'A'
    return scoreA, scoreB


def printSummary(winsA, winsB):
    """Print win counts and win percentages for both sides."""
    n = winsA + winsB
    print("竞技分析开始,共模拟{}场比赛".format(n))
    print("选手A获胜{}场比赛,占比{:.1%}".format(winsA, winsA / n))
    print("选手B获胜{}场比赛,占比{:.1%}".format(winsB, winsB / n))


try:
    printIntro()
    probA, probB = 0.5, 0.5
    winsA, winsB = simNGames(probA, probB)
    printSummary(winsA, winsB)
    print("043林树锋的结果截图")
except Exception:  # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
    print('Error')
测试结果:
。
(2)用 requests 库的 get() 函数访问搜狗 20 次,打印返回状态码和 text 内容,并计算 text 属性和 content 属性所返回的网页内容长度(注意:text 和 content 是属性而非方法)。
# Exercise (2): GET the Sogou homepage 20 times and, for each response,
# print the HTTP status code, the decoded body (r.text), its type, and the
# lengths of both the decoded text and the raw bytes.
# Fix: the task statement asks for the lengths of BOTH the text and the
# content properties, but the original only printed len(r.content) —
# len(r.text) is now printed as well.
import requests

for i in range(20):  # range(0, 20) in the original; identical 20 iterations
    r = requests.get("https://www.sogou.com/")
    print(r.status_code)
    print(r.text)
    print(type(r.text))
    print(len(r.text))      # length of the decoded page text (str)
    print(len(r.content))   # length of the raw response body (bytes)
(3)完成简单html页面计算:
# Exercise (3): parse a small fixed HTML page with BeautifulSoup and print
# the title (plus a student number), the body text, the element with
# id="first", and every run of CJK characters found in the page text.
# Fix: BeautifulSoup(html) without an explicit parser emits a
# GuessedAtParserWarning and gives parser-dependent results; "html.parser"
# (stdlib) is now named explicitly.
from bs4 import BeautifulSoup
import re

html = """
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)</title>
</head>
<body>
<h1>我的第一个标题</h1>
<p id="first">我的第一个段落</p >
</body>
<table border="1">
<tr>
<td>row 1, cell 1</td>
<td>row 1, cell 2</td>
</tr>
<tr>
<td>row 2, cell 1</td>
<td>row 2, cell 2</td>
</tr>
</table>
</html>
"""
abc = BeautifulSoup(html, "html.parser")
print(str(abc.head.string) + '\n' + '43')  # title text plus the student number
# NOTE(review): <body> has several child elements, so .string is None here;
# preserved as-is — use .get_text() if the body text is actually wanted.
print(abc.body.string)
print(abc.find_all(id="first"))  # list of tags whose id attribute is "first"
r = abc.text
# NOTE(review): the trailing 'h' inside the character class looks like a typo
# in the original pattern; preserved byte-for-byte — confirm before changing.
zhongwen = re.findall(u'[\u1100-\uFFFDh]+?', r)
print(zhongwen)
(4)爬取中国大学排名网站内容并转换为csv文件:
# Exercise (4): scrape the 2016 China university ranking table and write the
# first 100 rows to a CSV file.
# Fixes vs. the scraped original: bare `except:` narrowed to
# requests.RequestException; the two duplicated writer branches merged;
# inconsistent backslash escaping in save_road replaced with a raw string of
# identical value; the module-level main() call put behind a __main__ guard.
import csv
import os

import requests
from bs4 import BeautifulSoup

import pandas  # NOTE(review): unused in this script; kept because other chunks of the file may rely on it

allUniv = []  # accumulates one [rank, name, province, ...] row per university


def getHTMLText(url):
    """Download *url* and return its text, or "" on any request/HTTP error."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:  # was a bare `except:` — narrowed
        return ""


def fillUnivList(soup):
    """Append every non-empty <tr> of *soup* to allUniv as a list of cell strings."""
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if not ltd:
            continue  # header/separator rows have no <td> cells
        allUniv.append([td.string for td in ltd])


def writercsv(save_road, num, title):
    """Write the first *num* rows of allUniv to *save_road* as CSV.

    Appends when the file already exists; otherwise creates it and writes
    the *title* header row first.
    """
    exists = os.path.isfile(save_road)
    with open(save_road, 'a' if exists else 'w', newline='') as f:
        csv_write = csv.writer(f, dialect='excel')
        if not exists:
            csv_write.writerow(title)
        for i in range(num):
            csv_write.writerow(allUniv[i])


title = ["排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "科研规模",
         "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研究合作", "成果转化"]
# Raw string with the same value as the original's mixed \\ / \ escaping.
save_road = r"C:\Users\Benny\Desktop\Python\Python练习\sqlit_test02.csv"


def main():
    """Fetch the ranking page, parse it, and dump 100 rows to CSV."""
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    writercsv(save_road, 100, title)


if __name__ == "__main__":  # was an unconditional call, which ran on import
    main()
以上是关于第一个爬虫和测试的主要内容,如果未能解决你的问题,请参考以下文章
Python练习册 第 0013 题: 用 Python 写一个爬图片的程序,爬 这个链接里的日本妹子图片 :-),(http://tieba.baidu.com/p/2166231880)(代码片段