第一个爬虫和测试

Posted 叫你一声你敢应吗

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了第一个爬虫和测试相关的知识,希望对你有一定的参考价值。

(1)模拟乒乓球赛函数测试结果:

单打:

from random import random
def printIntro():
    """Print the two-line banner describing the simulation and its inputs."""
    banner = (
        "这个程序模拟两个选手A和B的某种比赛",
        "程序运行需要A和B的能力值(0到1之间)",
    )
    for line in banner:
        print(line)
def getInputs():
    """Prompt for the ability values of players A and B and return them.

    Returns:
        A tuple ``(a, b)`` of floats parsed from the two console answers.

    Raises:
        ValueError: If an answer cannot be parsed as a number.
    """
    # float() replaces the original eval(): eval would execute any
    # expression typed at the prompt, which is unsafe for console input.
    a = float(input("请输入选手A的能力值(0-1):"))
    b = float(input("请输入选手B的能力值(0-1):"))
    return a, b
def simNGames(probA, probB, bestOf=7):
    """Simulate one match played as the best of ``bestOf`` games.

    The match stops as soon as one player has won a majority of the games
    (4 of 7 by default). The original code waited for 5 wins, so a
    best-of-7 match could keep going after it was already decided.

    Args:
        probA: Probability passed to simOneGame for player A.
        probB: Probability passed to simOneGame for player B.
        bestOf: Maximum number of games in the match; defaults to 7.

    Returns:
        A tuple ``(winsA, winsB)`` with the games won by each player.
    """
    winsNeeded = bestOf // 2 + 1
    winsA, winsB = 0, 0
    for _ in range(bestOf):
        scoreA, scoreB = simOneGame(probA, probB)
        if scoreA > scoreB:
            winsA += 1
        else:
            winsB += 1
        # Stop once either side holds a majority of the games.
        if winsA == winsNeeded or winsB == winsNeeded:
            break
    return winsA, winsB
def gameOver(a, b):
    """Return True when a game with scores ``a`` and ``b`` is finished.

    A game is finished once the leader has at least 11 points and is at
    least two points ahead, so play continues past 10-10 until one side
    leads by two. The original check (``a==11 or b==11``) ended the game
    at 11-10.
    """
    return max(a, b) >= 11 and abs(a - b) >= 2
def simOneGame(probA, probB):
    """Simulate a single game and return the final ``(scoreA, scoreB)``.

    Rallies are played until gameOver() reports the game finished. Only
    the serving side can score: when ``random()`` is below the server's
    probability the server gains a point, otherwise the serve passes to
    the opponent. Player A serves first.

    Args:
        probA: Probability that A scores on a rally A serves.
        probB: Probability that B scores on a rally B serves.
    """
    scoreA, scoreB = 0, 0
    # Plain quotes restored: the escaped \' in the source is a syntax error.
    serving = 'A'
    while not gameOver(scoreA, scoreB):
        if serving == 'A':
            if random() < probA:
                scoreA += 1
            else:
                serving = 'B'
        else:
            if random() < probB:
                scoreB += 1
            else:
                serving = 'A'
    return scoreA, scoreB
def printSummary(winsA, winsB):
    """Print the number of games played and each player's share of wins.

    Args:
        winsA: Games won by player A.
        winsB: Games won by player B.
    """
    n = winsA + winsB
    # With no games played, report 0.0% instead of dividing by zero.
    shareA = winsA / n if n else 0.0
    shareB = winsB / n if n else 0.0
    print("竞技分析开始,共模拟{}场比赛".format(n))
    print("选手A获胜{}场比赛,占比{:.1%}".format(winsA, shareA))
    print("选手B获胜{}场比赛,占比{:.1%}".format(winsB, shareB))
# Run one simulated match with both ability values fixed at 0.5
# (getInputs() is not called here) and print the summary.
try:
    printIntro()
    probA, probB = 0.5, 0.5
    winsA, winsB = simNGames(probA, probB)
    printSummary(winsA, winsB)
    print("043林树锋的结果截图")
except Exception:
    # Exception instead of a bare except, so Ctrl-C and SystemExit still
    # propagate; the escaped \' quotes in the source were a syntax error.
    print('Error')

测试结果:

 

双打:

from random import random
def printIntro():
    """Show what the program simulates and which inputs it expects."""
    intro_lines = [
        "这个程序模拟两个选手A和B的某种比赛",
        "程序运行需要A和B的能力值(0到1之间)",
    ]
    print("\n".join(intro_lines))
def getInputs():
    """Read the ability values of players A and B from the console.

    Returns:
        A tuple ``(a, b)`` of floats parsed from the two answers.

    Raises:
        ValueError: If an answer cannot be parsed as a number.
    """
    # Parse with float() rather than eval(): eval runs whatever expression
    # the user types, which is unsafe for console input.
    a = float(input("请输入选手A的能力值(0-1):"))
    b = float(input("请输入选手B的能力值(0-1):"))
    return a, b
def simNGames(probA, probB):
    """Simulate a match of at most five games, ending at three wins.

    Each game is played with simOneGame; the side with the higher score
    takes the game (a tie would be credited to B).

    Returns:
        A tuple ``(winsA, winsB)`` with the games won by each player.
    """
    tally = {'A': 0, 'B': 0}
    for _ in range(5):
        ptsA, ptsB = simOneGame(probA, probB)
        winner = 'A' if ptsA > ptsB else 'B'
        tally[winner] += 1
        # Three wins decide the match early.
        if tally[winner] == 3:
            break
    return tally['A'], tally['B']
def gameOver(a, b):
    """Return True when a game with scores ``a`` and ``b`` is finished.

    The game ends when the higher score is at least 11 and the margin is
    at least two points; at 10-10 play therefore continues until one side
    is two ahead. The original ``a==11 or b==11`` stopped at 11-10.
    """
    lead = abs(a - b)
    return (a >= 11 or b >= 11) and lead >= 2
def simOneGame(probA, probB):
    """Simulate a single game and return the final ``(scoreA, scoreB)``.

    The loop runs until gameOver() reports the game finished. Only the
    serving side can score: the server gains a point when ``random()`` is
    below its probability, otherwise the serve switches sides. Player A
    serves first.

    Args:
        probA: Probability that A scores on a rally A serves.
        probB: Probability that B scores on a rally B serves.
    """
    scoreA, scoreB = 0, 0
    # Plain quotes restored: the escaped \' in the source is a syntax error.
    serving = 'A'
    while not gameOver(scoreA, scoreB):
        if serving == 'A':
            if random() < probA:
                scoreA += 1
            else:
                serving = 'B'
        else:
            if random() < probB:
                scoreB += 1
            else:
                serving = 'A'
    return scoreA, scoreB
def printSummary(winsA, winsB):
    """Print the total games played and each player's wins and share.

    Args:
        winsA: Games won by player A.
        winsB: Games won by player B.
    """
    n = winsA + winsB
    print("竞技分析开始,共模拟{}场比赛".format(n))
    # Guard the ratio so an empty match (n == 0) prints 0.0% instead of
    # raising ZeroDivisionError.
    ratioA = winsA / n if n else 0.0
    ratioB = winsB / n if n else 0.0
    print("选手A获胜{}场比赛,占比{:.1%}".format(winsA, ratioA))
    print("选手B获胜{}场比赛,占比{:.1%}".format(winsB, ratioB))
# Run one simulated match with both ability values fixed at 0.5
# (getInputs() is not called here) and print the summary.
try:
    printIntro()
    probA, probB = 0.5, 0.5
    winsA, winsB = simNGames(probA, probB)
    printSummary(winsA, winsB)
    print("043林树锋的结果截图")
except Exception:
    # Exception instead of a bare except, so Ctrl-C and SystemExit still
    # propagate; the escaped \' quotes in the source were a syntax error.
    print('Error')

测试结果:

(2)用 requests 库的 get() 函数访问搜狗20次,打印返回状态和 text 属性的内容,并计算 text 属性和 content 属性所返回的网页内容长度。

import requests
# Visit Sogou 20 times and report on every response. The original printed
# after the loop, so only the last of the 20 responses was ever shown.
for i in range(20):
    # The timeout keeps one stalled connection from hanging the whole run.
    r = requests.get("https://www.sogou.com/", timeout=30)
    print(r.status_code)
    print(r.text)
    print(type(r.text))
    # Length of the decoded text versus the raw bytes of the same page.
    print(len(r.text))
    print(len(r.content))

 (3)完成简单html页面计算:

from bs4 import BeautifulSoup
import re
# Sample page parsed below; kept exactly as in the original snippet.
html = """
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)</title>
</head>
<body>
    <h1>我的第一个标题</h1>
    <p id="first">我的第一个段落</p >
</body>
        <table border="1">
    <tr>
        <td>row 1, cell 1</td>
        <td>row 1, cell 2</td>
    </tr>
    <tr>
        <td>row 2, cell 1</td>
        <td>row 2, cell 2</td>
    </tr>
</table>
</html>
"""
# Name the parser explicitly so the result does not depend on which
# parsers happen to be installed (and bs4 does not warn about guessing).
abc = BeautifulSoup(html, "html.parser")
# <head> and <body> each have several children, so .string is None for
# both; get_text() collects the text they actually contain.
print(abc.head.get_text(strip=True) + '\n' + '43')  # head content plus the student number
print(abc.body.get_text())  # text inside the body tag
print(abc.find_all(id="first"))
r = abc.text
# Match runs of CJK ideographs. The original class contained a stray 'h'
# and used a lazy '+?', which split every match into single characters.
zhongwen = re.findall(r'[\u4e00-\u9fff]+', r)
print(zhongwen)

 

(4)爬取中国大学排名网站内容并转换为csv文件:

 

import csv
import os
import requests
import pandas
from bs4 import BeautifulSoup

# Rows collected by fillUnivList(); each entry is the list of cell strings
# of one table row.
allUniv = []


def getHTMLText(url):
    """Fetch ``url`` and return its text decoded as UTF-8, or "" on failure."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Best effort: a failed request yields an empty page instead of a
        # crash. Only request errors are swallowed, not every exception.
        return ""


def fillUnivList(soup):
    """Append the cell strings of every ``<tr>`` in ``soup`` to ``allUniv``.

    Rows without any ``<td>`` cell are skipped.
    """
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if not ltd:
            continue
        allUniv.append([td.string for td in ltd])


def writercsv(save_road, num, title):
    """Write up to ``num`` rows of ``allUniv`` to the CSV file ``save_road``.

    An existing file is appended to; a new file gets ``title`` as its
    header row first.

    Args:
        save_road: Path of the CSV file.
        num: Maximum number of rows to write.
        title: Header row used when the file is created.
    """
    is_new_file = not os.path.isfile(save_road)
    with open(save_road, 'w' if is_new_file else 'a', newline='') as f:
        csv_write = csv.writer(f, dialect='excel')
        if is_new_file:
            csv_write.writerow(title)
        # Slicing avoids the IndexError the original raised when fewer
        # than ``num`` rows had been scraped (e.g. after a failed fetch).
        csv_write.writerows(allUniv[:num])


title = ["排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "科研规模",
         "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研究合作", "成果转化"]
# Raw string: same value as the original, without the invalid "\s" escape.
save_road = r"C:\Users\Benny\Desktop\Python\Python练习\sqlit_test02.csv"


def main():
    """Download the 2016 ranking page and save the first 100 rows as CSV."""
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    writercsv(save_road, 100, title)


if __name__ == "__main__":
    main()

 

以上是关于第一个爬虫和测试的主要内容,如果未能解决你的问题,请参考以下文章

第一个爬虫和测试

第一个爬虫和测试

第一个爬虫和测试

Python练习册 第 0013 题: 用 Python 写一个爬图片的程序,爬 这个链接里的日本妹子图片 :-),(http://tieba.baidu.com/p/2166231880)(代码片段)

第一个爬虫与测试

第一个爬虫与测试