网络爬虫入门——案例二:爬取教务系统中的学生成绩
Posted 可爱的熊乖乖
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了网络爬虫入门——案例二:爬取教务系统中的学生成绩相关的知识,希望对你有一定的参考价值。
参考资料:
本帖目标:
1.模拟登录学校教务系统
2.对教务系统中的学生成绩进行抓取
3.将抓取到的内容保存到 Excel 表格,并计算平均成绩和绩点
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 16 18:26:46 2016

@author: wuhan

Log in to the educational administration system, save the grade table to an
.xls workbook, then print the credit-weighted average score and the GPA.

The script runs on both Python 2 and Python 3.  The third-party packages
(bs4, xlwt, xlrd) are imported inside the methods that need them, so the
pure calculations can be used without those packages installed.
"""
import sys

try:  # Python 3 module layout
    from http.cookiejar import CookieJar
    from urllib.parse import urlencode
    from urllib.request import HTTPCookieProcessor, Request, build_opener
except ImportError:  # Python 2 module layout
    from cookielib import CookieJar
    from urllib import urlencode
    from urllib2 import HTTPCookieProcessor, Request, build_opener

# Workbook written by SDU.getGrade() and read back by the calculations.
WORKBOOK_PATH = 'jiaowuxitong.xls'
SHEET_NAME = u'课程信息'

# Column indexes inside one scraped row.  The meaning follows how the
# calculations use them: column 4 is parsed as the credit value, column 5 is
# searched for the "compulsory" marker and column 6 holds the score.
CREDIT_COL = 4
COURSE_TYPE_COL = 5
GRADE_COL = 6

COMPULSORY = u'必修'  # marker of a compulsory course
PASSED = u'通过'      # textual "pass" result used instead of a numeric score
PASS_SCORE = 60       # numeric score assumed for a textual pass in the average

# (inclusive lower bound, grade point), ordered from the highest band down.
_GRADE_POINTS = (
    (90, 4.0),
    (85, 3.7),
    (82, 3.3),
    (78, 3.0),
    (75, 2.7),
    (71, 2.3),
    (66, 2.0),
    (62, 1.7),
    (60, 1.3),
)


def _grade_to_point(grade):
    """Return the grade point for a numeric score; 0.0 for scores below 60.

    Bands are matched with ``>=`` so that fractional scores (e.g. 89.5) and
    the score 84 land in a band instead of silently earning 0 points.
    """
    for lower_bound, point in _GRADE_POINTS:
        if grade >= lower_bound:
            return point
    return 0.0


class SDU(object):
    """Scraper for the grade page plus average/GPA calculations.

    Attributes:
        loginUrl: URL the login form is posted to.
        gradeUrl: URL of the grade listing fetched after logging in.
        cookies: cookie jar shared by all requests of this instance.
        postdata: URL-encoded login form body.
        opener: URL opener that stores and resends the session cookies.
        book: xlwt workbook the grade table is written into.
    """

    def __init__(self, username='*********', password='******'):
        """Prepare the HTTP session and an empty workbook.

        Args:
            username: value sent as the ``zjh`` form field.  The default is
                the masked placeholder from the original post.
            password: value sent as the ``mm`` form field (also a masked
                placeholder by default).
        """
        import xlwt  # third-party; imported lazily, see module docstring

        self.loginUrl = 'http://zhjw.dlut.edu.cn/loginAction.do'
        self.gradeUrl = ('http://zhjw.dlut.edu.cn/gradeLnAllAction.do'
                         '?type=ln&oper=fainfo&fajhh=4289')
        self.cookies = CookieJar()
        # Python 3 requires the request body to be bytes.
        self.postdata = urlencode({
            'zjh': username,
            'mm': password,
        }).encode('ascii')
        self.opener = build_opener(HTTPCookieProcessor(self.cookies))
        self.book = xlwt.Workbook(encoding='utf-8', style_compression=0)

    def getPage(self):
        """Log in, fetch the grade page and return it as decoded text.

        Returns:
            The grade page body decoded from GBK.
        """
        login_request = Request(url=self.loginUrl, data=self.postdata)
        # Only the cookies set by the login response are needed.
        self.opener.open(login_request).close()

        response = self.opener.open(self.gradeUrl)
        try:
            return response.read().decode('gbk')
        finally:
            response.close()

    def getGrade(self):
        """Scrape the grade table and save it to ``WORKBOOK_PATH``.

        Every ``<tr class="odd">`` becomes one sheet row and every centred
        ``<td>`` inside it one cell.  When a cell wraps its text in a centred
        ``<p>``, the text of that ``<p>`` is used.
        """
        from bs4 import BeautifulSoup  # third-party; imported lazily

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(self.getPage(), 'html.parser')
        sheet = self.book.add_sheet(SHEET_NAME, cell_overwrite_ok=True)

        for row_idx, row in enumerate(soup.find_all('tr', class_='odd')):
            cells = row.find_all('td', align='center')
            for col_idx, cell in enumerate(cells):
                paragraph = cell.find('p', align='center')
                node = cell if paragraph is None else paragraph
                sheet.write(row_idx, col_idx, node.get_text(strip=True))

        self.book.save(WORKBOOK_PATH)

    def _readRows(self):
        """Return all rows of the first sheet of ``WORKBOOK_PATH`` as lists."""
        from xlrd import open_workbook  # third-party; imported lazily

        workbook = open_workbook(WORKBOOK_PATH, formatting_info=True)
        table = workbook.sheet_by_index(0)
        return [table.row_values(row_idx) for row_idx in range(table.nrows)]

    def getAverage(self, rows=None):
        """Print and return the credit-weighted average of compulsory courses.

        A textual pass result counts as ``PASS_SCORE``.

        Args:
            rows: optional iterable of row sequences indexed like the saved
                sheet.  When omitted, the rows are read from the workbook
                written by :meth:`getGrade`.

        Returns:
            The weighted average as a float, or 0.0 when no compulsory
            course is present.

        Raises:
            ValueError: if a credit or a non-pass score is not numeric.
        """
        if rows is None:
            rows = self._readRows()

        weighted_sum = 0.0
        credit_sum = 0.0
        for row in rows:
            if COMPULSORY not in row[COURSE_TYPE_COL]:
                continue
            credit = float(row[CREDIT_COL])
            grade = row[GRADE_COL]
            score = PASS_SCORE if PASSED in grade else float(grade)
            credit_sum += credit
            weighted_sum += credit * score

        # Avoid ZeroDivisionError when nothing qualified.
        ave = weighted_sum / credit_sum if credit_sum else 0.0
        print(u"您的平均成绩为:")
        print(ave)
        return ave

    def getGPA(self, rows=None):
        """Print and return the credit-weighted GPA.

        Rows whose result is a textual pass are skipped; every other row is
        counted, whatever its course type.

        Args:
            rows: optional iterable of row sequences indexed like the saved
                sheet.  When omitted, the rows are read from the workbook
                written by :meth:`getGrade`.

        Returns:
            The GPA as a float, or 0.0 when no graded course is present.

        Raises:
            ValueError: if a credit or score is not numeric.
        """
        if rows is None:
            rows = self._readRows()

        point_sum = 0.0
        credit_sum = 0.0
        for row in rows:
            grade = row[GRADE_COL]
            if PASSED in grade:
                continue
            credit = float(row[CREDIT_COL])
            credit_sum += credit
            point_sum += _grade_to_point(float(grade)) * credit

        # Avoid ZeroDivisionError when nothing qualified.
        GPA = point_sum / credit_sum if credit_sum else 0.0
        print(u"您的绩点为:")
        print(GPA)
        return GPA


def main():
    """Scrape the grades, then print the average score and the GPA."""
    if sys.version_info[0] == 2:
        # Python 2 only: keep the original default-encoding switch so the
        # Chinese messages can be printed even when stdout has no encoding.
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding('utf-8')

    sdu = SDU()
    sdu.getGrade()
    sdu.getAverage()
    sdu.getGPA()


if __name__ == '__main__':
    main()
以上是关于网络爬虫入门——案例二:爬取教务系统中的学生成绩的主要内容,如果未能解决你的问题,请参考以下文章
Python模拟登陆教务系统爬取成绩信息+绘制成绩分布图+导入MySQL