Python:每天抓取一篇英语美文,发送到邮箱
Posted 韩俊
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用 Python 每天抓取一篇英语美文并发送到邮箱的相关知识,希望对你有一定的参考价值。
import getpass
import html
import os
import posixpath
import smtplib
import time
from email.mime.text import MIMEText
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every HTTP request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                        ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

# Seconds to wait for the web server before a request is abandoned; without
# a timeout `requests` may block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _recordEssay(recordsPath, pages, essayUrl):
    """Mark *essayUrl* as handled, both in *pages* and in the records file."""
    pages.add(essayUrl)
    with open(recordsPath, 'a', encoding='utf-8') as attach:
        # One URL per line: the start-up code splits the file on whitespace.
        attach.write(str(essayUrl) + '\n')


def findEssay(rootUrl, pages, recordsPath):
    """Walk the essay index and mail the first essay not yet recorded.

    Starting at *rootUrl*, each index page is scanned for ``.node_list a``
    links.  The first link whose ``href`` is not in *pages* is passed to
    ``downloadEssay``.  Paging continues through the ``.page a`` links until
    one essay has been sent, an error occurs, or the pager leads back to a
    page that was already visited.

    Args:
        rootUrl: URL of the index page to start from.
        pages: set of essay hrefs already handled; updated in place.
        recordsPath: path of the text file that persists *pages*.

    Returns:
        None.  Progress and errors are reported with ``print``.
    """
    pageUrl = rootUrl
    visitedPages = set()  # guards against a pager that loops back on itself
    while pageUrl and pageUrl not in visitedPages:
        visitedPages.add(pageUrl)
        try:
            response = requests.get(pageUrl, headers=header,
                                    timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print('根链接出现错误' + str(e))
            return

        # Hand BeautifulSoup the raw bytes so it detects the charset itself;
        # this also works when the server does not declare an encoding.
        soup = BeautifulSoup(response.content, 'html.parser')

        # Two levels above the index page, e.g.
        # http://host/essays/index.html -> http://host
        # posixpath is used because URLs always use '/', whatever the OS.
        dirUrl = posixpath.dirname(posixpath.dirname(pageUrl))

        for essayTag in soup.select('.node_list a'):
            essayUrl = essayTag.get('href')
            if not essayUrl or essayUrl in pages:
                continue
            sent = downloadEssay(dirUrl, essayUrl, essayTag.text)
            if sent is False:
                # Downloaded but not mailed: leave it unrecorded so that the
                # next run retries the same essay instead of losing it.
                return
            try:
                _recordEssay(recordsPath, pages, essayUrl)
            except OSError as e:
                print('根链接出现错误' + str(e))
                return
            print('写入记录成功')
            if sent:
                return
            # sent is None: the download failed; the URL is now recorded as
            # skipped and the scan moves on to the next essay.

        pagerLinks = soup.select('.page a')
        if len(pagerLinks) < 2:
            print('根链接出现错误' + '找不到下一页的链接')
            return
        # The second-to-last pager link is taken to be "next page"
        # (layout assumption carried over from the original script).
        nextHref = pagerLinks[-2].get('href')
        pageUrl = urljoin(pageUrl, nextHref) if nextHref else None


def downloadEssay(dirUrl, essayUrl, essayName):
    """Download one essay page and mail its paragraphs.

    Args:
        dirUrl: site root that *essayUrl* is resolved against.
        essayUrl: href of the essay as found on the index page.
        essayName: title of the essay, used as the mail heading.

    Returns:
        True if the essay was mailed, False if it was downloaded but the
        mail could not be sent, None if the download itself failed.
    """
    try:
        # urljoin copes with relative, root-relative and absolute hrefs.
        essayFullUrl = urljoin(dirUrl.rstrip('/') + '/', essayUrl)
        response = requests.get(essayFullUrl, headers=header,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except (requests.RequestException, ValueError) as e:
        print('下载文章失败 ' + str(e))
        return None
    soup = BeautifulSoup(response.content, 'html.parser')
    paras = soup.select('#dede_content div')
    return mailTo(essayName, paras)


def mailTo(essayName, paras):
    """Send the essay as an HTML e-mail through the 163.com SMTP server.

    Args:
        essayName: essay title, rendered as the ``<h1>`` of the mail.
        paras: iterable of parsed tags; the text of each becomes a ``<p>``.

    Returns:
        True if the message was handed to the server, False otherwise.
    """
    # The text comes from a scraped page, so escape it before embedding it
    # in our own markup.
    content = ''.join(
        '<p>' + html.escape(para.getText()) + '</p>' for para in paras
    )

    # NOTE(review): the addresses below look like redacted placeholders;
    # replace them with real addresses before running.
    sender = '发件人@163.com'
    # getpass keeps the password from being echoed to the terminal.
    pwd = getpass.getpass('Password: ')
    receivers = ['收件人[email protected]', '收件人[email protected]']

    mail_message = ('<html><body><h1>' + html.escape(essayName) + '</h1>'
                    + '<article>' + content + '</article>'
                    + '</body></html>')
    message = MIMEText(mail_message, 'html', 'utf-8')
    # Address lists in mail headers are comma-separated.
    message['To'] = ', '.join(receivers)
    message['From'] = sender
    # Subject is a fixed prefix followed by today's date (yymmdd).
    message['Subject'] = '今日美文' + time.strftime('%y%m%d')

    try:
        # The context manager closes the connection even when login or
        # sending fails.  Port 465 is the SSL port of smtp.163.com.
        with smtplib.SMTP_SSL('smtp.163.com', 465) as smtpObj:
            smtpObj.login(sender, pwd)
            smtpObj.sendmail(sender, receivers, message.as_string())
    except (smtplib.SMTPException, OSError) as e:
        print('邮件发送失败,失败原因:', e)
        return False
    print('邮件发送成功!')
    return True


if __name__ == '__main__':
    # Raw string: '\e' is not a valid escape sequence in a normal literal.
    recordsPath = r'C:\enEssaysToLH.txt'
    pages = set()
    if not os.path.exists(recordsPath):
        with open(recordsPath, 'w', encoding='utf-8'):
            print('创建记录文件')
    with open(recordsPath, 'r', encoding='utf-8') as readFile:
        # Splitting on any whitespace reads both newline-separated records
        # and files written earlier with a space separator.
        pages.update(readFile.read().split())
    # Index page of the essay section; paging starts from here.
    rootUrl = 'http://www.enread.com/essays/index.html'
    findEssay(rootUrl, pages, recordsPath)
测试时发送了很多次邮件,发现每次用英文作邮件主题(Subject)时,服务器都会返回 554 错误(邮件被拒绝);把主题统一换成中文后,同一篇文章就能正常发送出去。这里面可能涉及编码问题,具体原因待以后研究。
以上是关于用 Python 每天抓取一篇英语美文并发送到邮箱的主要内容,如果未能解决你的问题,请参考以下文章。