"""Print every article linked from the news listing page at ``LIST_URL``.

For each ``<li>`` on the listing page that carries a ``.news-list-title``
element, the script reads the title and description from the listing, follows
the item's first link, and prints the article's meta line and body text.
"""
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

LIST_URL = "http://news.gzcc.cn/html/xiaoyuanxinwen/"
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10
SEPARATOR = '------------------------------------------------------------------------------'
# The ``.show-info`` line is read as: date, time, author, checker, source.
INFO_FIELD_COUNT = 5


def _fetch_soup(url):
    """Download *url* and return it parsed with the built-in HTML parser.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The pages are decoded as UTF-8 regardless of what the headers announce.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def _text_of(node, selector):
    """Return the text of the first match for *selector*, or '' if none."""
    match = node.select_one(selector)
    return match.text if match is not None else ''


def _split_show_info(show_info):
    """Split the article meta line into exactly ``INFO_FIELD_COUNT`` fields.

    The line is split on whitespace; missing trailing fields are returned as
    empty strings so that a short line does not raise ``IndexError``.
    """
    fields = show_info.split()[:INFO_FIELD_COUNT]
    fields += [''] * (INFO_FIELD_COUNT - len(fields))
    return fields


def _print_article(title, description, link):
    """Fetch the article at *link* and print it together with its listing data."""
    article = _fetch_soup(link)
    date, clock, author, checker, source = _split_show_info(
        _text_of(article, '.show-info')
    )
    content = _text_of(article, '#content')

    print(SEPARATOR)
    print("文章标题:" + title)
    print("\n文章描述:" + description)
    print("\n文章信息:\n" + date + ' ' + clock + '\n' + author + '\n' + checker + '\n' + source)
    print("\n文章链接:" + link)
    print(content)
    print(SEPARATOR)


def main():
    """Walk the listing page and print each article it links to."""
    listing = _fetch_soup(LIST_URL)
    for item in listing.select('li'):
        # Only list items that carry a news title are articles.
        if not item.select('.news-list-title'):
            continue

        anchor = item.select_one('a')
        if anchor is None or not anchor.get('href'):
            continue
        # Absolute links pass through unchanged; relative ones are resolved
        # against the listing URL so they can be requested.
        link = urljoin(LIST_URL, anchor['href'])

        title = _text_of(item, '.news-list-title')
        description = _text_of(item, '.news-list-description')
        try:
            _print_article(title, description, link)
        except requests.RequestException as err:
            # One unreachable article should not abort the whole run.
            print("skipped " + link + ": " + str(err), file=sys.stderr)


if __name__ == "__main__":
    main()