用 Python 抓了一些数据存到本地
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用 Python 抓了一些数据存到本地的相关知识,希望对你有一定的参考价值。
"""Scrape image metadata from paginated listing pages into an XML file.

Every ``<img>`` tag that carries both a ``title`` and a ``src`` attribute is
recorded as one ``<person>`` element (``name`` / ``imgUrl`` / ``webUrl``)
under a single ``<Actresses>`` root, which is written to ``people.xml``.
"""
import codecs
from xml.dom.minidom import Document

import requests
from bs4 import BeautifulSoup

# Shared DOM document that every scraped record is appended to.
doc = Document()

# Seconds to wait for the server before giving up on a page; without a
# timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

# Listing pages to visit (upper bound is exclusive, as in range()).
FIRST_PAGE = 1
LAST_PAGE_EXCLUSIVE = 1250


def getAllUrl(pageCount):
    """Return the listing-page URL for page number ``pageCount``."""
    url = 'https://www.xxx.co/xxxx/{page}'
    return url.format(page=pageCount)


def getHtml(pageCount):
    """Fetch listing page ``pageCount`` and return the ``requests`` response.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    return requests.get(getAllUrl(pageCount), timeout=REQUEST_TIMEOUT)


def WirteXml(gName, gImg, wUrl):
    """Append one ``<person>`` record to the shared XML document.

    Args:
        gName: text for the ``<name>`` child.
        gImg: text for the ``<imgUrl>`` child.
        wUrl: text for the ``<webUrl>`` child.

    The root ``<Actresses>`` element is created on first use, so the
    function no longer depends on globals set up by the script entry point.
    """
    root = doc.documentElement
    if root is None:
        root = doc.createElement("Actresses")
        doc.appendChild(root)

    # One <person> per record; the original reused a single element, which
    # merged every record's fields into one node.
    person = doc.createElement("person")
    root.appendChild(person)

    for tag, text in (("name", gName), ("imgUrl", gImg), ("webUrl", wUrl)):
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(text))
        person.appendChild(element)


def main():
    """Scrape every listing page and write the collected records to disk."""
    filename = "people.xml"

    try:
        for count in range(FIRST_PAGE, LAST_PAGE_EXCLUSIVE):
            try:
                html = getHtml(count).text
            except requests.RequestException as exc:
                # Skip a page that cannot be fetched instead of losing the run.
                print("page {} failed: {}".format(count, exc))
                continue

            soup = BeautifulSoup(html, "lxml")
            for img in soup.find_all("img"):
                try:
                    girlName = img.attrs["title"]
                    girlImage = img.attrs["src"]
                except KeyError:
                    # Images without a title or src are not records.
                    continue
                # The detail-page slug is the image file name minus its
                # last six characters.
                slug = girlImage.split('/')[-1][:-6]
                webUrl = "https://www.xxx.co/xx/" + slug
                WirteXml(girlName, girlImage, webUrl)
            print("第" + str(count) + "页抓完!!!")
    finally:
        # Always persist what was collected, even if the run is interrupted.
        with codecs.open(filename, "w", 'utf-8') as f:
            f.write(doc.toprettyxml(indent=" "))


if __name__ == '__main__':
    main()
以上是关于用 Python 抓了一些数据存到本地的主要内容,如果未能解决你的问题,请参考以下文章:
第十三届蓝桥杯省赛 python B组复盘(三道代码题全AC居然省一了)
python爬图片,现在想把这些图片存到本地文件夹里,然后给他们名字,最好是1,2,3等,怎么弄?