python slideshare-dl.py
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python slideshare-dl.py相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python2.7
#-*- coding: utf-8 -*-
import os,sys
import urllib2
from BeautifulSoup import BeautifulSoup
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import portrait,A4,landscape
# Python 2 idiom: re-import ``sys`` and then switch the interpreter-wide
# default string encoding to UTF-8.
# NOTE(review): presumably here so that non-ASCII slide titles can be mixed
# with byte strings when building file names and messages -- confirm before
# removing, since it changes implicit str/unicode conversions process-wide.
reload(sys)
sys.setdefaultencoding('utf-8')
class SlideShare(object):
    """Download the slide images of a SlideShare deck and bind them into a PDF.

    Typical use is ``SlideShare().get(url)``.  The individual steps
    (``get_soup``, ``get_image_file``, ``generate_pdf``) remain callable on
    their own; note that ``get_image_file`` changes the working directory to
    the per-deck directory and only ``get`` changes it back.
    """

    # Seconds to wait on each HTTP request.
    TIMEOUT = 30
    # Number of bytes read from the network per iteration while downloading.
    BLOCK_SIZE = 8192
    # (width, height) of every PDF page; each slide image is drawn to fill it.
    PAGE_SIZE = (1024, 576)
    # Substrings removed from the deck title when building the PDF file name.
    TITLE_NOISE = ("HBaseCon 2015: ",)

    def __init__(self, url=None):
        """Remember an optional deck URL and start with no page and no images.

        Args:
            url: Address of the deck page; may also be supplied later to
                ``get``.
        """
        self.url = url
        self.__slide_name = ''
        # Kept from the original attribute set; no method of this class uses it.
        self.__files = []
        self.__images = []
        # Parsed page, or "" while no page has been fetched successfully.
        self.soup = ""

    def get_soup(self, url):
        """Fetch *url* and parse it with BeautifulSoup.

        Args:
            url: Address of the deck page.

        Returns:
            The parsed page (also stored in ``self.soup``), or ``None`` when
            the page could not be fetched or parsed.  On failure ``self.soup``
            is reset to ``""`` so a page from an earlier call is never reused
            for a different URL.
        """
        try:
            response = urllib2.urlopen(url, timeout=self.TIMEOUT)
            try:
                source = response.read()
            finally:
                response.close()
            self.soup = BeautifulSoup(source)
        except Exception as exc:
            # Boundary of the fetch step: report the reason instead of
            # silently continuing with an unusable page.
            self.soup = ""
            print("url error: %s" % exc)
            return None
        return self.soup

    def get_image_file(self):
        """Download every slide image of the current page into a deck directory.

        The directory is named after the last path segment of ``self.url``,
        is created when missing, and becomes the working directory.  Images
        already present on disk are not downloaded again.  Only images that
        are actually on disk afterwards are remembered for ``generate_pdf``;
        a slide whose download fails is reported and skipped.
        """
        del self.__images[:]
        if not self.url or not self._has_soup():
            print("error url")
            return
        self.__slide_name = self._slide_name_from_url(self.url)
        if not self.__slide_name:
            print("error url")
            return
        self.create_directory(self.__slide_name)
        for image in self.soup.findAll('img', {'class': 'slide_image'}):
            full_size = image.get('data-full')
            if not full_size:
                # No full-size address on this tag; nothing to download.
                continue
            image_url = full_size.split('?')[0]
            file_name = image_url.split('/')[-1]
            if not os.path.isfile(file_name):
                try:
                    self.download_file(image_url)
                except Exception as exc:
                    # One broken slide must not abort the remaining ones.
                    print("error url: %s (%s)" % (image_url, exc))
                    continue
            self.__images.append(file_name)

    def create_directory(self, dir_name):
        """Create *dir_name* if needed and make it the working directory.

        Args:
            dir_name: Directory path, relative to the current directory or
                absolute.

        Raises:
            OSError: If the directory cannot be created or entered.
        """
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)
        os.chdir(dir_name)

    def download_file(self, url):
        """Download *url* into the working directory, printing progress.

        The file is named after the last path segment of *url*.  Data is
        first written to ``<name>.part`` and renamed only once the transfer
        is complete, so an interrupted download is never mistaken for a
        finished file on the next run.

        Args:
            url: Address of the file to download.

        Raises:
            IOError: If fewer bytes arrive than the server announced.
            Exception: Whatever ``urllib2.urlopen`` or the file system raise.
        """
        file_name = url.split('/')[-1]
        partial_name = file_name + '.part'
        response = urllib2.urlopen(url, timeout=self.TIMEOUT)
        completed = False
        try:
            # The header may be absent or malformed; 0 means "size unknown".
            lengths = response.info().getheaders("Content-Length")
            try:
                file_size = int(lengths[0]) if lengths else 0
            except ValueError:
                file_size = 0
            print("Downloading: %s Bytes: %s"
                  % (file_name, file_size or "unknown"))
            file_size_dl = 0
            with open(partial_name, 'wb') as out:
                while True:
                    chunk = response.read(self.BLOCK_SIZE)
                    if not chunk:
                        break
                    out.write(chunk)
                    file_size_dl += len(chunk)
                    if file_size:
                        status = "%10d [%3.2f%%]" % (
                            file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        status = "%10d" % file_size_dl
                    # Carriage return redraws the progress on the same line.
                    sys.stdout.write("\r" + status)
                    sys.stdout.flush()
            sys.stdout.write("\n")
            if file_size and file_size_dl < file_size:
                raise IOError("incomplete download of %s: %d of %d bytes"
                              % (file_name, file_size_dl, file_size))
            completed = True
        finally:
            response.close()
            if not completed and os.path.exists(partial_name):
                os.remove(partial_name)
        if os.path.exists(file_name):
            # Renaming onto an existing file fails on some platforms.
            os.remove(file_name)
        os.rename(partial_name, file_name)

    def generate_pdf(self):
        """Write one PDF page per downloaded slide image.

        The PDF is created in the working directory and named after the deck
        title (see ``_pdf_name_from_title``).  Nothing is written when no
        images are available, and an existing PDF of the same name is left
        untouched.
        """
        if not self.__images:
            print("No slide images available; skipping PDF generation.")
            return
        pdf_name = self._pdf_name()
        print("Generating PDF %s..." % (pdf_name))
        if not os.path.isfile(pdf_name):
            page_width, page_height = self.PAGE_SIZE
            pdf = canvas.Canvas(pdf_name, pagesize=self.PAGE_SIZE)
            for filename in self.__images:
                image_path = os.path.join(os.getcwd(), filename)
                pdf.drawImage(image_path, 0, 0, page_width, page_height)
                pdf.showPage()
            pdf.save()
        print("Done.")

    def get(self, url):
        """Fetch the deck at *url*, download its slides and build the PDF.

        The working directory in effect when the call starts is restored
        afterwards, whether or not a step fails.

        Args:
            url: Address of the deck page.
        """
        self.url = url
        start_dir = os.getcwd()
        try:
            if self.get_soup(self.url) is None:
                return
            self.get_image_file()
            self.generate_pdf()
        finally:
            os.chdir(start_dir)

    def _has_soup(self):
        """Return True when ``self.soup`` is a parsed page, not the "" placeholder."""
        return hasattr(self.soup, 'findAll')

    @staticmethod
    def _slide_name_from_url(url):
        """Return the last path segment of *url*, ignoring query, fragment and trailing slashes."""
        path = url.split('#')[0].split('?')[0].rstrip('/')
        return path.split('/')[-1]

    def _pdf_name(self):
        """Return the PDF file name derived from the page title, else from the deck name."""
        title = ''
        if self._has_soup():
            headings = self.soup.findAll(
                'h1', {'class': 'notranslate slideshow-title-text'})
            if headings:
                title = headings[0].text
        return self._pdf_name_from_title(title, self.__slide_name)

    @classmethod
    def _pdf_name_from_title(cls, title, fallback='slides'):
        """Turn a deck title into a PDF file name that is safe to create.

        Apostrophes and the ``TITLE_NOISE`` substrings are removed, path
        separators become underscores and surrounding whitespace is dropped;
        *fallback* is used when nothing is left.
        """
        name = title.replace("'", "")
        for noise in cls.TITLE_NOISE:
            name = name.replace(noise, "")
        for separator in ('/', '\\'):
            name = name.replace(separator, '_')
        name = name.strip()
        return (name or fallback or 'slides') + ".pdf"
def main():
    """Download the deck named on the command line, or prompt for its URL.

    The first command-line argument is used as the URL when present;
    otherwise the user is asked for it.  Surrounding whitespace (for example
    from a pasted line) is removed, and nothing is downloaded when no URL is
    given.
    """
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        try:
            url = raw_input('Slideshare URL : ')
        except EOFError:
            # Input was closed without an answer; treat it as "no URL".
            url = ''
    url = url.strip()
    if not url:
        print("No URL given.")
        return
    slide = SlideShare()
    slide.get(url)


if __name__ == "__main__":
    main()
以上是关于python slideshare-dl.py的主要内容,如果未能解决你的问题,请参考以下文章:
代写python,代写python编程,python代写,python编程代写,留学生python代写