python slideshare-dl.py

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python slideshare-dl.py相关的知识,希望对你有一定的参考价值。

#!/usr/bin/env python2.7
#-*- coding: utf-8 -*-
import os,sys
import urllib2
from BeautifulSoup import BeautifulSoup
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import portrait,A4,landscape
# Python 2 only: the interpreter removes sys.setdefaultencoding at startup,
# so sys is reloaded to get it back. The call then switches the process-wide
# implicit str <-> unicode conversion codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

class SlideShare(object):
    """Download the slide images of a SlideShare deck and bundle them in a PDF.

    Typical use is ``SlideShare().get(url)``. That call fetches the deck page,
    downloads every ``<img class="slide_image">`` into a directory named after
    the last path segment of the URL, and writes a PDF with one page per
    slide into that same directory.

    The class relies on ``urllib2``, ``BeautifulSoup`` and reportlab's
    ``canvas`` being imported at the top of the file.
    """

    # Text removed from the deck title when the PDF file name is derived.
    TITLE_STRIP = "HBaseCon 2015: "
    # Page size, in points, used for every PDF page and every drawn slide.
    PAGE_WIDTH = 1024
    PAGE_HEIGHT = 576
    # Network timeout, in seconds, for the page and for each image request.
    TIMEOUT = 30
    # Number of bytes read per iteration while downloading an image.
    BLOCK_SIZE = 8192

    def __init__(self, url=None):
        """Create a downloader, optionally remembering ``url`` for later use."""
        self.url = url
        self.__slide_name = ''
        self.__files = []
        self.__images = []
        # An empty string means "no page loaded yet".
        self.soup = ""

    @staticmethod
    def _slide_name_from_url(url):
        """Return the last path segment of ``url``.

        Any query string, fragment and trailing slash is ignored.

        Raises:
            ValueError: if ``url`` is empty or has no usable path segment.
        """
        if not url:
            raise ValueError("a SlideShare URL is required")
        path = url.split('#')[0].split('?')[0].rstrip('/')
        name = path.split('/')[-1]
        if not name:
            raise ValueError("cannot derive a slide name from %r" % (url,))
        return name

    def _has_page(self):
        """Tell whether ``self.soup`` holds a parsed page that can be searched."""
        return hasattr(self.soup, 'findAll')

    def get_soup(self, url):
        """Fetch ``url`` and parse it into ``self.soup``.

        Returns the parsed page. On any failure it prints the reason, resets
        ``self.soup`` to the empty string and returns ``None``, so a page
        left over from an earlier call is never reused.
        """
        self.soup = ""
        try:
            response = urllib2.urlopen(url, timeout=self.TIMEOUT)
            try:
                source = response.read()
            finally:
                response.close()
            self.soup = BeautifulSoup(source)
        except Exception as exc:
            # Best-effort boundary: report the problem instead of crashing.
            print("url error: %s" % exc)
            return None
        return self.soup

    def get_image_file(self):
        """Download every slide image of the loaded page.

        Creates (and changes into) the directory named after ``self.url``.
        Every image whose file is present afterwards is recorded for
        ``generate_pdf``. Images already on disk are not downloaded again.
        An image that has no ``data-full`` attribute, or whose download
        fails, is skipped so one bad slide does not stop the rest.
        """
        del self.__images[:]
        self.__slide_name = self._slide_name_from_url(self.url)
        self.create_directory(self.__slide_name)
        if not self._has_page():
            print("error url")
            return
        for image in self.soup.findAll('img', {'class': 'slide_image'}):
            full_url = image.get('data-full')
            if not full_url:
                continue
            image_url = full_url.split('?')[0]
            image_name = image_url.split('/')[-1]
            if not image_name:
                continue
            if not os.path.isfile(image_name):
                try:
                    self.download_file(image_url)
                except Exception as exc:
                    print("error url: %s (%s)" % (image_url, exc))
                    continue
            # Only files that really exist are queued for the PDF.
            self.__images.append(image_name)

    def create_directory(self, dir_name):
        """Create ``dir_name`` if needed and make it the working directory."""
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        os.chdir(dir_name)

    def download_file(self, url):
        """Download ``url`` into the working directory and return its name.

        The data is first written to ``<name>.part`` and renamed only after
        the transfer completes, so an interrupted download never leaves a
        truncated file under the final name. A missing or invalid
        ``Content-Length`` header is tolerated; progress is then shown as a
        byte count without a percentage.
        """
        file_name = url.split('/')[-1]
        partial_name = file_name + '.part'
        response = urllib2.urlopen(url, timeout=self.TIMEOUT)
        try:
            try:
                file_size = int(response.info().getheaders("Content-Length")[0])
            except (IndexError, ValueError):
                file_size = 0
            print("Downloading: %s Bytes: %s" % (file_name, file_size))

            downloaded = 0
            with open(partial_name, 'wb') as out:
                while True:
                    chunk = response.read(self.BLOCK_SIZE)
                    if not chunk:
                        break
                    out.write(chunk)
                    downloaded += len(chunk)
                    if file_size > 0:
                        status = r"%10d  [%3.2f%%]" % (
                            downloaded, downloaded * 100. / file_size)
                    else:
                        status = "%10d" % downloaded
                    # Backspaces move the cursor back so that the next
                    # status overwrites this one on the same line.
                    sys.stdout.write(status + chr(8) * len(status))
                    sys.stdout.flush()
            sys.stdout.write("\n")
            # Replace an existing file, as opening it with 'wb' would have.
            if os.path.exists(file_name):
                os.remove(file_name)
            os.rename(partial_name, file_name)
        finally:
            response.close()
            # Still present only when the transfer did not complete.
            if os.path.exists(partial_name):
                os.remove(partial_name)
        return file_name

    def _pdf_name(self):
        """Build the PDF file name from the deck title.

        Falls back to the slide name (or ``"slides"``) when the page has no
        title heading. Path separators are replaced so that the title cannot
        point outside the working directory.
        """
        headings = []
        if self._has_page():
            headings = self.soup.findAll(
                'h1', {'class': 'notranslate slideshow-title-text'})
        fallback = self.__slide_name or "slides"
        title = headings[0].text if headings else fallback
        title = title.replace("'", "").replace(self.TITLE_STRIP, "")
        for separator in ('/', os.sep, os.altsep):
            if separator:
                title = title.replace(separator, "_")
        title = title.strip() or fallback
        return title + ".pdf"

    def generate_pdf(self):
        """Write the downloaded slide images into a PDF, one page per image.

        Nothing is written when the PDF already exists or when no slide
        image is available.
        """
        pdf_name = self._pdf_name()
        print("Generating PDF %s..." % (pdf_name))
        if os.path.isfile(pdf_name):
            return
        if not self.__images:
            # An empty PDF would block a later, successful run.
            print("No slide images available, PDF not generated.")
            return
        pdf = canvas.Canvas(
            pdf_name, pagesize=(self.PAGE_WIDTH, self.PAGE_HEIGHT))
        for filename in self.__images:
            image_path = os.path.join(os.getcwd(), filename)
            pdf.drawImage(image_path, 0, 0, self.PAGE_WIDTH, self.PAGE_HEIGHT)
            pdf.showPage()
        pdf.save()
        print("Done.")

    def get(self, url):
        """Download the deck at ``url`` and build its PDF.

        Stops after reporting the error when the page cannot be fetched.
        The working directory in effect at call time is always restored,
        even when a step raises.
        """
        self.url = url
        start_dir = os.getcwd()
        try:
            if self.get_soup(self.url) is None:
                return
            self.get_image_file()
            self.generate_pdf()
        finally:
            os.chdir(start_dir)
def main():
    """Ask for a SlideShare URL on standard input and download that deck."""
    slide_url = raw_input('Slideshare URL : ')
    SlideShare().get(slide_url)
# Run the interactive downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
	main()

以上是关于python slideshare-dl.py的主要内容,如果未能解决你的问题,请参考以下文章:

代写python,代写python编程,python代写,python编程代写,留学生python代写

001--python全栈--基础知识--python安装

Python代写,Python作业代写,代写Python,代做Python

Python开发

Python,python,python

Python 介绍