基于openOffice和python实现office转pdf和html示例代码
Posted reg183
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了基于openOffice和python实现office转pdf和html示例代码相关的知识,希望对你有一定的参考价值。
将office文件转化为html格式或者pdf格式
在转换之前,需要启动openOffice的服务:在openOffice目录下的命令窗口中执行 soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard 即可启动(注意命令中的引号必须是英文半角引号)
不知道如何启动的参照我的另外一篇文章
我电脑上安装的是python3.8
python的安装,在这里我就不多说了,在座的老司机应该都熟悉了。
准备好了环境之后,话不多说,开始编写脚本。
脚本代码如下:
#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# Port used by DocumentConverter when no explicit port is given; it is
# substituted into the "uno:socket,host=localhost,port=..." connection URL.
DEFAULT_OPENOFFICE_PORT = 8100
import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException
# Document family names. DocumentConverter._detectFamily returns one of these,
# and they are the keys used to look up export filters and page-style overrides.
FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"
# ---------------------#
# Configuration Start #
# ---------------------#
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# most formats are auto-detected; only those requiring options are defined here
# Maps a lower-case input file extension to extra load properties that are
# merged into the properties passed to loadComponentFromURL.
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8",
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        "FilterOptions": "44,34,0",
    },
}
# Maps a lower-case output file extension to a per-document-family dict of
# store properties (passed to storeToURL). A missing extension means the
# output format is unknown; a missing family under a known extension means
# that conversion is unsupported.
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: {"FilterName": "writer_pdf_Export"},
        FAMILY_WEB: {"FilterName": "writer_web_pdf_Export"},
        FAMILY_SPREADSHEET: {"FilterName": "calc_pdf_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_pdf_Export"},
        FAMILY_DRAWING: {"FilterName": "draw_pdf_Export"},
    },
    "html": {
        FAMILY_TEXT: {"FilterName": "HTML (StarWriter)"},
        FAMILY_SPREADSHEET: {"FilterName": "HTML (StarCalc)"},
        FAMILY_PRESENTATION: {"FilterName": "impress_html_Export"},
    },
    "odt": {
        FAMILY_TEXT: {"FilterName": "writer8"},
        FAMILY_WEB: {"FilterName": "writerweb8_writer"},
    },
    "doc": {
        FAMILY_TEXT: {"FilterName": "MS Word 97"},
    },
    "rtf": {
        FAMILY_TEXT: {"FilterName": "Rich Text Format"},
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8",
        },
    },
    "ods": {
        FAMILY_SPREADSHEET: {"FilterName": "calc8"},
    },
    "xls": {
        FAMILY_SPREADSHEET: {"FilterName": "MS Excel 97"},
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            "FilterOptions": "44,34,0",
        },
    },
    "odp": {
        FAMILY_PRESENTATION: {"FilterName": "impress8"},
    },
    "ppt": {
        FAMILY_PRESENTATION: {"FilterName": "MS PowerPoint 97"},
    },
    "swf": {
        FAMILY_DRAWING: {"FilterName": "draw_flash_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_flash_Export"},
    },
}
# Maps a document family to page-style property values that are applied to
# every page style of the loaded document before it is exported.
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        # --- Scale options: uncomment 1 of the 3 ---
        # a) Reduce / enlarge printout: Scaling factor
        "PageScale": 100,
        # b) Fit print range(s) to width / height: Width in pages and Height in pages
        # "ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) Fit print range(s) on number of pages: Fit print range(s) on number of pages
        # "ScaleToPages": 1,
        "PrintGrid": False,
    },
}
# -------------------#
# Configuration End #
# -------------------#
class DocumentConversionException(Exception):
    """Error raised when a document cannot be loaded or converted.

    Attributes:
        message: Human-readable description of the failure; also the
            value returned by ``str()`` on the exception.
    """

    def __init__(self, message):
        # Initialise the base class as well so the exception behaves like
        # any other Exception (args, repr, pickling).
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
class DocumentConverter:
    """Convert documents between office formats through a running office instance.

    The constructor connects over the Python-UNO bridge to an office process
    listening on a local socket and keeps a handle to its Desktop service.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office instance listening on localhost:``port``.

        Raises:
            DocumentConversionException: if no connection can be established.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The output format is chosen from the extension of ``outputFile``
        via EXPORT_FILTER_MAP.

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, or the conversion is not
                supported for the detected document family.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = {"Hidden": True}
        inputExt = self._getFileExt(inputFile)
        # dict.has_key() no longer exists in Python 3; use the "in" operator.
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(loadProperties))
        # NOTE(review): the UNO API documents a null return when loading
        # fails; report that explicitly instead of an AttributeError later.
        if document is None:
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load sits inside try/finally so the
        # document is closed even when family detection or the export filter
        # lookup raises.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Best effort: documents without refresh() are converted as-is.
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style."""
        if family not in PAGE_STYLE_OVERRIDE_PROPERTIES:
            return
        properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
        # "PageStyles" is the style family name and must be a string literal.
        pageStyles = document.getStyleFamilies().getByName("PageStyles")
        for styleName in pageStyles.getElementNames():
            pageStyle = pageStyles.getByName(styleName)
            for name, value in properties.items():
                pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """Return the store-properties dict for ``document`` and ``outputExt``.

        Raises:
            DocumentConversionException: if ``outputExt`` is not in
                EXPORT_FILTER_MAP, or the document family has no entry
                for that extension.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: %s" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from %s to %s" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services ``document`` supports.

        Raises:
            DocumentConversionException: if none of the known services match.
        """
        # WebDocument is tested before GenericTextDocument, so a document
        # reporting both is classified as FAMILY_WEB.
        if document.supportsService("com.sun.star.text.WebDocument"):
            return FAMILY_WEB
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # must be TextDocument or GlobalDocument
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException(
            "unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot ("" if none)."""
        # splitext() always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Convert a name -> value mapping into a tuple of PropertyValue objects."""
        # The parameter name shadows the builtin; it is kept unchanged so the
        # method signature stays the same.
        props = []
        for key, value in dict.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)
# Command-line entry point: python <script> <input-file> <output-file>
# Exit status: 255 on wrong usage, 1 on a missing input file or a failed
# conversion.
if __name__ == "__main__":
    from sys import argv, exit

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
编写完上面的脚本之后,要把它放到openOffice的目录下。
因为如果不放openOffice的目录下很多类库都引用不到,会导致程序执行不了。
最后万事俱备,只欠东风了。
打开cmd命令行,输入如下格式的命令:python <脚本文件> <输入的doc文件> <输出的pdf文件>
可以把doc转为pdf,pdf文件已经生成了
输入如下格式的命令:python <脚本文件> <输入的doc文件> <输出的html文件>
可以把doc转为html,html文件已经生成了
其他的office文件转pdf和html,大家可以按需自行尝试。
以上是关于基于openOffice和python实现office转pdf和html示例代码的主要内容,如果未能解决你的问题,请参考以下文章
python https://admintuts.com/how-to-check-the-availability-of-a-steam-id-without-using-the-official-
用于 Libreoffice-calc 和 Openoffice-calc 的 Python“Hello World”[关闭]