OpenXML - 将书签应用于 Word 文档中的段落
Posted
技术标签:
【中文标题】OpenXML - 将书签应用于 Word 文档中的段落【英文标题】:OpenXML - Apply bookmarks to paragraph in word document 【发布时间】:2019-02-11 14:50:48 【问题描述】:以下代码使用 OPENXML (asp.net) 可以正常工作,并使用 HEADING2 在 word 文档中打印我们的元素...我们如何将书签应用于特定段落..
我们正在尝试提取两个标题之间的部分...我们想知道如何应用书签以及如何使用两个书签之间的提取文本...
const string fileName = @"D:\DocFiles\Scan.docx";
const string documentRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const string stylesRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
const string wordmlNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
XNamespace w = wordmlNamespace;
XDocument xDoc = null;
XDocument styleDoc = null;
using (Package wdPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
PackageRelationship docPackageRelationship =
wdPackage
.GetRelationshipsByType(documentRelationshipType)
.FirstOrDefault();
if (docPackageRelationship != null)
Uri documentUri =
PackUriHelper
.ResolvePartUri(
new Uri("/", UriKind.Relative),
docPackageRelationship.TargetUri);
PackagePart documentPart =
wdPackage.GetPart(documentUri);
// Load the document XML in the part into an XDocument instance.
xDoc = XDocument.Load(XmlReader.Create(documentPart.GetStream()));
// Find the styles part. There will only be one.
PackageRelationship styleRelation =
documentPart.GetRelationshipsByType(stylesRelationshipType)
.FirstOrDefault();
if (styleRelation != null)
Uri styleUri = PackUriHelper.ResolvePartUri(documentUri, styleRelation.TargetUri);
PackagePart stylePart = wdPackage.GetPart(styleUri);
// Load the style XML in the part into an XDocument instance.
styleDoc = XDocument.Load(XmlReader.Create(stylePart.GetStream()));
string defaultStyle =
(string)(
from style in styleDoc.Root.Elements(w + "style")
where (string)style.Attribute(w + "type") == "paragraph" &&
(string)style.Attribute(w + "default") == "1"
select style
).First().Attribute(w + "styleId");
// Find all paragraphs in the document.
var paragraphs =
from para in xDoc
.Root
.Element(w + "body")
.Descendants(w + "p")
let styleNode = para
.Elements(w + "pPr")
.Elements(w + "pStyle")
.FirstOrDefault()
select new
ParagraphNode = para,
StyleName = styleNode != null ?
(string)styleNode.Attribute(w + "val") :
defaultStyle
;
// Retrieve the text of each paragraph.
var paraWithText =
from para in paragraphs
select new
ParagraphNode = para.ParagraphNode,
StyleName = para.StyleName,
Text = ParagraphText(para.ParagraphNode)
;
foreach (var p in paraWithText)
if (p.StyleName=="Heading2")
Response.Write(p.StyleName + " -" + p.Text);
Response.Write("</br>");
【问题讨论】:
您是在问如何创建书签,还是您已经有了书签并想阅读它们之间的文字? 【参考方案1】:这是我创建的示例Bookmark
类,用于演示如何处理书签。它会找到成对的 w:bookmarkStart
和 w:bookmarkEnd
元素,并展示如何在这两个标记之间获取 w:r
元素。在此基础上,您可以对文本进行处理,例如,如GetValue()
方法所示。
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using OpenXmlPowerTools;
namespace CodeSnippets.OpenXml.Wordprocessing
/// <summary>
/// Represents a corresponding pair of w:bookmarkStart and w:bookmarkEnd elements.
/// </summary>
public class Bookmark
private Bookmark(XElement root, string bookmarkName)
Root = root;
BookmarkStart = new XElement(W.bookmarkStart,
new XAttribute(W.id, -1),
new XAttribute(W.name, bookmarkName));
BookmarkEnd = new XElement(W.bookmarkEnd,
new XAttribute(W.id, -1));
private Bookmark(XElement root, XElement bookmarkStart, XElement bookmarkEnd)
Root = root;
BookmarkStart = bookmarkStart;
BookmarkEnd = bookmarkEnd;
/// <summary>
/// The root element containing both <see cref="BookmarkStart"/> and
/// <see cref="BookmarkEnd"/>.
/// </summary>
public XElement Root get;
/// <summary>
/// The w:bookmarkStart element.
/// </summary>
public XElement BookmarkStart get;
/// <summary>
/// The w:bookmarkEnd element.
/// </summary>
public XElement BookmarkEnd get;
/// <summary>
/// Finds a pair of w:bookmarkStart and w:bookmarkEnd elements in the given
/// <paramref name="root"/> element, where the w:name attribute value of the
/// w:bookmarkStart element is equal to <paramref name="bookmarkName"/>.
/// </summary>
/// <param name="root">The root <see cref="XElement"/>.</param>
/// <param name="bookmarkName">The bookmark name.</param>
/// <returns>A new <see cref="Bookmark"/> instance representing the bookmark.</returns>
public static Bookmark Find(XElement root, string bookmarkName)
XElement bookmarkStart = root
.Descendants(W.bookmarkStart)
.FirstOrDefault(e => (string) e.Attribute(W.name) == bookmarkName);
string id = bookmarkStart?.Attribute(W.id)?.Value;
if (id == null) return new Bookmark(root, bookmarkName);
XElement bookmarkEnd = root
.Descendants(W.bookmarkEnd)
.FirstOrDefault(e => (string) e.Attribute(W.id) == id);
return bookmarkEnd != null
? new Bookmark(root, bookmarkStart, bookmarkEnd)
: new Bookmark(root, bookmarkName);
/// <summary>
/// Gets all w:r elements between the bookmark's w:bookmarkStart and
/// w:bookmarkEnd elements.
/// </summary>
/// <returns>A collection of w:r elements.</returns>
public IEnumerable<XElement> GetRuns()
return Root
.Descendants()
.SkipWhile(d => d != BookmarkStart)
.Skip(1)
.TakeWhile(d => d != BookmarkEnd)
.Where(d => d.Name == W.r);
/// <summary>
/// Gets the concatenated inner text of all runs between the bookmark's
/// w:bookmarkStart and w:bookmarkEnd elements, ignoring paragraph marks
/// and page breaks.
/// </summary>
/// <remarks>
/// The output of this method can be compared to the output of the
/// <see cref="XElement.Value"/> property.
/// </remarks>
/// <returns>The concatenated inner text.</returns>
public string GetValue()
return GetRuns().Select(UnicodeMapper.RunToString).StringConcatenate();
上面的类处理文件如下(非常简单的测试文件):
<?xml version="1.0" encoding="utf-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:r>
<w:t>First</w:t>
</w:r>
</w:p>
<w:bookmarkStart w:id="1" w:name="_Bm001" />
<w:p>
<w:r>
<w:t>Second</w:t>
</w:r>
</w:p>
<w:p>
<w:r>
<w:t>Third</w:t>
</w:r>
</w:p>
<w:bookmarkEnd w:id="1" />
<w:p>
<w:r>
<w:t>Fourth</w:t>
</w:r>
</w:p>
</w:body>
</w:document>
以上文档由以下单元测试创建,这些单元测试演示了如何使用Bookmark
类:
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Linq;
using CodeSnippets.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
using Xunit;
namespace CodeSnippets.Tests.OpenXml.Wordprocessing
public class BookmarkTests
/// <summary>
/// The w:name value of our bookmark.
/// </summary>
private const string BookmarkName = "_Bm001";
/// <summary>
/// The w:id value of our bookmark.
/// </summary>
private const int BookmarkId = 1;
/// <summary>
/// The test w:document with our bookmark, which encloses the two runs
/// with inner texts "Second" and "Third".
/// </summary>
private static readonly XElement Document =
new XElement(W.document,
new XAttribute(XNamespace.Xmlns + "w", W.w.NamespaceName),
new XElement(W.body,
new XElement(W.p,
new XElement(W.r,
new XElement(W.t, "First"))),
new XElement(W.bookmarkStart,
new XAttribute(W.id, BookmarkId),
new XAttribute(W.name, BookmarkName)),
new XElement(W.p,
new XElement(W.r,
new XElement(W.t, "Second"))),
new XElement(W.p,
new XElement(W.r,
new XElement(W.t, "Third"))),
new XElement(W.bookmarkEnd,
new XAttribute(W.id, BookmarkId)),
new XElement(W.p,
new XElement(W.r,
new XElement(W.t, "Fourth")))
)
);
/// <summary>
/// Creates a <see cref="WordprocessingDocument"/> for on a <see cref="MemoryStream"/>
/// testing purposes, using the given <paramref name="document"/> as the w:document
/// root element of the main document part.
/// </summary>
/// <param name="document">The w:document root element.</param>
/// <returns>The <see cref="MemoryStream"/> containing the <see cref="WordprocessingDocument"/>.</returns>
private static MemoryStream CreateWordprocessingDocument(XElement document)
var stream = new MemoryStream();
const WordprocessingDocumentType type = WordprocessingDocumentType.Document;
using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(stream, type))
MainDocumentPart part = wordDocument.AddMainDocumentPart();
part.PutXDocument(new XDocument(document));
return stream;
[Fact]
public void GetRuns_WordprocessingDocumentWithBookmarks_CorrectRunsReturned()
// Arrange.
// Create a new Word document on a Stream, using the test w:document
// as the main document part.
Stream stream = CreateWordprocessingDocument(Document);
// Open the WordprocessingDocument on the Stream, using the Open XML SDK.
using WordprocessingDocument wordDocument = WordprocessingDocument.Open(stream, true);
// Get the w:document element from the main document part and find
// our bookmark.
XElement document = wordDocument.MainDocumentPart.GetXElement();
Bookmark bookmark = Bookmark.Find(document, BookmarkName);
// Act, getting the bookmarked runs.
IEnumerable<XElement> runs = bookmark.GetRuns();
// Assert.
Assert.Equal(new[] "Second", "Third", runs.Select(run => run.Value));
[Fact]
public void GetText_WordprocessingDocumentWithBookmarks_CorrectRunsReturned()
// Arrange.
// Create a new Word document on a Stream, using the test w:document
// as the main document part.
Stream stream = CreateWordprocessingDocument(Document);
// Open the WordprocessingDocument on the Stream, using the Open XML SDK.
using WordprocessingDocument wordDocument = WordprocessingDocument.Open(stream, true);
// Get the w:document element from the main document part and find
// our bookmark.
XElement document = wordDocument.MainDocumentPart.GetXElement();
Bookmark bookmark = Bookmark.Find(document, BookmarkName);
// Act, getting the concatenated text contents of the bookmarked runs.
string text = bookmark.GetValue();
// Assert.
Assert.Equal("SecondThird", text);
您可以在我的CodeSnippets GitHub 存储库中找到完整的代码示例。查找Bookmark 和BookmarkTests 类,注意我使用的是Open-Xml-PowerTools。
显然,您可以使用这些 Open XML 元素做更复杂的事情。这只是一个简单的例子。
【讨论】:
以上是关于OpenXML - 将书签应用于 Word 文档中的段落的主要内容,如果未能解决你的问题,请参考以下文章
使用 OpenXML SDK 2.0 将页眉和页脚添加到现有的空 word 文档