C# 清除文本中的HTML标签
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了C# 清除文本中的HTML标签相关的知识,希望对你有一定的参考价值。
/// <summary>
/// Strips a fixed list of HTML constructs from <paramref name="Content"/> and then
/// removes every whitespace character, returning what is left.
/// </summary>
/// <remarks>
/// Each pattern is applied in turn, case-insensitively, through
/// <c>Zxj_ReplaceHtml</c>, and every match is replaced with an empty string.
/// Only the tags themselves are removed; text that sits between an opening and
/// a closing tag (for example the body of a script element) is kept.
/// </remarks>
/// <param name="Content">Text to clean. <c>null</c> is treated as an empty string.</param>
/// <returns>The cleaned text, containing no whitespace; never <c>null</c>.</returns>
public static string ClearHtml(string Content)
{
    // Applied strictly in this order. A few entries repeat: the sequence is kept
    // exactly as the passes were originally written so that the result on nested
    // or malformed markup does not change.
    // Most tag patterns have no word boundary after the tag name, so e.g. the
    // "p" entry also removes any tag whose name merely starts with "p".
    string[] patterns =
    {
        // Numeric character references such as &#160;. ';' is excluded from the
        // class so the match ends at the first semicolon instead of running on
        // to the last one and swallowing the text in between.
        "&#[^>;]*;",
        "</?marquee[^>]*>",
        "</?object[^>]*>",
        "</?param[^>]*>",
        "</?embed[^>]*>",
        "</?table[^>]*>",
        // Literal space characters are dropped early, before the remaining tag passes run.
        " ",
        "</?tr[^>]*>",
        "</?th[^>]*>",
        "</?p[^>]*>",
        "</?a[^>]*>",
        "</?img[^>]*>",
        "</?tbody[^>]*>",
        "</?li[^>]*>",
        "</?span[^>]*>",
        "</?div[^>]*>",
        "</?th[^>]*>",
        "</?td[^>]*>",
        "</?script[^>]*>",
        // NOTE(review): this also deletes these letter sequences inside ordinary
        // words (e.g. "monkey" becomes "m"), not only in event-handler attribute
        // names - confirm that is intended.
        "on(mouse|exit|error|click|key)",
        // XML declarations / processing instructions that start with "<?xml".
        "<\\?xml[^>]*>",
        // Namespaced tags of the form <prefix:...> or </prefix:...>.
        "<\\/?[a-z]+:[^>]*>",
        "</?font[^>]*>",
        "</?b[^>]*>",
        "</?u[^>]*>",
        "</?i[^>]*>",
        "</?strong[^>]*>",
        "</?strong[^>]*>",
        " "
    };

    foreach (string pattern in patterns)
    {
        Content = Zxj_ReplaceHtml(pattern, "", Content);
    }

    // Remove all remaining whitespace (not just leading/trailing), so no
    // separate Trim() is needed afterwards.
    return Regex.Replace(Content, @"\s+", "");
}
/// <summary>
/// Replaces every case-insensitive match of a regular expression in a piece of text.
/// </summary>
/// <param name="patrn">Regular expression describing the text to replace.</param>
/// <param name="strRep">Replacement string substituted for each match.</param>
/// <param name="content">Text to process; <c>null</c> is handled as an empty string.</param>
/// <returns>The text with every match of <paramref name="patrn"/> replaced.</returns>
private static string Zxj_ReplaceHtml(string patrn, string strRep, string content)
{
    // A null input is normalised to "" so the regex engine always receives a string.
    string input = content ?? string.Empty;
    return Regex.Replace(input, patrn, strRep, RegexOptions.IgnoreCase);
}
以上是关于C# 清除文本中的HTML标签的主要内容,如果未能解决你的问题,请参考以下文章。