正規表現を使用して HTML コードを削除する

著者：Eve Cole 更新時間：2009-07-01 15:56:10

using System.Text.RegularExpressions; // required: provides the Regex class

// Method 1: use a regular expression to remove everything between "<" and ">".
/// <summary>
/// Removes every "&lt;...&gt;" span from the given markup and returns what is left.
/// </summary>
/// <param name="strHtml">Markup to clean. May be null or empty.</param>
/// <returns>
/// The input with each "&lt;...&gt;" span deleted; an empty string when the
/// input is null or empty.
/// </returns>
private string StripHT(string strHtml)
{
    // Nothing to strip; this also avoids the ArgumentNullException that
    // Regex.Replace throws for a null input.
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Singleline lets "." cross line breaks, so a tag whose attributes wrap onto
    // the next line is removed too. The lazy ".+?" stops at the first ">".
    return Regex.Replace(strHtml, "<.+?>", string.Empty, RegexOptions.Singleline);
}

// Method 2: table-driven clean-up that also decodes common character entities.
// The tag pattern as originally published could drive CPU usage to 100%; the
// pattern used below does not have that problem (see the comment on its row).
/// <summary>
/// Converts an HTML fragment to plain text: removes script blocks, tags and
/// comments, decodes a fixed set of character entities, and finally deletes any
/// remaining angle brackets and CR/LF pairs.
/// </summary>
/// <param name="strHtml">Markup to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text; an empty string when the input is null or empty. The result
/// never contains "&lt;" or "&gt;", because the final pass deletes them, including
/// ones produced by decoding <c>&amp;lt;</c> / <c>&amp;gt;</c>.
/// </returns>
public static string DropHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }. Rows are applied top to bottom and
    // the order matters: scripts go before generic tags, and "-->" is turned
    // into a line break before the "<!-- ... end of line" rule runs.
    string[,] rules =
    {
        // Whole script elements. (?s) lets "." cross line breaks so multi-line
        // scripts are removed together with their content.
        { @"(?s)<script[^>]*?>.*?</script>", "" },

        // Any tag. Quoted attribute values may contain ">". Every character can
        // be consumed in only one way (outside quotes, or inside one quoted
        // value), so an unterminated tag fails quickly instead of backtracking
        // exponentially, which is what pinned the CPU with the nested-quantifier
        // pattern this replaces.
        { @"<(?:/\s*)?!?\w[^<>""']*(?:(?:""[^""]*""|'[^']*')[^<>""']*)*>", "" },

        // A line break together with the whitespace that follows it.
        { @"([\r\n])[\s]+", "" },

        // Named / numeric character entities.
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },  // chr(161)
        { @"&(cent|#162);", "\u00A2" },   // chr(162)
        { @"&(pound|#163);", "\u00A3" },  // chr(163)
        { @"&(copy|#169);", "\u00A9" },   // chr(169)

        // Any other numeric entity is dropped.
        { @"&#(\d+);", "" },

        // Comments: end the line at "-->" so that the next rule, which deletes
        // from "<!--" to the end of the line, removes exactly the comment.
        { @"-->", "\r\n" },
        { @"<!--.*\n", "" }
    };

    string strOutput = strHtml;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the results must be assigned or the
    // clean-up silently does nothing.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");

    return strOutput;
}
参考：http://www.cnblogs.com/wang123/archive/2006/09/16/505758.html