C#過濾html標籤的方法
阿新 • • 發佈:2020-09-09
使用 C# 過濾掉字串中所有的 HTML 標籤,只留下正文。
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace RegexTestWin
{
    /// <summary>
    /// Strips HTML markup from a string by applying an ordered table of
    /// regular-expression replacements, leaving only the text content.
    /// Obtain the shared instance through <see cref="Instance"/>.
    /// </summary>
    public class ReplaceHtml
    {
        // Parallel lists: _regexs[i] is replaced by _replacement[i].
        private readonly IList<Regex> _regexs = new List<Regex>();
        private readonly IList<string> _replacement = new List<string>();

        // volatile: the field is read outside the lock in Instance, so the
        // fully initialised object must be published safely to other threads.
        private static volatile ReplaceHtml _replaceHtml = null;
        private static readonly object _object = new object();

        private ReplaceHtml()
        {
        }

        /// <summary>
        /// Gets the shared instance, building the replacement table on first access.
        /// </summary>
        public static ReplaceHtml Instance
        {
            get
            {
                if (_replaceHtml == null)
                {
                    lock (_object)
                    {
                        if (_replaceHtml == null)
                        {
                            _replaceHtml = SetInstance(new ReplaceHtml());
                        }
                    }
                }

                return _replaceHtml;
            }
        }

        /// <summary>Returns the regular expression stored at index <paramref name="count"/>.</summary>
        /// <param name="count">Zero-based index into the replacement table.</param>
        public Regex GetRegex(int count)
        {
            return _regexs[count];
        }

        /// <summary>Returns the replacement text stored at index <paramref name="count"/>.</summary>
        /// <param name="count">Zero-based index into the replacement table.</param>
        public string GetReplacement(int count)
        {
            return _replacement[count];
        }

        /// <summary>Returns the number of entries in the replacement table.</summary>
        public int GetReplacementCount()
        {
            return _replacement.Count;
        }

        /// <summary>
        /// Removes HTML tags, script/style blocks and comment markers from
        /// <paramref name="Htmlstring"/> and decodes a small set of character entities.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
        /// <returns>
        /// The cleaned text; an empty string when the input is null or empty.
        /// Any '&lt;' or '&gt;' still present after the table has been applied
        /// (including ones produced by decoding &amp;lt; / &amp;gt;) is removed.
        /// </returns>
        public string ReplaceHtmlTag(string Htmlstring)
        {
            // Guard: the original dereferenced a null input and crashed.
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            Htmlstring = Htmlstring.Replace("\r\n", "");

            for (int count = 0; count < GetReplacementCount(); count++)
            {
                Regex aRegex = GetRegex(count);
                if (aRegex != null)
                {
                    Htmlstring = aRegex.Replace(Htmlstring, GetReplacement(count));
                }
            }

            // Final sweep: drop stray angle brackets and CR/LF pairs.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");
            return Htmlstring;
        }

        // Appends one (pattern, replacement) pair to the table.
        private void AddRegex(Regex aRegex, string Replacement)
        {
            _regexs.Add(aRegex);
            _replacement.Add(Replacement);
        }

        // Fills the given instance with the regex/replacement table and returns it.
        // Throws InvalidOperationException when the two tables differ in length.
        private static ReplaceHtml SetInstance(ReplaceHtml aReplaceHtml)
        {
            // Regular expressions and, at the same index, their replacement text.
            // The script/style patterns carry (?is) so that upper-case tags and
            // bodies containing bare line feeds are removed as a whole block
            // instead of leaking their code into the output.
            // NOTE(review): &amp; is decoded before the other entities, so input
            // such as "&amp;lt;" is decoded twice. The order is kept because the
            // indices are exposed through GetRegex/GetReplacement.
            string[] pattern = new string[]
            {
                @"(?is)<script.*?</script>", @"(?is)<style.*?</style>", @"<.*?>",
                @"<(.[^>]*)>", @"([\r\n])[\s]+", @"-->",
                @"<!--.*", @"&(quot|#34);", @"&(amp|#38);",
                @"&(lt|#60);", @"&(gt|#62);", @"&(nbsp|#160);",
                @"&(iexcl|#161);", @"&(cent|#162);", @"&(pound|#163);",
                @"&(copy|#169);", @"&#(\d+);"
            };

            // \u escapes are fixed-width, unlike \x, so appending text to one of
            // these literals later cannot silently change the character.
            string[] replacement = new string[]
            {
                "", "", "", "", "", "", "",
                "\"", "&", "<", ">", "",
                "\u00A1", "\u00A2", "\u00A3", "\u00A9", ""
            };

            if (pattern.Length != replacement.Length)
            {
                throw new InvalidOperationException("正則表示式陣列和替換後的字元陣列的長度不一致!");
            }

            for (int index = 0; index < pattern.Length; index++)
            {
                aReplaceHtml.AddRegex(new Regex(pattern[index]), replacement[index]);
            }

            return aReplaceHtml;
        }
    }
}