正則表示式，去除所有HTML標籤

阿新 • • 發佈：2019-01-17

// Sample markup held by the page; Page_Load rewrites it in place.
protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''> 說是道 ";

/// <summary>
/// Strips every tag from <c>str</c> except img, br and p (opening and closing),
/// and stores the result back into <c>str</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns for other stripping policies:
    //   @"<[^>]*>"                      removes every tag
    //   @"<script[^>]*?>.*?</script>"   removes scripts together with their content
    //   @"<img[^>]*>"                   removes images only
    //   @"<(?!br).*?>"                  removes every tag except br
    //   @"<table[^>]*?>.*?</table>"     removes tables together with their content

    // The \b after the allowed names makes the lookahead match whole tag names only;
    // without it "p" also protected <pre>, <param>, <picture>, ... from removal.
    // [^>]* (instead of .*?) lets a tag that spans several lines be removed too.
    string regexstr = @"<(?!(?:img|br|p|/p)\b)[^>]*>";

    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}

ASP.NET 去除所有HTML標記 < type="text/

JavaScript">function StorePage(){d=document;t=d.selection?(d.selection.type!='None'?d.selection.createRange().text:''):(d.getSelection?d.getSelection():'');void(keyit=window.open('http://www.365key.com/storeit.aspx?t='+escape(d.title)+'&u='+escape(d.location.href)+'&c='+escape(t),'keyit','scrollbars=no,width=475,height=575,left=75,top=20,status=no,resizable=yes'));keyit.focus();}
注意:需要先using System.Text.RegularExpressions;

/// <summary>
/// Removes HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML markup; may be null or empty.</param>
/// <returns>
/// The text with scripts, comments and tags removed and a small set of entities decoded,
/// then HTML-encoded and trimmed. An empty string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Scripts: Singleline lets '.' cross line breaks, so the body of a multi-line
    // script block is removed together with its tags instead of leaking into the text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Complete comments, including ones that contain '>' or span several lines.
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", RegexOptions.Singleline);
    // Remaining tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<[^>]+>", "");
    // A line break followed by further whitespace (this also covers "\r\n").
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");
    // Leftovers of unbalanced comments: a stray terminator, or an opener up to the end of its line.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Entities are decoded in ONE pass so that already-decoded text is never decoded again
    // (sequential replacement turned "&amp;lt;" into "<").
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeNoHtmlEntity),
        RegexOptions.IgnoreCase);

    // No explicit removal of '<', '>' or "\r\n" here: the former string.Replace calls discarded
    // their results and never had an effect, and the encoding below neutralises leftover brackets.
    // HttpUtility.HtmlEncode is what Server.HtmlEncode wraps, but it needs no live HttpContext.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}

/// <summary>
/// Maps one matched entity (group 1 holds its name or "#number") to its replacement text.
/// Numeric entities without an explicit mapping are dropped.
/// </summary>
private static string DecodeNoHtmlEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return "   ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            return string.Empty;
    }
}

// C# function that extracts the text from HTML source code.
// Removes the HTML markup:
//   strHtml - source text that contains HTML
//   returns - the text with the markup removed
using   System;
using   System.Text.RegularExpressions;
/// <summary>
/// Console sample that extracts the plain text from an HTML string.
/// </summary>
public class StripHTMLTest{
    // Built once instead of on every call.

    // Script blocks including their content; Singleline lets '.' cross line breaks.
    private static readonly Regex ScriptBlock =
        new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Complete comments, possibly spanning several lines.
    private static readonly Regex Comment =
        new Regex(@"<!--.*?-->", RegexOptions.Singleline);

    // An opening, closing or declaration tag: optional '/', optional '!', an optionally
    // namespace-prefixed name, then anything up to '>' where quoted attribute values may
    // themselves contain '>'. Replaces the previous pattern, whose quoted-value part had been
    // overwritten by a URL so that tags with quoted attributes were never matched.
    private static readonly Regex Tag =
        new Regex(@"<(?:/\s*)?!?(?:\w+:)?\w+(?:[^>""']|""[^""]*""|'[^']*')*>");

    // A line break followed by further whitespace.
    private static readonly Regex LineBreakRun = new Regex(@"([\r\n])[\s]+");

    // Leftovers of unbalanced comments.
    private static readonly Regex StrayCommentEnd = new Regex(@"-->");
    private static readonly Regex StrayCommentStart = new Regex(@"<!--.*");

    // The supported named entities plus any decimal numeric entity.
    private static readonly Regex Entity =
        new Regex(@"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);", RegexOptions.IgnoreCase);

    /// <summary>Prints the text extracted from a small sample document.</summary>
    public static void Main(){
        string s = StripHTML("<HTML><HEAD><TITLE>中國石龍資訊平臺</TITLE></HEAD><BODY>faddfs龍資訊平臺</BODY></HTML>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes scripts, comments and tags from HTML and decodes a small set of entities.
    /// </summary>
    /// <param name="strHtml">Source text that contains HTML; may be null or empty.</param>
    /// <returns>The remaining text; an empty string for null or empty input.</returns>
    public static string StripHTML(string strHtml){
        if (string.IsNullOrEmpty(strHtml)){
            return string.Empty;
        }

        string strOutput = ScriptBlock.Replace(strHtml, "");
        strOutput = Comment.Replace(strOutput, "");
        strOutput = Tag.Replace(strOutput, "");
        strOutput = LineBreakRun.Replace(strOutput, "");
        strOutput = StrayCommentEnd.Replace(strOutput, "");
        strOutput = StrayCommentStart.Replace(strOutput, "");

        // Single pass, so text produced by decoding is never decoded a second time.
        // The former trailing string.Replace calls for '<', '>' and "\r\n" discarded their
        // results and never had an effect; brackets decoded from entities stay in the text.
        return Entity.Replace(strOutput, new MatchEvaluator(DecodeEntity));
    }

    /// <summary>
    /// Maps one matched entity (group 1 holds its name or "#number") to its replacement text.
    /// Numeric entities without an explicit mapping are dropped.
    /// </summary>
    private static string DecodeEntity(Match entity){
        switch (entity.Groups[1].Value.ToLowerInvariant()){
            case "quot":
            case "#34":
                return "\"";
            case "amp":
            case "#38":
                return "&";
            case "lt":
            case "#60":
                return "<";
            case "gt":
            case "#62":
                return ">";
            case "nbsp":
            case "#160":
                return "   ";
            case "iexcl":
            case "#161":
                return "\u00A1";
            case "cent":
            case "#162":
                return "\u00A2";
            case "pound":
            case "#163":
                return "\u00A3";
            case "copy":
            case "#169":
                return "\u00A9";
            default:
                return string.Empty;
        }
    }
}

寫一個靜態方法
#region   移除HTML標籤
/// <summary>
/// Deletes every substring that starts with '&lt;' and runs to the next '&gt;'.
/// </summary>
/// <param name="HTMLStr">Text to remove the tags from.</param>
/// <returns>The input with all such substrings removed.</returns>
public static string ParseTags(string HTMLStr)
{
    const string tagPattern = "<[^>]*>";
    System.Text.RegularExpressions.Regex tagMatcher =
        new System.Text.RegularExpressions.Regex(tagPattern);
    string withoutTags = tagMatcher.Replace(HTMLStr, "");
    return withoutTags;
}

#endregion

                  #region   取出文字中的圖片地址
                  /// <summary>
                  /// Returns the <c>src</c> value of the first <c>img</c> tag that has one.
                  /// </summary>
                  /// <param name="HTMLStr">HTML text to search; may be null or empty.</param>
                  /// <returns>
                  /// The URL exactly as written in the markup (case preserved, surrounding quotes
                  /// removed), or an empty string when no such tag is found.
                  /// </returns>
                  public static string GetImgUrl(string HTMLStr)
                  {
                          if (string.IsNullOrEmpty(HTMLStr))
                          {
                                  return string.Empty;
                          }

                          // "src" must be a whole attribute name (preceded by whitespace), so
                          // "data-src" is not picked up. The value may be double-quoted,
                          // single-quoted or unquoted; only the text inside the quotes is captured.
                          const string imgSrcPattern =
                                  @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>""']+))";

                          // IgnoreCase replaces the former ToLower() on the input, which also
                          // lower-cased the returned URL and broke case-sensitive paths.
                          Match m = Regex.Match(HTMLStr, imgSrcPattern, RegexOptions.IgnoreCase);
                          return m.Success ? m.Groups["url"].Value : string.Empty;
                  }

                  #endregion

/// <summary>
/// Strips every tag from <c>str</c> except img, br and p (opening and closing),
/// and stores the result back into <c>str</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns for other stripping policies:
    //   @"<[^>]*>"                      removes every tag
    //   @"<script[^>]*?>.*?</script>"   removes scripts together with their content
    //   @"<img[^>]*>"                   removes images only
    //   @"<(?!br).*?>"                  removes every tag except br
    //   @"<table[^>]*?>.*?</table>"     removes tables together with their content

    // The \b after the allowed names makes the lookahead match whole tag names only;
    // without it "p" also protected <pre>, <param>, <picture>, ... from removal.
    // [^>]* (instead of .*?) lets a tag that spans several lines be removed too.
    string regexstr = @"<(?!(?:img|br|p|/p)\b)[^>]*>";

    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}

ASP.NET 去除所有HTML標記 < type="text/JavaScript">function StorePage(){d=document;t=d.selection?(d.selection.type!='None'?d.selection.createRange().text:''):(d.getSelection?d.getSelection():'');void(keyit=window.open('http://www.365key.com/storeit.aspx?t='+escape(d.title)+'&u='+escape(d.location.href)+'&c='+escape(t),'keyit','scrollbars=no,width=475,height=575,left=75,top=20,status=no,resizable=yes'));keyit.focus();}
注意:需要先using System.Text.RegularExpressions;

正則表示式，去除所有HTML標籤

正則表示式，去除所有HTML標籤

利用正則表達式去除所有html標簽，只保留文字

正則表示式，匹配所有非中文字元

Python正則表示式過濾或者替換HTML標籤的方法

PHP正則表示式匹配巢狀HTML標籤的方法和技巧

js正則表示式，去除首尾多餘的空格或者換行

java/android 正則表示式去除所有HTML標籤

Python通過正則表示式獲取,去除(過濾)或者替換HTML標籤的幾種方法(本文由169it.com蒐集整理)

Python 正則表示式，search(不要求從開頭匹配)，findall(匹配所有)，sub(替換)，split(切割)

正則表示式，js去除空格

Javascript：匹配所有“\r\n”的正則表示式，把換行符替換成 br

php讀取富文字編輯器編輯後的文章內容，並去除所有html標籤、空格以及空白，擷取字串（包括中文）

正則表達式去除所有非ASCII字符

python3 學習5 正則表示式，re模組學習

SQL語句正則表示式匹配(獲取) 所有表名

day023正則表示式，re模組，簡單爬蟲和多頁面爬蟲（幹掉數字簽名證書驗證）

ACCESS 資料庫不支援正則表示式，如何用SQL語句查詢表中既去重複的有隻查是數字或字母的欄位！

python正則表示式，簡單的郵箱格式驗證

Python 正則表示式，re模組，match匹配(預設從開頭匹配)，分組

常用正則表示式，手機號，郵箱，網址

正則表示式，去除所有HTML標籤

相關推薦