C#正則表示式入門(下)
一、匹配郵政編碼,郵政編碼為6位數字組成。
// Postal-code check: the whole input line must consist of exactly six \d characters.
string postalCode = Console.ReadLine();
bool isPostalCode = Regex.IsMatch(postalCode, @"^\d{6}$", RegexOptions.None);
if (isPostalCode)
{
    Console.WriteLine("匹配成功!");
}
else
{
    Console.WriteLine("匹配失敗!");
}
二、匹配數值
// Numeric-value check: the whole input line must be an unsigned integer,
// optionally followed by a decimal point and at least one fraction digit
// (e.g. "42" or "3.14").
// The decimal point is escaped (\.): an unescaped "." is the any-character
// wildcard, which would let inputs such as "12a34" pass as numbers.
string code;
code = Console.ReadLine();
Regex reg = new Regex(@"^\d+(\.\d+)?$", RegexOptions.None);
Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失敗!");
三、去掉字串中的所有空格
// Whitespace removal demo: print the input with all whitespace stripped, then
// trim trailing and leading whitespace step by step, reporting the length each time.
string input = Console.ReadLine();

// \s+ matches every run of whitespace characters anywhere in the string.
string withoutWhitespace = Regex.Replace(input, @"\s+", "", RegexOptions.None);
Console.WriteLine("去掉空格符後的字串為:" + withoutWhitespace);
Console.WriteLine("原始字串長度為:" + input.Length);

// \s+$ matches only the whitespace run at the end of the string.
input = Regex.Replace(input, @"\s+$", "");
Console.WriteLine("去掉尾部空格符後的長度為:" + input.Length);

// ^\s+ matches only the whitespace run at the start of the string.
input = Regex.Replace(input, @"^\s+", "");
Console.WriteLine("去掉首部空格符後的長度為:" + input.Length);
注:上面所說的空格符(\s)包括空格、製表符、換行符等空白字元。
四、提取輸入字串中的所有合法的數值
下面程式用於提取所有的整數或者浮點數
// Extract every integer or floating-point number from the input line.
// Pattern: one or more digits, optionally followed by "." and more digits.
// A single pattern with an optional fraction is used instead of the alternation
// \d+|... because the regex engine tries alternatives left to right: a leading
// \d+ branch always wins, so "3.14" would be reported as "3" and "14".
// The dot is escaped so it matches only a literal decimal point.
string code;
code = Console.ReadLine();
Regex reg = new Regex(@"\d+(\.\d+)?");
MatchCollection mc = reg.Matches(code, 0); // start scanning at the first character
int counter = 0;
foreach (Match mt in mc)
{
    Console.WriteLine("數值【" + (++counter) + "】:" + mt.Value);
}
五、身份證號碼驗證
身份證號碼為15位全數字;或者為18位,其中前17位為數字,最後一位為0-9的數字或者字元“x”、“X”。
// ID-number check: either 15 digits, or 17 digits followed by a digit, "x" or "X".
string idNumber = Console.ReadLine();
Regex idPattern = new Regex(@"^\d{15}$|^\d{17}[0-9xX]$", RegexOptions.Singleline); // single-line mode
string verdict;
if (idPattern.IsMatch(idNumber))
{
    verdict = "匹配成功!";
}
else
{
    verdict = "匹配失敗!";
}
Console.WriteLine(verdict);
六、獲取一個網頁中包括的所有URL
包含的名字空間如下:
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Collections;
/// <summary>
/// Downloads a web page, extracts every http/https URL prefix found in it, and
/// prints the total number of matches, the number of distinct URLs, and the
/// distinct URLs numbered in the order they first appear.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string pageContent = GetWebPageContent(@"http://www.baidu.com");
    if (pageContent == null)
    {
        // GetWebPageContent reports the error and returns null when the download
        // fails; Regex.Matches would throw ArgumentNullException on a null input.
        return;
    }

    // Scheme, host label, then one or more ".label" parts. Single-line mode is kept
    // from the original; the pattern contains no "." wildcard for it to affect.
    Regex reg = new Regex(@"http(s)?://[-\w]+(\.\w[-\w]*)+", RegexOptions.Singleline);
    MatchCollection mc = reg.Matches(pageContent, 0);
    Console.WriteLine("提取網址數量:" + mc.Count);

    // "seen" gives a keyed membership test (instead of scanning all values for
    // every match); "myUrl" keeps the distinct URLs in first-seen order, which a
    // Hashtable enumeration would not preserve.
    Hashtable seen = new Hashtable();
    ArrayList myUrl = new ArrayList();
    foreach (Match mt in mc)
    {
        if (!seen.ContainsKey(mt.Value))
        {
            seen.Add(mt.Value, null);
            myUrl.Add(mt.Value);
        }
    }

    Console.WriteLine("去重後網址數量:" + myUrl.Count);
    for (int i = 0; i < myUrl.Count; i++)
    {
        Console.WriteLine("網址【" + (i + 1) + "】:" + myUrl[i]);
    }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <param name="url">URL of the page to fetch.</param>
/// <returns>
/// The decoded response body, or <c>null</c> if any step fails (the exception
/// message is written to the console in that case).
/// </returns>
private static string GetWebPageContent(string url)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
        // UserAgent takes the header value only; including the "User-Agent: "
        // name here would send it twice.
        req.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
        req.Accept = "*/*";
        req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        req.ContentType = "text/xml";

        // Dispose the response (and with it the connection) even if reading fails.
        using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
        {
            Encoding enc = ResolveEncoding(resp.CharacterSet);
            using (StreamReader read = new StreamReader(resp.GetResponseStream(), enc))
            {
                return read.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort download: report the problem and signal failure with null.
        Console.WriteLine(ex.Message);
        return null;
    }
}

/// <summary>
/// Maps a response character-set name to an <see cref="Encoding"/>, falling back
/// to UTF-8 when the name is missing, is "ISO-8859-1", or is not recognized.
/// </summary>
/// <param name="charset">Character-set name reported by the response; may be null or empty.</param>
/// <returns>The encoding to decode the response body with.</returns>
private static Encoding ResolveEncoding(string charset)
{
    // NOTE(review): ISO-8859-1 is presumably what the response reports when the
    // server sent no charset, hence the UTF-8 substitution - confirm.
    // The comparison is ordinal so it does not depend on the current culture.
    if (string.IsNullOrEmpty(charset)
        || string.Equals(charset, "ISO-8859-1", StringComparison.OrdinalIgnoreCase))
    {
        return Encoding.UTF8;
    }

    try
    {
        return Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // Unknown or invalid encoding name.
        return Encoding.UTF8;
    }
    catch (NotSupportedException)
    {
        // Encoding not available on this platform.
        return Encoding.UTF8;
    }
}
七、獲取圖片連結
// Two-pass image-link extraction from strLine.
// Pass 1 finds <img ...> fragments whose src/data-src value ends in a known image
// extension (single-line mode, case-insensitive).
Regex imgTagPattern = new Regex("<img\\s+[^<>]*(src|data-src)=[^<>\\s]+(\\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);
foreach (Match imgTag in imgTagPattern.Matches(strLine))
{
    // Pass 2 re-matches the fragment; group 3 holds the link text itself.
    string link = Regex.Match(imgTag.Value, "(src|data-src)=(\")?(.*?)(\"|\\s|$)", RegexOptions.IgnoreCase).Groups[3].Value;

    // Ignore links that are empty or a single character after trimming.
    if (link.Trim().Length <= 1)
    {
        continue;
    }

    // Ignore links that were already collected.
    if (myPicture.ContainsValue(link))
    {
        continue;
    }

    myPicture.Add(++counter, link);
}
八、分組的使用
// Part 1: capture groups on a URL embedded in arbitrary text.
string text = @"vqwsdvasdhttp://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html asdf?gr";
Regex urlPattern = new Regex(@"http(s)?://([^/:]*)(.*?)\.html", RegexOptions.IgnoreCase);
Match hit = urlPattern.Match(text, 0);
// prints: http://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html
Console.WriteLine(hit.Value);
// group 2 is the host part; prints: 202.38.193.153
Console.WriteLine(hit.Groups[2].Value);

// Part 2: a colon followed by 1-6 digits and a slash captures the port.
hit = new Regex(@":(\d{1,6})/", RegexOptions.IgnoreCase).Match(text);
// prints: 8000
Console.WriteLine(hit.Groups[1].Value);

// Part 3: nested groups on an <img> tag (single-line mode, case-insensitive).
text = "<p><img class=news-smallimg-img height=40 width=68 src=\"http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959.jpg\" alt=\"\" /></p>";
Regex imgPattern = new Regex(@"<img\s+[^<>]*(src|data-src)=\s*""?(([^<>\s])+)(\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);
hit = imgPattern.Match(text);
// Printed in order:
//   group 1 -> src
//   group 2 -> http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959
//   group 4 -> .jpg
//   group 5 -> jpg
foreach (int groupIndex in new int[] { 1, 2, 4, 5 })
{
    Console.WriteLine(hit.Groups[groupIndex].Value);
}
九、環視的使用
【例1】要求匹配teacher,但是不能匹配teachers,teacher’s等形式。
// Positive lookahead: (?=\s) requires a whitespace character right after "teacher"
// without consuming it, so "teachers" and "TEACHER'S" are not matched.
string text = "you i last teacher * 800 <,?ojJOa sd teachers y628 TEACHER'S";
MatchCollection hits = Regex.Matches(text, @"teacher(?=\s)", RegexOptions.IgnoreCase);
for (int i = 0; i < hits.Count; i++)
{
    Console.WriteLine(hits[i].Value);
}
【例2】要求找出所有含teach的單詞
// Negative lookahead: after "teach", ((?!\s).)* keeps consuming one character at a
// time for as long as the next character is not whitespace.
string text = "you i last teacher * 800 teaching <,?ojJOa sd teachers y628 TEACHER'S";
MatchCollection hits = Regex.Matches(text, @"teach((?!\s).)*", RegexOptions.IgnoreCase);
for (int i = 0; i < hits.Count; i++)
{
    Console.WriteLine(hits[i].Value);
}
【例3】將一個較大的數值每3位用一個逗號隔開
// Thousands separators via lookaround: the pattern matches the empty position that
// is preceded by a digit and followed by one or more groups of three digits ending
// at a non-digit or the end of the input; a comma is inserted at each such position.
// Prints: The population of 298,444,215 is growing
string text = "The population of 298444215 is growing";
string grouped = Regex.Replace(text, @"(?<=\d)(?=(\d\d\d)+(\s|\D|$))", ",", RegexOptions.IgnoreCase);
Console.WriteLine(grouped);
十、其它
【例1】最左最長規則(.Net的傳統NFA引擎並不遵循此規則,因此下例只匹配到oneself)
// Two optional groups in sequence: the first group takes "self", after which the
// second group cannot match the remaining "sufficient" and matches nothing.
string text = "oneselfsufficient";
Match hit = Regex.Match(text, "one(self)?(selfsufficient)?");
Console.WriteLine(hit.Value); // prints: oneself
【例2】多分支結構按照順序優先匹配
// Alternation is tried left to right, so the order of the branches decides the match.
// Prints, in order:
//   oneself            (branch "self" is listed first)
//   oneselfsufficient  (branch "selfsufficient" is listed first)
string text = "oneselfsufficient";
foreach (string pattern in new string[] { "one(self|selfsufficient)", "one(selfsufficient|self)" })
{
    Console.WriteLine(Regex.Match(text, pattern).Value);
}
注:.Net使用的正則表示式引擎型別為傳統NFA,對於多分支結構,按照順序優先匹配。