記一次XML檔案讀取優化
背景
最近在接手公司的一個老專案時,發現開啟軟體介面後MenuItem的子項Items資料載入極其緩慢,導致點選介面時出現卡頓。分析後發現,原因是讀取XML的時候沒有充分考慮效能問題,造成讀取操作效率低下。經過優化後,讀取效率大幅提升,介面卡頓的問題也隨之解決。這篇文章將整個過程記錄下來,以便對相關知識有更深入的理解。先來看優化前的程式碼:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;

namespace WpfApplicationDemo
{
    /// <summary>
    /// Deployment information definition
    /// </summary>
    public class DeploymentInfo
    {
        /// <summary>
        /// Deployment name
        /// </summary>
        public string DeploymentName { get; set; }

        /// <summary>
        /// Comment
        /// </summary>
        public string Comment { get; set; }

        /// <summary>
        /// Whether deployment is actived?
        /// </summary>
        public bool IsDeploymentActived { get; set; }
    }

    /// <summary>
    /// Project information definition
    /// </summary>
    public class ProjectInfo
    {
        /// <summary>
        /// Project name
        /// </summary>
        public string ProjectName { get; set; }

        /// <summary>
        /// The upper-cased first character of <see cref="ProjectName"/>, used to group projects.
        /// Returns "Others" when the first character is not an ASCII letter, and an empty
        /// string when <see cref="ProjectName"/> is null or empty.
        /// </summary>
        public string FirstProjectNameLetter
        {
            get
            {
                // A one-character name is still a valid name and must be grouped by that character.
                if (string.IsNullOrEmpty(ProjectName))
                {
                    return string.Empty;
                }

                // Invariant casing keeps the grouping independent of the current UI culture.
                string firstLetter = ProjectName.Substring(0, 1).ToUpperInvariant();
                return Regex.IsMatch(firstLetter, "^[a-zA-Z]") ? firstLetter : "Others";
            }
        }

        /// <summary>
        /// Whether current project is actived?
        /// </summary>
        public bool IsProjectActived { get; set; }

        /// <summary>
        /// Last modification time
        /// </summary>
        public DateTime LastModifyTime { get; set; }

        /// <summary>
        /// Last open time
        /// </summary>
        public DateTime? LastOpenTime { get; set; }

        /// <summary>
        /// Deployment names in current project
        /// </summary>
        public List<DeploymentInfo> Deployments { get; set; }
    }

    public class ECSHelper
    {
        /// <summary>
        /// Name of the per-project configuration file that marks a directory as a project.
        /// </summary>
        private const string ConfigFileName = "ToolControl.config";

        /// <summary>
        /// Get all existing project information by loading the ToolControl.config file of every
        /// top-level sub-directory of the ECS project directory into an <see cref="XmlDocument"/>.
        /// </summary>
        /// <returns>
        /// One <see cref="ProjectInfo"/> per sub-directory that contains a ToolControl.config file,
        /// in the order the directories were enumerated; an empty list when there is none.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while reading a configuration file are not caught here, so the caller
        /// sees the original exception type and stack trace.
        /// </remarks>
        public static List<ProjectInfo> GetExistingProjectInfo()
        {
            string activedProjectName = GetActivedProjectName();
            string activedDeploymentName = GetActivedDeploymentName();
            string projectFolder = MachineConfig.GetEcsProjectDirectory();

            // Only directories that actually contain a configuration file count as projects.
            DirectoryInfo[] projectDirArray = new DirectoryInfo(projectFolder)
                .GetDirectories("*", SearchOption.TopDirectoryOnly)
                .Where(p => File.Exists(Path.Combine(p.FullName, ConfigFileName)))
                .ToArray();

            // Result collection to return.
            var projectInfoList = new List<ProjectInfo>(projectDirArray.Length);

            foreach (DirectoryInfo pd in projectDirArray)
            {
                string configPath = Path.Combine(pd.FullName, ConfigFileName);

                var projectInfo = new ProjectInfo
                {
                    ProjectName = pd.Name,
                    Deployments = new List<DeploymentInfo>(),
                    IsProjectActived = String.CompareOrdinal(activedProjectName, pd.Name) == 0,
                    LastModifyTime = new FileInfo(configPath).LastWriteTime
                };

                // The whole document is parsed into a DOM even though only the deployment items are needed.
                var toolCtrlXml = new XmlDocument();
                toolCtrlXml.Load(configPath);

                XmlNodeList deploymentNodes = toolCtrlXml.SelectNodes("/configuration/Ecs/Deployment/Item");
                foreach (XmlElement dn in deploymentNodes)
                {
                    string deploymentName = dn.InnerText;
                    projectInfo.Deployments.Add(new DeploymentInfo
                    {
                        DeploymentName = deploymentName,
                        // A deployment can only be active inside the active project.
                        IsDeploymentActived = String.Compare(activedDeploymentName, deploymentName) == 0 && projectInfo.IsProjectActived
                    });
                }

                projectInfoList.Add(projectInfo);
            }

            return projectInfoList;
        }
    }
}
上面的程式碼最終是要讀取下面的一個資料夾中每一個子資料夾中的ToolControl.Config檔案,具體檔案結構如下面截圖所示。
圖一 待讀取的資料夾結構
我們再來看看每個資料夾中的關鍵檔案ToolControl.Config檔案的結構
<configuration> <Security> <EncryptType>NoEncryption</EncryptType> <ProtectionSignature>f95381766e04b634689eec775a23633f</ProtectionSignature> <EncryptionSignature>967fe853d0528878e1449f9cf9db1c6b</EncryptionSignature> </Security> <Ecs> <Header> <Description> </Description> </Header> <Deployment> <Item Comment="">Release</Item> <Item Comment="">Simulation</Item> </Deployment> <ToolControl> <Group Name="TestGroup" Comment="20200911"> <Component Name="BPSI30IoPump" Type="Levitronix.Hardware.Pump.BPSI30IoPump,Levitronix.Hardware.dll" Comment=""> <Deployment>Any</Deployment> <Properties> <Property Name="EnableMaxSpeedLimitCheck">true</Property> <Property Name="AOIndex_ControlOutput">-1</Property> <Property Name="AIIndex_SpeedProcessReading">2</Property> <Property Name="DOIndex_Mode">-1</Property> <Property Name="DOIndex_EnableReset">-1</Property> <Property Name="DIIndex_PumpState">-1</Property> <Property Name="DIIndex_PumpError">-1</Property> <Property Name="ReadingSpeedScale">16000</Property> <Property Name="ReadingPressureScale">60</Property> <Property Name="SettingSpeedScale">16000</Property> <Property Name="SettingPressureScale">60</Property> </Properties> <ParameterAccess> <Parameter Name="ChamberAlarmAction" Privilege="Enginner" /> <Parameter Name="SystemAlarmAction" Privilege="Enginner" /> <Parameter Name="Enable" Privilege="Enginner" /> <Parameter Name="AutoStart" Privilege="Enginner" /> <Parameter Name="DefaultProcessRampTime" Privilege="Enginner" /> <Parameter Name="DefaultSpeedRampTime" Privilege="Enginner" /> <Parameter Name="MaxSpeedLimit" Privilege="Enginner" /> <Parameter Name="DefaultControlMode" Privilege="Enginner" /> <Parameter Name="DefaultPressureTarget" Privilege="Enginner" /> <Parameter Name="DefaultSpeedTarget" Privilege="Enginner" /> </ParameterAccess> <VariablePublish> <Variable Name="ReadingProcessData" Publish="False" /> <Variable Name="IsRunning" Publish="False" /> <Variable Name="IsError" Publish="False" /> <Variable 
Name="ComponentFullPath" Publish="False" /> </VariablePublish> </Component> </Group> </ToolControl> <UserInterface /> </Ecs> </configuration>
上面程式碼的思路是遍歷每一個資料夾,並通過XmlDocument去Load其中的ToolControl.config檔案。由於我們的軟體中資料夾非常多,而且不同專案的配置不同,有些資料夾下的ToolControl.config檔案非常龐大,所以在迴圈中使用XmlDocument把每一個檔案完整載入,對效能的影響非常大。針對上面的程式碼我做了如下修改,程式碼在效率方面得到了巨大的提升。
優化
首先我們來看看我們的優化程式碼
// Namespaces used by this snippet: System, System.Collections.Generic, System.IO, System.Linq,
// System.Threading.Tasks, System.Xml, System.Xml.Linq.

/// <summary>
/// Get all existing project information. The project directories are processed in parallel and
/// each ToolControl.config file is read with a forward-only <see cref="XmlReader"/>.
/// </summary>
/// <returns>
/// One <see cref="ProjectInfo"/> per top-level sub-directory that contains a ToolControl.config
/// file, in the order the directories were enumerated; an empty list when there is none.
/// </returns>
public static List<ProjectInfo> GetExistingProjectInfoEx()
{
    string activedProjectName = GetActivedProjectName();
    string activedDeploymentName = GetActivedDeploymentName();
    string projectFolder = MachineConfig.GetEcsProjectDirectory();

    // Only directories that actually contain a configuration file count as projects.
    DirectoryInfo[] projectDirArray = new DirectoryInfo(projectFolder)
        .GetDirectories("*", SearchOption.TopDirectoryOnly)
        .Where(p => File.Exists(Path.Combine(p.FullName, "ToolControl.config")))
        .ToArray();

    // Each iteration writes only its own slot, so no synchronisation is needed and the result
    // keeps the directory order regardless of which iteration finishes first.
    var projects = new ProjectInfo[projectDirArray.Length];

    Parallel.ForEach(projectDirArray, (pd, loopState, index) =>
    {
        var projectInfo = new ProjectInfo
        {
            ProjectName = pd.Name,
            Deployments = new List<DeploymentInfo>(),
            IsProjectActived = String.CompareOrdinal(activedProjectName, pd.Name) == 0,
            LastModifyTime = new FileInfo(Path.Combine(pd.FullName, "ToolControl.config")).LastWriteTime
        };

        // Parse the deployment items of this project.
        ParseDeployment(pd, projectInfo, activedDeploymentName);

        projects[index] = projectInfo;
    });

    // The parsed projects themselves are returned (the collection filled by the loop).
    return new List<ProjectInfo>(projects);
}

/// <summary>
/// Reads the deployment names stored under /configuration/Ecs/Deployment/Item in the
/// ToolControl.config file of <paramref name="pd"/> and appends them to
/// <paramref name="projectInfo"/>.Deployments.
/// </summary>
/// <param name="pd">Project directory that contains the ToolControl.config file.</param>
/// <param name="projectInfo">Project whose Deployments list receives the parsed entries.</param>
/// <param name="activedDeploymentName">Name of the currently active deployment.</param>
private static void ParseDeployment(DirectoryInfo pd, ProjectInfo projectInfo, string activedDeploymentName)
{
    var settings = new XmlReaderSettings
    {
        IgnoreComments = true,
        IgnoreWhitespace = true
    };

    using (var xmlReader = XmlReader.Create(Path.Combine(pd.FullName, "ToolControl.config"), settings))
    {
        // Every branch below advances the reader exactly once, so no node is visited twice
        // and none is skipped unintentionally.
        while (!xmlReader.EOF)
        {
            if (xmlReader.NodeType != XmlNodeType.Element)
            {
                xmlReader.Read();
                continue;
            }

            // Depth 2 is only reached through configuration -> Ecs (see below), so this is
            // exactly /configuration/Ecs/Deployment and not a Deployment element nested elsewhere.
            if (xmlReader.Depth == 2 && xmlReader.Name == "Deployment")
            {
                // ReadFrom materialises just this small element and leaves the reader on the node
                // that follows it, so the loop must not call Read() again here.
                var deploymentSection = (XElement)XNode.ReadFrom(xmlReader);

                foreach (XElement item in deploymentSection.Elements("Item"))
                {
                    string deploymentName = item.Value;
                    projectInfo.Deployments.Add(new DeploymentInfo
                    {
                        DeploymentName = deploymentName,
                        // A deployment can only be active inside the active project.
                        IsDeploymentActived = String.Compare(activedDeploymentName, deploymentName) == 0 && projectInfo.IsProjectActived
                    });
                }

                continue;
            }

            bool isOnPath = (xmlReader.Depth == 0 && xmlReader.Name == "configuration")
                         || (xmlReader.Depth == 1 && xmlReader.Name == "Ecs");

            if (isOnPath)
            {
                // Step into the element to look at its children.
                xmlReader.Read();
            }
            else
            {
                // Jump over the entire subtree of an unrelated element without visiting
                // its descendants one by one.
                xmlReader.Skip();
            }
        }
    }
}
這段程式碼主要從兩個方面進行優化。第一,由於這些資料夾彼此獨立、互相之間沒有依賴關係,所以這裡使用Parallel.ForEach並行讀取每一個資料夾,這樣能夠在很大程度上提高整個程式碼的讀取效率。第二,也是很重要的一點,就是使用XmlReader來取代XmlDocument:XmlReader以串流方式邊載入邊讀,不需要像XmlDocument那樣一次性把整個檔案載入並建立完整的DOM樹,因此效率更高。另外,關於XmlReader進行讀取的操作過程,可以參考這篇文章的分析。