1. 程式人生 > 其它 > PowerShell 分割和合併大檔案

PowerShell 分割和合併大檔案

技術標籤:powershell

PowerShell 分割和合併大檔案

技術要點

  1. 處理的一般都是大檔案,所以使用.NET 中 FileStream 物件,因為流處理可以提高效能。
  2. 將緩衝區設定為1M-50M,當分割的單個檔案大小達到或超過1G時,使用50M記憶體,小於等於50M時,使用1M,其餘按比例增加力求節省記憶體。
  3. 暫時不考慮使用並行處理,因為在此場景中效能更多由硬碟的讀取速度決定。
  4. 分割出的檔案在原始檔名稱後追加 .part_001 這樣的格式,方便在合併前按照升序排序。
# Usage example.
# Dot-source the script first so that Split-File and Merge-File are defined
# in the current session.
. 'E:\Split-Merge-File.ps1'   # note the space between the dot and the script path

# Split 'E:\win2012.vhdx' into 20 part files, written to the directory 'E:\VHD'.
Split-File -File 'E:\win2012.vhdx' -ByPartCount -PartCount 20 -OutputDir 'E:\VHD'

# Split 'E:\win2012.vhdx' into part files of 500MB each, written to 'E:\VHD'.
Split-File -File 'E:\win2012.vhdx' -ByPartLength -PartLength 500MB -OutputDir 'E:\VHD'

# Merge every file in 'E:\VHD' whose name contains "part" into the single
# file 'E:\win2012-2.vhdx'.
Merge-File -SourceDir 'E:\VHD' -Filter "*part*" -OutputFile 'E:\win2012-2.vhdx'
# Obtain a suitable buffer length (in bytes) for copying a part of the given size.
#   $partialFileLength - size in bytes of the part that is going to be copied.
#                        Typed [long] so that parts of 2GB and more are accepted.
# Returns 50MB for parts of at least 1GB, 1MB for parts of at most 50MB, and a
# value proportional to the part size (50MB per 1GB) in between.
function Get-BufferLength ([long]$partialFileLength)
{
    [int]$MinBufferLength = 1MB
    # No need to consume a great amount of memory; initialized as 50MB, adjust it here.
    [int]$MaxBufferLength = 50MB

    if ($partialFileLength -ge 1GB)
    {
        return $MaxBufferLength
    }
    elseif ($partialFileLength -le 50MB)
    {
        return $MinBufferLength
    }
    else
    {
        return [int]($MaxBufferLength / 1GB * $partialFileLength)
    }
}

# Copy $length bytes from the current position of $stream into $outputFile.
#   $stream     - open, readable source stream; its position advances by the
#                 number of bytes actually copied.
#   $length     - number of bytes to copy; 0 produces an empty file.
#   $outputFile - path of the file to write. It is created, or overwritten when
#                 it already exists, so a re-run never appends to stale data.
# Copying stops early if the source stream ends before $length bytes were read.
function Write-PartialStreamToFile
{
    param(
        [IO.FileStream]$stream,
        [long]$length,
        [string]$outputFile
    )

    $bufferLength = Get-BufferLength $length
    [byte[]]$buffer = New-Object byte[] $bufferLength

    # Open the destination once instead of re-opening it for every chunk.
    $outStream = New-Object IO.FileStream(
        $outputFile,
        [IO.FileMode]::Create,
        [IO.FileAccess]::Write,
        [IO.FileShare]::Read )
    try
    {
        [long]$remaining = $length
        while ($remaining -gt 0)
        {
            $wanted = [int][Math]::Min([long]$buffer.Length, $remaining)

            # Read may legitimately return fewer bytes than requested, so only
            # the bytes actually read are written out.
            $read = $stream.Read($buffer, 0, $wanted)
            if ($read -le 0) { break }

            $outStream.Write($buffer, 0, $read)
            $remaining -= $read

            # show inner progress
            $progress = [int](($length - $remaining) * 100 / $length)
            write-progress -activity 'Writting file' -status 'Progress' -id 2 -percentcomplete $progress -currentOperation "$progress %"
        }
    }
    finally
    {
        $outStream.Dispose()
    }
}

# Split a large file into multiple parts, either by part count or by part length.
#   -File         source file (mandatory).
#   -ByPartCount  split into -PartCount parts (takes precedence when both
#                 switches are given).
#   -ByPartLength split into parts of -PartLength bytes each.
#   -PartCount    number of parts, must be larger than 1.
#   -PartLength   size of each part in bytes, must be smaller than the file.
#   -OutputDir    directory receiving the parts; created when missing.
# Part files are named '<source name>.part_<zero padded index>'. The last part
# receives whatever remains of the source file.
# Throws when the source file is missing or the arguments are inconsistent.
function Split-File
{
    param(
        [Parameter(Mandatory=$True)]
        [IO.FileInfo]$File,
        [Switch]$ByPartCount,
        [Switch]$ByPartLength,
        [int]$PartCount,
        [long]$PartLength,
        [IO.DirectoryInfo]$OutputDir = '.'
    )

    # Argument validation
    if (-not $File.Exists)
    {
        throw "Source file [$File] not exists"
    }

    if (-not $OutputDir.Exists)
    {
        [IO.Directory]::CreateDirectory($OutputDir.FullName) | Out-Null
    }

    if ( (-not $ByPartCount) -and (-not $ByPartLength) )
    {
        throw 'Must specify one of parameter, [ByPartCount] or [ByPartLength]'
    }
    elseif ($ByPartCount)
    {
        if ($PartCount -le 1) { throw '[PartCount] must larger than 1' }

        # Round down so that the first (PartCount - 1) parts can never run past
        # the end of the source; the last part absorbs the remainder.
        $PartLength = [long][Math]::Floor([double]$File.Length / $PartCount)
        if ($PartLength -lt 1)
        {
            throw '[PartCount] must not exceed the source file length in bytes'
        }
    }
    elseif ($ByPartLength)
    {
        if ($PartLength -lt 1) { throw '[PartLength] must larger than 0' }
        if ($PartLength -ge $File.Length) { throw '[PartLength] must less than source file' }

        $PartCount = [int][Math]::Ceiling([double]$File.Length / $PartLength)
    }

    $stream = New-Object IO.FileStream(
        $File.FullName,
        [IO.FileMode]::Open,
        [IO.FileAccess]::Read,
        [IO.FileShare]::Read )
    try
    {
        # Make sure each part file name ends like '001' so that the parts sort
        # in ascending order when they are merged.
        $maskWidth = [int]([Math]::Log10($PartCount) + 1)
        [string]$numberMaskStr = '0' * $maskWidth

        foreach ($index in 1..$PartCount)
        {
            $partName = "{0}.part_{1}" -f $File.Name, $index.ToString($numberMaskStr)
            $outputFile = Join-Path $OutputDir.FullName $partName

            # show outer progress
            $progress = [int]($index * 100 / $PartCount)
            write-progress -activity "Splitting file" -status "Progress $progress %" -Id 1 -percentcomplete $progress -currentOperation "Handle file $outputFile"

            if ($index -eq $PartCount)
            {
                # last part: everything that is left
                Write-PartialStreamToFile $stream ($stream.Length - $stream.Position) $outputFile
            }
            else
            {
                Write-PartialStreamToFile $stream $PartLength $outputFile
            }
        }
    }
    finally
    {
        $stream.Dispose()
    }
}

# Merge the part files of a directory back into a single file.
#   -SourceDir  directory containing the part files (mandatory).
#   -Filter     file name filter selecting the parts, e.g. "*part*".
#   -OutputFile file to write; created, or overwritten when it already exists.
# Parts are concatenated in ascending file-name order. The output file itself
# is never treated as an input, even when it matches the filter.
# Throws when the directory does not exist, no output file was given, or no
# file matches the filter.
function Merge-File
{
    param(
        [Parameter(Mandatory=$True)]
        [IO.DirectoryInfo]$SourceDir,
        [string]$Filter,
        [IO.FileInfo]$OutputFile
    )

    # arguments validation
    if (-not $SourceDir.Exists)
    {
        throw "Directory $SourceDir not exists."
    }

    if ($null -eq $OutputFile)
    {
        throw 'Must specify parameter [OutputFile]'
    }

    # Sort explicitly: the enumeration order of a directory is not guaranteed.
    $files = @( Get-ChildItem -Path $SourceDir.FullName -File -Filter $Filter |
        Where-Object { $_.FullName -ne $OutputFile.FullName } |
        Sort-Object Name )
    if ($files.Count -eq 0)
    {
        throw "No matched file in directory $SourceDir"
    }

    # output stream
    $outputStream = New-Object IO.FileStream(
        $OutputFile.FullName,
        [IO.FileMode]::Create,
        [IO.FileAccess]::Write,
        [IO.FileShare]::Read )
    try
    {
        # merge file
        foreach ($part in $files)
        {
            # input stream
            $inputStream = New-Object IO.FileStream(
                $part.FullName,
                [IO.FileMode]::Open,
                [IO.FileAccess]::Read,
                [IO.FileShare]::Read )
            try
            {
                # one buffer per part, reused for every chunk
                $bufferLength = Get-BufferLength -partialFileLength $part.Length
                [byte[]]$buffer = New-Object byte[] $bufferLength

                # Read reports how many bytes it delivered; 0 means end of file.
                while ( ($read = $inputStream.Read($buffer, 0, $buffer.Length)) -gt 0 )
                {
                    $outputStream.Write($buffer, 0, $read)

                    # show progress
                    $progress = [int]($inputStream.Position * 100 / $inputStream.Length)
                    write-progress -activity 'Merging file' -status "Progress $progress %" -percentcomplete $progress
                }
            }
            finally
            {
                $inputStream.Dispose()
            }
        }
    }
    finally
    {
        $outputStream.Dispose()
    }
}