HM編碼器程式碼閱讀(12)——CU編碼
阿新 • • 發佈:2019-01-30
總結:xCompressCU 的作用就是從 LCU 開始,按深度(depth)遞迴地向下遍歷,計算每一個 depth 上最優的模式,再綜合比較各個 depth 上的最優模式,選出整體最優的模式與劃分方式。
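為了便於理解,這裡把 xCompressCU 中與預測關係不大的程式碼刪除了;下面是精簡版的 xCompressCU。注意:bSliceEnd、bSliceStart、bInsidePicture、isAddLowestQP、lowestQP、earlyDetectionSkipMode、doNotBlockPu、bSubBranch、bBoundary、pcPic、pcSlice 等變數的宣告都位於被刪除的部分,因此這份精簡版無法單獨編譯。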
/* ** 壓縮CU的內部函式 */ #if AMP_ENC_SPEEDUP // 編碼加速巨集 Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth, PartSize eParentPartSize ) #else Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth ) #endif { Int iBaseQP = xComputeQP( rpcBestCU, uiDepth ); // 基本的量化步長32 // 最小的步長 Int iMinQP; // 最大的步長 Int iMaxQP; // 使用位元速率控制 // 注意這裡的QP使用了,位元速率控制物件計算出來的QP // 通過QP,位元速率控制物件控制了編碼器的位元率 if ( m_pcEncCfg->getUseRateCtrl() ) { iMinQP = m_pcRateCtrl->getRCQP(); iMaxQP = m_pcRateCtrl->getRCQP(); } // 刪除了無關程式碼(對理解預測沒有太大用處的程式碼)... /* ** 核心 */ if(!bSliceEnd && !bSliceStart && bInsidePicture ) { // 此迴圈測試每一種量化步長,計算率失真,選出最優的QP for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)//1次迴圈,iMinQP==iMaxQP { // 是否為無損模式 const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);// false if (bIsLosslessMode) { iQP = lowestQP; } // 初始化 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); // do inter modes, SKIP and 2Nx2N /* ** 在處理所有的其他模式之前,先處理幀間skip和2Nx2N的模式 ** 特別是對於2Nx2N的劃分,要分兩次處理: ** 1、嘗試merge模式——xCheckRDCostMerge2Nx2N ** 2、嘗試普通的幀間預測(即AMVP)——xCheckRDCostInter */ if( rpcBestCU->getSlice()->getSliceType() != I_SLICE ) { // skip模式處理 if(m_pcEncCfg->getUseEarlySkipDetection()) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );//by Competition for inter_2Nx2N } // merge模式 xCheckRDCostMerge2Nx2N( rpcBestCU, rpcTempCU, &earlyDetectionSkipMode );//by Merge for inter_2Nx2N rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); // 2Nx2N模式 if(!m_pcEncCfg->getUseEarlySkipDetection()) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode()) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } } if (bIsLosslessMode) { iQP = iMinQP; } } if(!earlyDetectionSkipMode) { // 在實際的處理過程當中,對LCU的劃分都是以4x4大小的塊進行劃分的,這是為了處理方便,然後以Z掃描的方式進行掃描,這也是為了方便遞迴 // 遍歷每一種量化步長 for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) { const 
Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP); if (bIsLosslessMode) { iQP = lowestQP; } rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); // do inter modes, NxN, 2NxN, and Nx2N /* ** 普通的幀間預測(普通的幀間預測就是AMVP)開始: ** 注意:這裡不再處理merge模式和普通幀間的2Nx2N劃分模式, ** 這是因為前面已經處理過2Nx2N的劃分模式了,merge模式只對於2Nx2N的劃分才有效 ** 因此下面的處理是沒有merge模式和2Nx2N的劃分模式的 */ if( rpcBestCU->getSlice()->getSliceType() != I_SLICE ) { // NxN模式的處理 if(!( (rpcBestCU->getWidth(0)==8) && (rpcBestCU->getHeight(0)==8) )) { if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth && doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_NxN ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); } } // Nx2N模式的處理 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_Nx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_Nx2N ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } // 2NxN的模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxN ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxN) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } #if 1 //! 
Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N) // 接下來是2NxnU、2NxnD、nLx2N、nRx2N的劃分模式的處理 /* ** 接下來的處理有點講究: ** 1、首先測試AMP_ENC_SPEEDUP巨集(表示是否加快編碼速度)是否開啟 ** 2、如果AMP_ENC_SPEEDUP巨集開啟 ** (1)預設情況下,如果TestAMP_Hor、TestAMP_Ver為真,那麼可以處理2NxnU、2NxnD、nLx2N、nRx2N這四種模式 ** (2)如果TestAMP_Hor、TestAMP_Ver為假,但是開啟了AMP_MRG巨集,而且TestMergeAMP_Hor、TestMergeAMP_Ver為真,那麼還是可以處理2NxnU、2NxnD、nLx2N、nRx2N這四種模式 ** 否則不再處理2NxnU、2NxnD、nLx2N、nRx2N這四種模式 ** (3)由於上面會根據一些條件來判斷是否需要處理2NxnU、2NxnD、nLx2N、nRx2N這四種模式,因此某些時候速度會快一點 ** 3、如果AMP_ENC_SPEEDUP關閉 ** 那麼直接處理2NxnU、2NxnD、nLx2N、nRx2N這四種模式,因為沒有了條件限制,這四種模式都要測試,因此,速度會慢一點 */ if( pcPic->getSlice(0)->getSPS()->getAMPAcc(uiDepth) ) { #if AMP_ENC_SPEEDUP Bool bTestAMP_Hor = false, bTestAMP_Ver = false; #if AMP_MRG Bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false; // 測試TestAMP_Hor和TestAMP_Ver是否為真 deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver); #else // else of AMP_MRG deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver); #endif // end of AMP_MRG //! 
Do horizontal AMP // TestAMP_Hor為真的話,可以使用2NxnU和2NxnD這兩種劃分模式 if ( bTestAMP_Hor ) { // 處理2NxnU模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } // 處理2NxnD模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } } #if AMP_MRG // TestMergeAMP_Hor為真的話可以使用2NxnU、2NxnD這兩種模式 else if ( bTestMergeAMP_Hor ) { // 處理2NxnU模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU, true ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } // 處理2NxnD模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD, true ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } } #endif // end of AMP_MRG //! 
Do horizontal AMP // TestAMP_Ver為真可以處理nLx2N、nRx2N兩種模式 if ( bTestAMP_Ver ) { // 處理nLx2N模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } // 處理nRx2N模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); } } #if AMP_MRG // TestMergeAMP_Ver為真可以處理nLx2N、nRx2N模式 else if ( bTestMergeAMP_Ver ) { // 處理nLx2N模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N, true ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N ) { doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0; } } // 處理nRx2N模式 if(doNotBlockPu) { xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N, true ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); } } #endif // end of AMP_MRG #else // else of AMP_ENC_SPEEDUP xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); #endif // end of AMP_ENC_SPEEDUP } #endif } // 幀間預測結束!!!! 
// do normal intra modes // speedup for inter frames // 幀內預測開始,幀內預測只有兩種劃分:2Nx2N、NxN if( rpcBestCU->getSlice()->getSliceType() == I_SLICE || rpcBestCU->getCbf( 0, TEXT_LUMA ) != 0 || rpcBestCU->getCbf( 0, TEXT_CHROMA_U ) != 0 || rpcBestCU->getCbf( 0, TEXT_CHROMA_V ) != 0 ) // avoid very complex intra if it is unlikely { // 幀內2Nx2N模式 xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); // 幀內NxN if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth )//g_uiMaxCUDepth=4,g_uiAddCUDepth=1 { if( rpcTempCU->getWidth(0) > ( 1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) ) { xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN ); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); } } } // 幀內預測結束!!! // test PCM // 嘗試PCM模式 if(pcPic->getSlice(0)->getSPS()->getUsePCM() && rpcTempCU->getWidth(0) <= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MaxSize()) && rpcTempCU->getWidth(0) >= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MinSize()) ) { UInt uiRawBits = (2 * g_bitDepthY + g_bitDepthC) * rpcBestCU->getWidth(0) * rpcBestCU->getHeight(0) / 2; UInt uiBestBits = rpcBestCU->getTotalBits(); if((uiBestBits > uiRawBits) || (rpcBestCU->getTotalCost() > m_pcRdCost->calcRdCost(uiRawBits, 0))) { xCheckIntraPCM (rpcBestCU, rpcTempCU); rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); } } if (bIsLosslessMode) { iQP = iMinQP; } } } // 重置位元數 m_pcEntropyCoder->resetBits(); // 對分割標誌進行編碼 m_pcEntropyCoder->encodeSplitFlag( rpcBestCU, 0, uiDepth, true ); // 位元數量統計 rpcBestCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits rpcBestCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded(); // 總的消耗統計 rpcBestCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcBestCU->getTotalBits(), rpcBestCU->getTotalDistortion() ); // Early CU determination // HM15.0的配置中沒有使用早期的CU if( m_pcEncCfg->getUseEarlyCU() && rpcBestCU->isSkipped(0) ) { bSubBranch = 
false; } else { bSubBranch = true; } }//if(!bSliceEnd && !bSliceStart && bInsidePicture ) else if(!(bSliceEnd && bInsidePicture)) { bBoundary = true; } // 刪除了無關程式碼(對理解預測沒有太大用處的程式碼)... // 從最小量化步長到最大量化步長,遞迴處理子CU,然後選取最優的量化步長和最優劃分模式 for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) { const Bool bIsLosslessMode = false; // False at this level. Next level down may set it to true. rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); // further split // 進一步的分割 if( bSubBranch && uiDepth < g_uiMaxCUDepth - g_uiAddCUDepth ) { UChar uhNextDepth = uiDepth+1; TComDataCU* pcSubBestPartCU = m_ppcBestCU[uhNextDepth]; TComDataCU* pcSubTempPartCU = m_ppcTempCU[uhNextDepth]; // 進一步的分割,當前CU又被劃分成為4個子CU for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++ ) { pcSubBestPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init. pcSubTempPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init. Bool bInSlice = pcSubBestPartCU->getSCUAddr()+pcSubBestPartCU->getTotalNumPart()>pcSlice->getSliceSegmentCurStartCUAddr()&&pcSubBestPartCU->getSCUAddr()<pcSlice->getSliceSegmentCurEndCUAddr(); if(bInSlice && ( pcSubBestPartCU->getCUPelX() < pcSlice->getSPS()->getPicWidthInLumaSamples() ) && ( pcSubBestPartCU->getCUPelY() < pcSlice->getSPS()->getPicHeightInLumaSamples() ) ) { if ( 0 == uiPartUnitIdx) //initialize RD with previous depth buffer { m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); } else { m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]); } #if AMP_ENC_SPEEDUP // 如果啟用了編碼加速選項 if ( rpcBestCU->isIntra(0) ) { xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth, SIZE_NONE ); } else { xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth, rpcBestCU->getPartitionSize(0) ); } #else // 沒有使用編碼加速選項 // 遞迴處理子CU xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth ); #endif rpcTempCU->copyPartFrom( 
pcSubBestPartCU, uiPartUnitIdx, uhNextDepth ); // Keep best part data to current temporary data. xCopyYuv2Tmp( pcSubBestPartCU->getTotalNumPart()*uiPartUnitIdx, uhNextDepth ); } else if (bInSlice) { pcSubBestPartCU->copyToPic( uhNextDepth ); rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth ); } } // 計算並更新最優的代價——begin if( !bBoundary ) { m_pcEntropyCoder->resetBits(); m_pcEntropyCoder->encodeSplitFlag( rpcTempCU, 0, uiDepth, true ); rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded(); } // 計算RD代價 rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() ); if( (g_uiMaxCUWidth>>uiDepth) == rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() && rpcTempCU->getSlice()->getPPS()->getUseDQP()) { Bool hasResidual = false; for( UInt uiBlkIdx = 0; uiBlkIdx < rpcTempCU->getTotalNumPart(); uiBlkIdx ++) { if( ( pcPic->getCU( rpcTempCU->getAddr() )->getSliceSegmentStartCU(uiBlkIdx+rpcTempCU->getZorderIdxInCU()) == rpcTempCU->getSlice()->getSliceSegmentCurStartCUAddr() ) && ( rpcTempCU->getCbf( uiBlkIdx, TEXT_LUMA ) || rpcTempCU->getCbf( uiBlkIdx, TEXT_CHROMA_U ) || rpcTempCU->getCbf( uiBlkIdx, TEXT_CHROMA_V ) ) ) { hasResidual = true; break; } } UInt uiTargetPartIdx; if ( pcPic->getCU( rpcTempCU->getAddr() )->getSliceSegmentStartCU(rpcTempCU->getZorderIdxInCU()) != pcSlice->getSliceSegmentCurStartCUAddr() ) { uiTargetPartIdx = pcSlice->getSliceSegmentCurStartCUAddr() % pcPic->getNumPartInCU() - rpcTempCU->getZorderIdxInCU(); } else { uiTargetPartIdx = 0; } if ( hasResidual ) { #if !RDO_WITHOUT_DQP_BITS m_pcEntropyCoder->resetBits(); m_pcEntropyCoder->encodeQP( rpcTempCU, uiTargetPartIdx, false ); rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // dQP bits rpcTempCU->getTotalBins() += ((TEncBinCABAC 
*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded(); rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() ); #endif Bool foundNonZeroCbf = false; rpcTempCU->setQPSubCUs( rpcTempCU->getRefQP( uiTargetPartIdx ), rpcTempCU, 0, uiDepth, foundNonZeroCbf ); assert( foundNonZeroCbf ); } else { rpcTempCU->setQPSubParts( rpcTempCU->getRefQP( uiTargetPartIdx ), 0, uiDepth ); // set QP to default QP } } m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]); Bool isEndOfSlice = rpcBestCU->getSlice()->getSliceMode()==FIXED_NUMBER_OF_BYTES && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceArgument()<<3); Bool isEndOfSliceSegment = rpcBestCU->getSlice()->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceSegmentArgument()<<3); if(isEndOfSlice||isEndOfSliceSegment) { rpcBestCU->getTotalCost()=rpcTempCU->getTotalCost()+1; } // 選擇最優的劃分模式 xCheckBestMode( rpcBestCU, rpcTempCU, uiDepth); // RD compare current larger prediction // 計算並更新最優代價——end } // with sub partitioned prediction. } // 刪除了無關程式碼(對理解預測沒有太大用處的程式碼)... }