x265中encodeResAndCalcRdInterCU()分析(版本2.8)
阿新 • • 發佈:2018-12-17
二. 原始碼註釋分析:
/* =======Analysed by: yangxin
 * =======Date: 2018.10
 * =======Function: encodeResAndCalcRdInterCU()
 * Annotator's summary: codes the residual of an inter (AMVP or merge) CU,
 * computes its RD cost, and measures the bits with the entropy coder. */

/* encode residual and calculate rate-distortion for a CU block.
 * Note: this function overwrites the RD cost variables of interMode, but leaves the sa8d cost unharmed
 *
 * interMode  mode under evaluation; its cu, predYuv and fencYuv are read, and its
 *            reconYuv, contexts, distortion/energy/bit fields are written here.
 * cuGeom     geometry of the CU; only depth and log2CUSize are read directly in
 *            this function (the object is also forwarded to helpers).
 *
 * Side effects visible in this function: m_rqt[depth].tmpResiYuv is overwritten
 * with the residual, m_entropyCoder is reloaded/reset several times, and
 * m_maxTUDepth / m_cacheTU may be modified depending on m_limitTU. */
void Search::encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom)
{
    ProfileCUScope(interMode.cu, interRDOElapsedTime[cuGeom.depth], countInterRDO[cuGeom.depth]);

    CUData& cu = interMode.cu;
    Yuv* reconYuv = &interMode.reconYuv;
    Yuv* predYuv = &interMode.predYuv;
    uint32_t depth = cuGeom.depth;
    ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
    const Yuv* fencYuv = interMode.fencYuv;

    X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");

    uint32_t log2CUSize = cuGeom.log2CUSize;
    int sizeIdx = log2CUSize - 2;

    /* residual = source block - prediction block */
    resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);

    /* fetch the allowed TU depth range for this inter CU */
    uint32_t tuDepthRange[2];
    cu.getInterTUQtDepthRange(tuDepthRange, 0);

    m_entropyCoder.load(m_rqt[depth].cur);

    if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH))
        m_maxTUDepth = -1;
    else if (m_limitTU & X265_TU_LIMIT_BFS)
        memset(&m_cacheTU, 0, sizeof(TUInfoCache));

    Cost costs;
    if (m_limitTU & X265_TU_LIMIT_NEIGH)
    {
        /* Save and reload maxTUDepth to avoid changing of maxTUDepth between modes */
        int32_t tempDepth = m_maxTUDepth;
        if (m_maxTUDepth != -1)
        {
            uint32_t splitFlag = interMode.cu.m_partSize[0] != SIZE_2Nx2N;
            uint32_t minSize = tuDepthRange[0];
            uint32_t maxSize = tuDepthRange[1];
            maxSize = X265_MIN(maxSize, cuGeom.log2CUSize - splitFlag);
            m_maxTUDepth = x265_clip3(cuGeom.log2CUSize - maxSize, cuGeom.log2CUSize - minSize, (uint32_t)m_maxTUDepth);
        }
        /* annotator: estimates the RD cost and bit cost of the residual
         * (transform + quantization happen inside) */
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
        m_maxTUDepth = tempDepth;
    }
    else
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);

    /* annotator: lossless (transquant bypass) flag */
    uint32_t tqBypass = cu.m_tqBypass[0];
    if (!tqBypass)
    {
        /* distortion of the prediction alone, i.e. with no residual coded */
        sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
        {
            /* The fenc buffers are paired with the fenc chroma stride, matching the
             * final distortion computation further down (previously the pred stride
             * was passed for both buffers). */
            cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
            cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
        }

        /* Consider the RD cost of not signaling any residual */
        m_entropyCoder.load(m_rqt[depth].cur);
        m_entropyCoder.resetBits();
        m_entropyCoder.codeQtRootCbfZero();
        uint32_t cbf0Bits = m_entropyCoder.getNumberOfWrittenBits();

        uint32_t cbf0Energy;
        uint64_t cbf0Cost;
        if (m_rdCost.m_psyRd)
        {
            cbf0Energy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
            cbf0Cost = m_rdCost.calcPsyRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else if (m_rdCost.m_ssimRd)
        {
            cbf0Energy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size, log2CUSize, TEXT_LUMA, 0);
            cbf0Cost = m_rdCost.calcSsimRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else
            cbf0Cost = m_rdCost.calcRdCost(cbf0Dist, cbf0Bits);

        /* dropping the residual is cheaper than the estimated coded residual */
        if (cbf0Cost < costs.rdcost)
        {
            cu.clearCbf();
            cu.setTUDepthSubParts(0, 0, depth);
        }
    }

    /* annotator: stores the residual transform/quant data; called recursively
     * over the quadtree, presumably following the chosen TU split */
    if (cu.getQtRootCbf(0))
        saveResidualQTData(cu, *resiYuv, 0, 0);

    /* calculate signal bits for inter/merge/skip coded CU */
    m_entropyCoder.load(m_rqt[depth].cur);
    m_entropyCoder.resetBits();

    if (m_slice->m_pps->bTransquantBypassEnabled)
        m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);

    uint32_t coeffBits, bits, mvBits;
    if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
    {
        /* merge with no residual: signalled as skip */
        cu.setPredModeSubParts(MODE_SKIP);

        /* Merge/Skip */
        coeffBits = mvBits = 0;
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codeMergeIndex(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
        bits = mvBits + skipFlagBits;
    }
    else
    {
        /* annotator: AMVP path (this branch is also taken for merge CUs that
         * are not 2Nx2N or that keep a residual) */
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codePredMode(cu.m_predMode[0]);
        m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
        m_entropyCoder.codePredInfo(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;

        bool bCodeDQP = m_slice->m_pps->bUseDQP;
        m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
        bits = m_entropyCoder.getNumberOfWrittenBits();

        coeffBits = bits - mvBits - skipFlagBits;
    }

    m_entropyCoder.store(interMode.contexts);

    if (cu.getQtRootCbf(0))
        /* residual present: reconstruction = prediction + residual, clipped */
        reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
    else
        /* no residual: reconstruction is a plain copy of the prediction */
        reconYuv->copyFromYuv(*predYuv);

    // update with clipped distortion and cost (qp estimation loop uses unclipped values)
    /* luma */
    sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    interMode.distortion = bestLumaDist;

    /* chroma */
    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
    {
        sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
        bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
        interMode.chromaDistortion = bestChromaDist;
        /* total distortion = luma + scaled chroma */
        interMode.distortion += bestChromaDist;
    }

    if (m_rdCost.m_psyRd)
        interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    else if (m_rdCost.m_ssimRd)
        interMode.ssimEnergy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size, cu.m_log2CUSize[0], TEXT_LUMA, 0);

    /* luma distortion of the prediction alone (source vs. prediction) */
    interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
    interMode.totalBits = bits;
    interMode.lumaDistortion = bestLumaDist;
    interMode.coeffBits = coeffBits;
    interMode.mvBits = mvBits;
    /* store the total distortion on the CU */
    cu.m_distortion[0] = interMode.distortion;
    /* annotator: computes the total cost of the mode */
    updateModeCost(interMode);
    checkDQP(interMode, cuGeom);
}