1. 程式人生 > x265中encodeResAndCalcRdInterCU()分析(版本2.8)

x265中encodeResAndCalcRdInterCU()分析(版本2.8)

二. 原始碼註釋分析: 

/*
 =======Analysed by:  yangxin
 =======Date:         2018.10
 =======Function:     encodeResAndCalcRdInterCU()   encodes the residual of an inter CU (merge/skip or AMVP path), computes its RD cost, and counts the bits with the entropy coder
*/
/* Encode the residual of an inter-predicted CU and calculate its rate-distortion cost.
 *
 * Steps, in the order they appear in the body:
 *   1. residual = source (fenc) - prediction, stored in the per-depth scratch buffer
 *   2. estimateResidualQT() evaluates the residual quad-tree and fills `costs`
 *   3. unless transquant bypass is set, the cost of signalling "no residual"
 *      (root CBF = 0) is computed, and the CBFs are cleared if that is cheaper
 *   4. the entropy coder is used to count the bits of the CU (skip/merge path or
 *      full inter path); the resulting contexts are stored in interMode.contexts
 *   5. the reconstruction is built, distortion/energy/bit statistics are written
 *      into interMode, then updateModeCost() and checkDQP() are called
 *
 * interMode  mode under evaluation; its cu, predYuv and fencYuv are read, its
 *            reconYuv, contexts and cost/statistics fields are written
 * cuGeom     geometry of the CU (depth and log2CUSize are used here)
 *
 * Note: this function overwrites the RD cost variables of interMode, but leaves the sa8d cost unharmed */
void Search::encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom)
{
    ProfileCUScope(interMode.cu, interRDOElapsedTime[cuGeom.depth], countInterRDO[cuGeom.depth]);

    CUData& cu = interMode.cu;
    Yuv* reconYuv = &interMode.reconYuv;
    Yuv* predYuv = &interMode.predYuv;
    uint32_t depth = cuGeom.depth;
    ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
    const Yuv* fencYuv = interMode.fencYuv;

    X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");

    uint32_t log2CUSize = cuGeom.log2CUSize;
    int sizeIdx = log2CUSize - 2; // index into the per-size primitive tables

    // Residual = original picture - predicted picture.
    resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);

    // Fetch the TU depth range for this CU; [0] is used as the minimum and [1]
    // as the maximum log2 size below.
    uint32_t tuDepthRange[2];
    cu.getInterTUQtDepthRange(tuDepthRange, 0);

    m_entropyCoder.load(m_rqt[depth].cur);

    // Reset the TU-limit state that matches the active limit mode:
    // DFS without NEIGH invalidates m_maxTUDepth, BFS clears the TU info cache.
    if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH))
        m_maxTUDepth = -1;
    else if (m_limitTU & X265_TU_LIMIT_BFS)
        memset(&m_cacheTU, 0, sizeof(TUInfoCache));

    // Estimate the RD cost and bit cost of the residual quad-tree
    // (transform and quantization happen inside estimateResidualQT).
    Cost costs;
    if (m_limitTU & X265_TU_LIMIT_NEIGH)
    {
        /* Save and reload maxTUDepth to avoid changing of maxTUDepth between modes */
        int32_t tempDepth = m_maxTUDepth;
        if (m_maxTUDepth != -1)
        {
            // Clamp the inherited depth into the range that is legal for this CU.
            // A non-2Nx2N partition lowers the maximum TU size by one log2 step.
            uint32_t splitFlag = interMode.cu.m_partSize[0] != SIZE_2Nx2N;
            uint32_t minSize = tuDepthRange[0];
            uint32_t maxSize = tuDepthRange[1];
            maxSize = X265_MIN(maxSize, cuGeom.log2CUSize - splitFlag);
            m_maxTUDepth = x265_clip3(cuGeom.log2CUSize - maxSize, cuGeom.log2CUSize - minSize, (uint32_t)m_maxTUDepth);
        }
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
        m_maxTUDepth = tempDepth;
    }
    else
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);

    // Transquant-bypass flag of the first partition (annotated as the lossless
    // flag in the original notes). When set, the zero-residual shortcut is skipped.
    uint32_t tqBypass = cu.m_tqBypass[0];
    if (!tqBypass)
    {
        // Distortion if no residual is coded: source versus plain prediction.
        // Each buffer is read with its own stride.
        sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
        {
            cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
            cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
        }

        /* Consider the RD cost of not signaling any residual */
        m_entropyCoder.load(m_rqt[depth].cur);
        m_entropyCoder.resetBits();
        m_entropyCoder.codeQtRootCbfZero();
        uint32_t cbf0Bits = m_entropyCoder.getNumberOfWrittenBits();

        // Use the same cost model (psy-rd, ssim-rd or plain RD) that is enabled in m_rdCost.
        uint64_t cbf0Cost;
        if (m_rdCost.m_psyRd)
        {
            uint32_t cbf0Energy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
            cbf0Cost = m_rdCost.calcPsyRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else if (m_rdCost.m_ssimRd)
        {
            uint32_t cbf0Energy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size, log2CUSize, TEXT_LUMA, 0);
            cbf0Cost = m_rdCost.calcSsimRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else
            cbf0Cost = m_rdCost.calcRdCost(cbf0Dist, cbf0Bits);

        // Dropping the residual is cheaper than the estimated quad-tree: clear
        // all CBFs and reset the TU depth.
        if (cbf0Cost < costs.rdcost)
        {
            cu.clearCbf();
            cu.setTUDepthSubParts(0, 0, depth);
        }
    }

    // A residual survives: store the residual quad-tree data chosen by the estimate.
    if (cu.getQtRootCbf(0))
        saveResidualQTData(cu, *resiYuv, 0, 0);

    /* calculate signal bits for inter/merge/skip coded CU */
    m_entropyCoder.load(m_rqt[depth].cur);

    m_entropyCoder.resetBits(); // start the bit count from zero
    if (m_slice->m_pps->bTransquantBypassEnabled)
        m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);

    // Bit accounting: `bits` is the total, split into mvBits and coeffBits
    // (the skip-flag bits belong to neither).
    uint32_t coeffBits, bits, mvBits;
    if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
    {
        // 2Nx2N merge with no residual is coded as skip.
        cu.setPredModeSubParts(MODE_SKIP);

        /* Merge/Skip */
        coeffBits = 0;
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codeMergeIndex(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
        bits = mvBits + skipFlagBits;
    }
    else
    {
        // Regular inter path (annotated as AMVP in the original notes):
        // prediction mode, partition size, prediction info, then coefficients.
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codePredMode(cu.m_predMode[0]);
        m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
        m_entropyCoder.codePredInfo(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;

        bool bCodeDQP = m_slice->m_pps->bUseDQP;
        m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
        bits = m_entropyCoder.getNumberOfWrittenBits();

        coeffBits = bits - mvBits - skipFlagBits;
    }

    // Keep the entropy-coder state reached after coding this mode.
    m_entropyCoder.store(interMode.contexts);

    // Reconstruction: prediction + residual (clipped) when a residual is coded,
    // otherwise a straight copy of the prediction.
    if (cu.getQtRootCbf(0))
        reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
    else
        reconYuv->copyFromYuv(*predYuv);

    // update with clipped distortion and cost (qp estimation loop uses unclipped values)
    // Luma distortion.
    sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    interMode.distortion = bestLumaDist;
    // Chroma distortion, skipped for 4:0:0 content.
    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
    {
        sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
        bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
        interMode.chromaDistortion = bestChromaDist;
        interMode.distortion += bestChromaDist; // total distortion = luma + scaled chroma
    }
    if (m_rdCost.m_psyRd)
        interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    else if (m_rdCost.m_ssimRd)
        interMode.ssimEnergy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size, cu.m_log2CUSize[0], TEXT_LUMA, 0);

    // resEnergy is the luma SSE between source and prediction (not reconstruction).
    interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
    interMode.totalBits = bits;
    interMode.lumaDistortion = bestLumaDist;
    interMode.coeffBits = coeffBits;
    interMode.mvBits = mvBits;
    cu.m_distortion[0] = interMode.distortion; // record the total distortion on the CU
    updateModeCost(interMode);                 // derive the final mode cost from the fields set above
    checkDQP(interMode, cuGeom);
}