/* find the best inter prediction for each PU of specified mode 为CU中的每一个PU找到最佳inter pred,并累加上其ME cost 最佳inter pred指的是mv,mvd,mvpIdx,dir,list等 过程: 1.遍历每一个PU,以PU为计算单位 1.为运动估计加载PU的YUV,初始化失真函数指针,设置运动估计方法及下采样等级 2.进行merge估计,得到最优merge备选MV及其cost 3.初始化前后向运动估计bestME[dir]开销为MAX,用来存储当前运动方向上的最优预测 4.得到当前block模式的bits开销 5.加载当前PU相邻PU的MV 6.初始化前后向运动估计的ref,加载其refMask 7.对当前PU,针对其每个预测方向(前向/后向)遍历,找到当前预测方向上的最优预测bestME[dir] 1.对当前PU,当前预测方向,遍历参考帧列表中每一帧 1.若refMask表示当前参考帧不可用,则跳过该参考帧分析 2.累计bits = block模式bits + mvp_idx_bits + refIdx_bits 3.构造AMVP备选集 4.基于sad,选择AMVP备选集中最优MVP 5.根据最优MVP及searchRange设置运动估计搜索窗口 6.进行运动估计,得到最优MV及其satd 7.累计bits+=AMVP最优MVP与运动估计最优MV之差的bits 8.计算运动估计最优MV的mvcost 9.得到最优MV的cost 10.若当前cost小于bestME[dir]的cost,则更新 8.若是Bslice,且没有双向预测限制,且PUsize!=2Nx2N,且前后向都又bestME,则进行双向预测计算 1.取前后向的bestME为双向预测的前后向MV 2.计算satd ·若bChromaSATD,则 1.拷贝前后向的MV及其refIdx 2.进行运动估计 3.累加Luma和Chroma的satd到satd ·若非bChromaSATD,则 1.取前后向参考帧的YUV 2.对前后向参考像素进行均值计算,结果即为双向参考预测像素 3.计算其satd 3.计算双向预测的bits及cost 4.若前后向运动估计最优MV存在非0向量,则以0向量为中心重新设置搜索窗口,并检查mvp是否超出窗口 5.若前后向运动估计最优MV存在非0向量,且mvp都没有超出搜索窗口,则 ·若bChromaSATD 1.则设置mv为0向量 2.进行运动补偿 3.并计算重新累计luma和chroma的satd ·若非bChromaSATD 1.取前后向参考帧co-located像素值(其实也就是0向量) 2.对前后向参考帧co-located像素值进行均值计算,得到0向量的预测像素 3.进行satd计算 6.以0向量为运动估计最优MV,重新计算MVP,得到其mvp, mvpidx, bits, cost 7.若MV为0向量且重新计算后的mvp的cost小于最初计算的双向预测cost,则将其更新为双向预测最优 9.为当前PU选择最优的预测模式 ·merge最优,写mergeFlag为true、记录mergeIdx、将merge前后向的MV和refIdx记录到PU ·双向最优,设置PU预测方向为双向,写mergeFlag为false、将双向预测前后向的MV和refIdx记录到PU,记录双向mvpIdx和mvd ·前向最优,设置PU预测方向为前向,写mergeFlag为false、将前向预测的MV和refIdx记录到PU,记录前向mvpIdx和mvd ·后向最优,设置PU预测方向为后向,写mergeFlag为false、将后向预测的MV和refIdx记录到PU,记录后向mvpIdx和mvd 10.对当前PU的最优预测模式进行运动补偿 2.累加上运动估计的bits到当前interMode的bits上,并返回 */
void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
{
ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
//取interMode的CUData
CUData& cu = interMode.cu;
//取interMode的predYUV
Yuv* predYuv = &interMode.predYuv;
/* 12 mv candidates including lowresMV 低分辨率lowresMV为之前进行lookahead时候对1/2下采样后的帧进行运动估计的MV*/
MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
const Slice *slice = m_slice;
int numPart = cu.getNumPartInter(0);
//单向预测?双向预测?
int numPredDir = slice->isInterP() ? 1 : 2;
//参考帧列表中帧数量
const int* numRefIdx = slice->m_numRefIdx;
//lastMode记录上一个PU的dir,前向0后向1双向2
uint32_t lastMode = 0;
int totalmebits = 0;
MV mvzero(0, 0);
Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
MergeData merge;
memset(&merge, 0, sizeof(merge));
bool useAsMVP = false;
//遍历每个PU
for (int puIdx = 0; puIdx < numPart; puIdx++)
{
//取当前PU的bestME
MotionData* bestME = interMode.bestME[puIdx];
//构造PU
PredictionUnit pu(cu, cuGeom, puIdx);
//为m_me加载PU的YUV,并初始化失真计算函数指针,设置运动估计方法及下采样等级
m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, bChromaMC);
//初始化useAsMVP为false
useAsMVP = false;
//定义初始化x265_analysis_inter_data
x265_analysis_inter_data* interDataCTU = NULL;
//定义并计算当前CU的索引
int cuIdx;
cuIdx = (interMode.cu.m_cuAddr * m_param->num4x4Partitions) + cuGeom.absPartIdx;
//analysisReuseLevel = 0 且 interRefine>1
if (m_param->analysisReuseLevel == 10 && m_param->interRefine > 1)
{
//加载API外加载的编码分析数据
interDataCTU = m_frame->m_analysisData.interData;
/* 若interMode的predMode = 加载的predMode && interMode的partSize = 加载的partSize && interMode的Depth = 加载的Depth && 加载的数据中,当前PU不merge ==》useAsMVP = true */
if ((cu.m_predMode[pu.puAbsPartIdx] == interDataCTU->modes[cuIdx + pu.puAbsPartIdx])
&& (cu.m_partSize[pu.puAbsPartIdx] == interDataCTU->partSize[cuIdx + pu.puAbsPartIdx])
&& !(interDataCTU->mergeFlag[cuIdx + puIdx])
&& (cu.m_cuDepth[0] == interDataCTU->depth[cuIdx]))
useAsMVP = true;
}
/* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes 找到最优merge备选集,并返回其cost */
uint32_t mrgCost = numPart == 1 ? MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge);
//初始化前后向bestME的cost为MAX
bestME[0].cost = MAX_UINT;
bestME[1].cost = MAX_UINT;
//得到block模式(模式包含partSize和预测方向)的bits开销,即2Nx2N、2NxN、2NxnU等等
getBlkBits((PartSize)cu.m_partSize[0], slice->isInterP(), puIdx, lastMode, m_listSelBits);
bool bDoUnidir = true;
//加载当前PU相邻MV备选集到interNeighbours中
cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours);
/* Uni-directional prediction analysisLoad && analysisReuseLevel > 1 && analysisReuseLevel != 10 或 analysisMultiPassRefine && bStatRead 或 bAnalysisType == AVC_INFO 或 useAsMVP */
if ((m_param->analysisLoad && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10)
|| (m_param->analysisMultiPassRefine && m_param->rc.bStatRead) || (m_param->bAnalysisType == AVC_INFO) || (useAsMVP))
{
//遍历前后向参考方向
for (int list = 0; list < numPredDir; list++)
{
//初始化ref = -1
int ref = -1;
//加载当前PU当前预测方向上的ref
if (useAsMVP) //是否有API外信息载入,若有则加载信息
ref = interDataCTU->refIdx[list][cuIdx + puIdx];
else //若无则加载前面分析计算的bestME的ref
ref = bestME[list].ref;
//若仍然无ref,则该参考方向不可用,continue
if (ref < 0)
continue;
//累加bits,即之前的block_bits + mvp_idx_bits + refIdx_bits
uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
bits += getTUBits(ref, numRefIdx[list]);
//加载AMVP备选集到amvpCand中
int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
//取AMVP备选集
const MV* amvp = interMode.amvpCand[list][ref];
//选择AMVP备选集中最优备选MV,基于sad开销
int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
MV mvmin, mvmax, outmv, mvp;
//取最优AMVP备选MV
mvp = amvp[mvpIdx];
//若运动估计算法为SEA
if (m_param->searchMethod == X265_SEA)
{
int puX = puIdx & 1;
int puY = puIdx >> 1;
for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
}
//根据mvp计算搜索范围,输出到[mvmin,mvmax]
setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
MV mvpIn = mvp;
int satdCost;
if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
mvpIn = bestME[list].mv;
if (useAsMVP)
{
MV bestmv, mvpSel[3];
int mvpIdxSel[3];
satdCost = m_me.COST_MAX;
mvpSel[0] = interDataCTU->mv[list][cuIdx + puIdx].word;
mvpIdxSel[0] = interDataCTU->mvpIdx[list][cuIdx + puIdx];
if (m_param->mvRefine > 1)
{
mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
mvpIdxSel[1] = mvpIdx;
if (m_param->mvRefine > 2)
{
mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
mvpIdxSel[2] = !mvpIdx;
}
}
for (int cand = 0; cand < m_param->mvRefine; cand++)
{
if (cand && (mvpSel[cand] == mvpSel[cand - 1] || (cand == 2 && mvpSel[cand] == mvpSel[cand - 2])))
continue;
setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
int bcost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvpSel[cand], numMvc, mvc, m_param->searchRange, bestmv, m_param->maxSlices,
m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
if (satdCost > bcost)
{
satdCost = bcost;
outmv = bestmv;
mvp = mvpSel[cand];
mvpIdx = mvpIdxSel[cand];
}
}
} // end of if (useAsMVP)
else
{
satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices,
m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
}
/* Get total cost of partition, but only include MV bit cost once */
bits += m_me.bitcost(outmv);
uint32_t mvCost = m_me.mvcost(outmv);
uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
/* Refine MVP selection, updates: mvpIdx, bits, cost */
if (!(m_param->analysisMultiPassRefine || useAsMVP))
mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
else
{
/* It is more accurate to compare with actual mvp that was used in motionestimate than amvp[mvpIdx]. Here the actual mvp is bestME from pass 1 for that mvpIdx */
int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn);
if (diffBits < 0)
{
mvpIdx = !mvpIdx;
uint32_t origOutBits = bits;
bits = origOutBits + diffBits;
cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits);
}
mvp = amvp[mvpIdx];
}
if (cost < bestME[list].cost)
{
bestME[list].mv = outmv;
bestME[list].mvp = mvp;
bestME[list].mvpIdx = mvpIdx;
bestME[list].cost = cost;
bestME[list].bits = bits;
bestME[list].mvCost = mvCost;
bestME[list].ref = ref;
}
bDoUnidir = false;
}
}
//多线程并行运动估计,一个参考帧一个线程
else if (m_param->bDistributeMotionEstimation)
{
PME pme(*this, interMode, cuGeom, pu, puIdx);
pme.m_jobTotal = 0;
pme.m_jobAcquired = 1; /* reserve L0-0 or L1-0 */
uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
for (int list = 0; list < numPredDir; list++)
{
int idx = 0;
for (int ref = 0; ref < numRefIdx[list]; ref++)
{
if (!(refMask & (1 << ref)))
continue;
pme.m_jobs.ref[list][idx++] = ref;
pme.m_jobTotal++;
}
pme.m_jobs.refCnt[list] = idx;
/* the second list ref bits start at bit 16 */
refMask >>= 16;
}
if (pme.m_jobTotal > 2)
{
pme.tryBondPeers(*m_frame->m_encData->m_jobProvider, pme.m_jobTotal - 1);
processPME(pme, *this);
int ref = pme.m_jobs.refCnt[0] ? pme.m_jobs.ref[0][0] : pme.m_jobs.ref[1][0];
singleMotionEstimation(*this, interMode, pu, puIdx, 0, ref); /* L0-0 or L1-0 */
bDoUnidir = false;
ProfileCUScopeNamed(pmeWaitScope, interMode.cu, pmeBlockTime, countPMEMasters);
pme.waitForExit();
}
/* if no peer threads were bonded, fall back to doing unidirectional * searches ourselves without overhead of singleMotionEstimation() */
} //end of if (m_param->bDistributeMotionEstimation)
if (bDoUnidir)
{
//初始化前后向最优运动估计的ref = -1
interMode.bestME[puIdx][0].ref = interMode.bestME[puIdx][1].ref = -1;
/* refMask一共四字节32bits, 低16bits表示前向参考帧列表中各帧是否可用 高16bits表示后向参考帧列表中各帧是否可用 */
uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
for (int list = 0; list < numPredDir; list++) 遍历每个预测方向
{
//遍历当前预测方向参考列表中每一帧
for (int ref = 0; ref < numRefIdx[list]; ref++)
{
ProfileCounter(interMode.cu, totalMotionReferences[cuGeom.depth]);
//若refMask标注当前帧不可用,则continue
if (!(refMask & (1 << ref)))
{
ProfileCounter(interMode.cu, skippedMotionReferences[cuGeom.depth]);
continue;
}
//累计bits = mode_bits + MVP_idx_bits + refIdx_bits
uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
bits += getTUBits(ref, numRefIdx[list]);
//构造AMVP备选集
int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
const MV* amvp = interMode.amvpCand[list][ref];
//基于sad选AMVP备选集中的最优MVP
int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
MV mvmin, mvmax, outmv, mvp_lowres;
MV mvp = amvp[mvpIdx]; //amvp最优mvp
bool bLowresMVP = false;
//若既不analysisSave,也不analysisLoad
if (!m_param->analysisSave && !m_param->analysisLoad) /* Prevents load/save outputs from diverging when lowresMV is not available */
{
//得到低分辨率的运动向量lmv
MV lmv = getLowresMV(cu, pu, list, ref);
if (lmv.notZero()) //若低分辨率的mv可用,则存储下来
mvc[numMvc++] = lmv;
if (m_param->bEnableHME) //若允许层级运动估计,则拷贝lmv到mvp_lowres
mvp_lowres = lmv;
}
//若运动估计算法为SEA,则进行配置??
if (m_param->searchMethod == X265_SEA)
{
int puX = puIdx & 1;
int puY = puIdx >> 1;
for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
}
//基于amvp最优备选MV和searchRange设置搜索窗口[mvmin~mvmax]
setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
//进行运动估计,得到最优MV到outmv,及其satd
int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices,
m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
//使用3层级像素运动估计 && mvp低分辨率非零 && mvp低分辨率!=mvp
if (m_param->bEnableHME && mvp_lowres.notZero() && mvp_lowres != mvp)
{
MV outmv_lowres;
setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);
int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange, outmv_lowres, m_param->maxSlices,
m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
if (lowresMvCost < satdCost)
{
outmv = outmv_lowres;
satdCost = lowresMvCost;
bLowresMVP = true;
}
}
/* Get total cost of partition, but only include MV bit cost once */
//累加AMVP的最优MVP与运动估计最优MV之间差值mvd的bit开销
bits += m_me.bitcost(outmv);
uint32_t mvCost = m_me.mvcost(outmv);
uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
/* Update LowresMVP to best AMVP cand*/
if (bLowresMVP)
updateMVP(amvp[mvpIdx], outmv, bits, cost, mvp_lowres);
/* Refine MVP selection, updates: mvpIdx, bits, cost 检查是否AMVP中另一个备选项相对当前的MVP,cost更优?若是则更新 */
mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
//更新当前list方向的最优运动信息
if (cost < bestME[list].cost)
{
bestME[list].mv = outmv; //运动估计最优MV
bestME[list].mvp = mvp; //AMVP最优MV
bestME[list].mvpIdx = mvpIdx; //AMVP最优MV在备选集中的索引
bestME[list].ref = ref; //参考帧索引
bestME[list].cost = cost; //开销
bestME[list].bits = bits; //bist开销
bestME[list].mvCost = mvCost; //mvCost
}
}// end of for (int ref = 0; ref < numRefIdx[list]; ref++)
/* the second list ref bits start at bit 16 取后16bits为后向参考帧的refMask情况 */
refMask >>= 16;
}// end of for (int list = 0; list < numPredDir; list++)
}// end of if (bDoUnidir)
/* Bi-directional prediction 双向预测 */
MotionData bidir[2];
uint32_t bidirCost = MAX_UINT;
int bidirBits = 0;
//Bslice && 当前CU无双向预测限制 && pu非2Nx2N && 前后向都存在bestME
if (slice->isInterB() && !cu.isBipredRestriction() && /* biprediction is possible for this PU */
cu.m_partSize[pu.puAbsPartIdx] != SIZE_2Nx2N && /* 2Nx2N biprediction is handled elsewhere */
bestME[0].cost != MAX_UINT && bestME[1].cost != MAX_UINT)
{
//记录bestME
bidir[0] = bestME[0];
bidir[1] = bestME[1];
int satdCost;
if (m_me.bChromaSATD) //若bChromaSATD
{
//拷贝前向MV及其refIdx
cu.m_mv[0][pu.puAbsPartIdx] = bidir[0].mv;
cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref;
//拷贝后向MV及其refIdx
cu.m_mv[1][pu.puAbsPartIdx] = bidir[1].mv;
cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref;
//进行luma&&chroma运动补偿
motionCompensation(cu, pu, tmpPredYuv, true, true);
//累加luma和chroma的satd
satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) +
m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx);
}
else
{
//取前后向参考帧的YUV
PicYuv* refPic0 = slice->m_refReconPicList[0][bestME[0].ref];
PicYuv* refPic1 = slice->m_refReconPicList[1][bestME[1].ref];
//取当前的predYUV
Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
/* Generate reference subpels */
//将前向参考像素拷贝到bidirYuv[0]中
predInterLumaPixel(pu, bidirYuv[0], *refPic0, bestME[0].mv);
//将后向参考像素拷贝到bidirYuv[1]中
predInterLumaPixel(pu, bidirYuv[1], *refPic1, bestME[1].mv);
//对前向后向参考的像素进行均值计算到tmpPredYuv中
primitives.pu[m_me.partEnum].pixelavg_pp[(tmpPredYuv.m_size % 64 == 0) && (bidirYuv[0].m_size % 64 == 0) && (bidirYuv[1].m_size % 64 == 0)](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(pu.puAbsPartIdx), bidirYuv[0].m_size,
bidirYuv[1].getLumaAddr(pu.puAbsPartIdx), bidirYuv[1].m_size, 32);
//计算tmpPredYuv的satd
satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
//计算双向预测的bits开销
bidirBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
//计算双向预测的cost
bidirCost = satdCost + m_rdCost.getCost(bidirBits);
//前向&&后向的运动估计向量是否非0向量
bool bTryZero = bestME[0].mv.notZero() || bestME[1].mv.notZero();
if (bTryZero) //若存在非0向量
{
/* Do not try zero MV if unidir motion predictors are beyond * valid search area */
MV mvmin, mvmax;
int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
//重置搜索范围,以0向量为中心,merange为范围,得到搜索范围[mvmin,mvmax]
setSearchRange(cu, mvzero, merange, mvmin, mvmax);
mvmax.y += 2; // there is some pad for subpel refine
//搜索范围超分4倍
mvmin <<= 2;
mvmax <<= 2;
//检查mvp是否超过搜索范围
bTryZero &= bestME[0].mvp.checkRange(mvmin, mvmax);
bTryZero &= bestME[1].mvp.checkRange(mvmin, mvmax);
}
if (bTryZero) //若存在非0向量,且mvp都在搜索范围内
{
/* coincident blocks of the two reference pictures */
if (m_me.bChromaSATD) //若bChromaSATD
{
//设置mv为0向量
cu.m_mv[0][pu.puAbsPartIdx] = mvzero;
cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref;
cu.m_mv[1][pu.puAbsPartIdx] = mvzero;
cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref;
//进行运动补偿
motionCompensation(cu, pu, tmpPredYuv, true, true);
//累加luma和chroma的satd
satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) +
m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx);
}
else //若非bChromaSATD
{
//取前向预测co-located参考像素
const pixel* ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx);
//取后向预测co-located参考像素
const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx);
//得到stride
intptr_t refStride = slice->m_mref[0][0].lumaStride;
//进行前后向参考像素均值计算
primitives.pu[m_me.partEnum].pixelavg_pp[(tmpPredYuv.m_size % 64 == 0) && (refStride % 64 == 0)](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
//得到satd
satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
//得到前向mvp及mvpIdx
MV mvp0 = bestME[0].mvp;
int mvpIdx0 = bestME[0].mvpIdx;
//计算前向me的bits
uint32_t bits0 = bestME[0].bits - m_me.bitcost(bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
//得到后向mvp及mvpIdx
MV mvp1 = bestME[1].mvp;
int mvpIdx1 = bestME[1].mvpIdx;
//计算后向me的bits
uint32_t bits1 = bestME[1].bits - m_me.bitcost(bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
//累计cost
uint32_t cost = satdCost + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);
/* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost 以mvzero为最优运动估计向量,重新计算最优MVP,并输出其mvzero,mvpIdx,bits及cost*/
mvp0 = checkBestMVP(interMode.amvpCand[0][bestME[0].ref], mvzero, mvpIdx0, bits0, cost);
mvp1 = checkBestMVP(interMode.amvpCand[1][bestME[1].ref], mvzero, mvpIdx1, bits1, cost);
//若MV为0向量且从小计算后的mvp的cost小于最初计算的双向预测cost,则更新
if (cost < bidirCost)
{
bidir[0].mv = mvzero;
bidir[1].mv = mvzero;
bidir[0].mvp = mvp0;
bidir[1].mvp = mvp1;
bidir[0].mvpIdx = mvpIdx0;
bidir[1].mvpIdx = mvpIdx1;
bidirCost = cost;
bidirBits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
}
}
}
/* select best option and store into CU 为当前PU在 前向/后向/双向/merge 四种中最优的模式 */
//若merge最优
if (mrgCost < bidirCost && mrgCost < bestME[0].cost && mrgCost < bestME[1].cost)
{
//标记当前PU为merge
cu.m_mergeFlag[pu.puAbsPartIdx] = true;
cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; /* merge candidate ID is stored in L0 MVP idx */
cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx);
cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx);
cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx);
totalmebits += merge.bits;
}
//若双向最优
else if (bidirCost < bestME[0].cost && bidirCost < bestME[1].cost)
{
//lastMode为2,表示双向,为下一PU的getBlkBits服务
lastMode = 2;
//标记PU的mergeFlag为false
cu.m_mergeFlag[pu.puAbsPartIdx] = false;
//设置dir为双向
cu.setPUInterDir(3, pu.puAbsPartIdx, puIdx);
/* 前向 */
//设置前向MV和refIdx
cu.setPUMv(0, bidir[0].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx);
//记录前向mvd和mvpIdx
cu.m_mvd[0][pu.puAbsPartIdx] = bidir[0].mv - bidir[0].mvp;
cu.m_mvpIdx[0][pu.puAbsPartIdx] = bidir[0].mvpIdx;
/* 后向 */
//设置后向MV和refIdx
cu.setPUMv(1, bidir[1].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx);
//记录后向mvd和mvpIdx
cu.m_mvd[1][pu.puAbsPartIdx] = bidir[1].mv - bidir[1].mvp;
cu.m_mvpIdx[1][pu.puAbsPartIdx] = bidir[1].mvpIdx;
//记录下PU的运动估计bits开销
totalmebits += bidirBits;
}
//若前向最优
else if (bestME[0].cost <= bestME[1].cost)
{
//lastMode为0,表示前向,为下一PU的getBlkBits服务
lastMode = 0;
//标记PU的mergeflag为false
cu.m_mergeFlag[pu.puAbsPartIdx] = false;
//记录PU预测方向为前向
cu.setPUInterDir(1, pu.puAbsPartIdx, puIdx);
//记录PU的前向预测MV和refIdx
cu.setPUMv(0, bestME[0].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx);
//记录PU的前向预测mvd和mvpIdx
cu.m_mvd[0][pu.puAbsPartIdx] = bestME[0].mv - bestME[0].mvp;
cu.m_mvpIdx[0][pu.puAbsPartIdx] = bestME[0].mvpIdx;
//置PU的后向RefIdx无效,且MV为0
cu.setPURefIdx(1, REF_NOT_VALID, pu.puAbsPartIdx, puIdx);
cu.setPUMv(1, mvzero, pu.puAbsPartIdx, puIdx);
totalmebits += bestME[0].bits;
}
//若后向最优
else
{
//lastMode为1,表示后向,为下一PU的getBlkBits服务
lastMode = 1;
//标记PU的mergeflag为false
cu.m_mergeFlag[pu.puAbsPartIdx] = false;
//记录PU预测方向为后向
cu.setPUInterDir(2, pu.puAbsPartIdx, puIdx);
//记录PU的后向预测MV和refIdx
cu.setPUMv(1, bestME[1].mv, pu.puAbsPartIdx, puIdx);
cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx);
//记录PU的后向预测mvd和mvpIdx
cu.m_mvd[1][pu.puAbsPartIdx] = bestME[1].mv - bestME[1].mvp;
cu.m_mvpIdx[1][pu.puAbsPartIdx] = bestME[1].mvpIdx;
//置PU的前向RefIdx无效,且MV为0
cu.setPURefIdx(0, REF_NOT_VALID, pu.puAbsPartIdx, puIdx);
cu.setPUMv(0, mvzero, pu.puAbsPartIdx, puIdx);
//累加上当前PU的运动估计bits开销
totalmebits += bestME[1].bits;
}
//进行运动补偿
motionCompensation(cu, pu, *predYuv, true, bChromaMC);
} //end of for (int puIdx = 0; puIdx < numPart; puIdx++)
//记录下当前CU的运动估计bits
interMode.sa8dBits += totalmebits;
}
今天的文章Search::predInterSearch()分享到此就结束了,感谢您的阅读,如果确实帮到您,您可以动动手指转发给其他人。
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/58397.html