VTM10.0量化之RDOQ技术
Posted 神遁克里苏
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了VTM10.0量化之RDOQ技术相关的知识,希望对你有一定的参考价值。
其中第一步的量化与普通量化相同,步骤如下:
代码理解见注释(仅个人理解,欢迎指正):
void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx)
const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
const SPS &sps = *tu.cs->sps;
const CompArea &rect = tu.blocks[compID];
const uint32_t uiWidth = rect.width;
const uint32_t uiHeight = rect.height;
const ChannelType chType = toChannelType(compID);
const int channelBitDepth = sps.getBitDepth( chType );
const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
*/
// Represents scaling through forward transform
int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
if (tu.mtsIdx[compID] == MTS_SKIP && extendedPrecision)
iTransformShift = std::max<int>(0, iTransformShift);
double d64BlockUncodedCost = 0;
const uint32_t uiLog2BlockWidth = floorLog2(uiWidth);
const uint32_t uiLog2BlockHeight = floorLog2(uiHeight);
const uint32_t uiMaxNumCoeff = rect.area();
CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
int scalingListType = getScalingListType(tu.cu->predMode, compID);
CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
const TCoeff *plSrcCoeff = pSrc.buf;
TCoeff *piDstCoeff = tu.getCoeffs(compID).buf;
double *pdCostCoeff = m_pdCostCoeff;
double *pdCostSig = m_pdCostSig;
double *pdCostCoeff0 = m_pdCostCoeff0;
int *rateIncUp = m_rateIncUp;
int *rateIncDown = m_rateIncDown;
int *sigRateDelta = m_sigRateDelta;
TCoeff *deltaU = m_deltaU;
memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff);
memset( m_pdCostCoeff, 0, sizeof( double ) * uiMaxNumCoeff );
memset( m_pdCostSig, 0, sizeof( double ) * uiMaxNumCoeff );
memset( m_rateIncUp, 0, sizeof( int ) * uiMaxNumCoeff );
memset( m_rateIncDown, 0, sizeof( int ) * uiMaxNumCoeff );
memset( m_sigRateDelta, 0, sizeof( int ) * uiMaxNumCoeff );
memset( m_deltaU, 0, sizeof( TCoeff ) * uiMaxNumCoeff );
const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
const int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
const bool disableSMForLFNST = tu.cs->slice->getExplicitScalingListUsed() ? tu.cs->slice->getSPS()->getDisableScalingMatrixForLfnstBlks() : false;
const bool isLfnstApplied = tu.cu->lfnstIdx > 0 && (tu.cu->isSepTree() ? true : isLuma(compID));
const bool disableSMForACT = tu.cs->slice->getSPS()->getScalingMatrixForAlternativeColourSpaceDisabledFlag() && (tu.cs->slice->getSPS()->getScalingMatrixDesignatedColourSpaceFlag() == tu.cu->colorTransform);
const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied, disableSMForLFNST, disableSMForACT);
const int defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
const double defaultErrorScale = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
const int iCGSizeM1 = (1 << cctx.log2CGSize()) - 1;
int iCGLastScanPos = -1;
double d64BaseCost = 0;
int iLastScanPos = -1;
int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
int remRegBins = (uiWidth * uiHeight * ctxBinSampleRatio) >> 4;
uint32_t goRiceParam = 0;
double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
int iScanPos;
coeffGroupRDStats rdStats;
#if ENABLE_TRACING
DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
#endif
const uint32_t lfnstIdx = tu.cu->lfnstIdx;
const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
//遍历cg
cctx.initSubblock( subSetId );
uint32_t maxNonZeroPosInCG = iCGSizeM1;
if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
maxNonZeroPosInCG = 7;
memset( &rdStats, 0, sizeof (coeffGroupRDStats));
for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
iScanPos = cctx.minSubPos() + iScanPosinCG;
uint32_t blkPos = cctx.blockPos( iScanPos );
piDstCoeff[ blkPos ] = 0;
for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
//遍历cg中的点,按照z行扫描顺序遍历
iScanPos = cctx.minSubPos() + iScanPosinCG;
//===== quantization =====第一步,预量化
uint32_t uiBlkPos = cctx.blockPos(iScanPos);
// set coeff
//defaultQuantisationCoefficient是MF
const int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
const double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
//d*MF
const int64_t tmpLevel = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
//lLevelDouble,应该还是d*MF
const Intermediate_Int lLevelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
//计算出量化值
uint32_t uiMaxAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
const double dErr = double( lLevelDouble );
pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;//计算量化成0的cost
d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];//d64BlockUncodedCost表示tu内部全部量化为0的cost
piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;//把量化值放在piDstCoeff[ uiBlkPos ]中
if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
//设置当前tu块中最后一个量化系数
iLastScanPos = iScanPos;
iCGLastScanPos = cctx.subSetId();
if ( iLastScanPos >= 0 )//说明存在非0量化值
#if ENABLE_TRACING
uint32_t uiCGPosY = cctx.cgPosX();
uint32_t uiCGPosX = cctx.cgPosY();
uint32_t uiPosY = cctx.posY( iScanPos );
uint32_t uiPosX = cctx.posX( iScanPos );
DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
#endif
//===== coefficient level estimation =====第二步,确定最优量化值
unsigned ctxIdSig = 0;
if( iScanPos != iLastScanPos )//如果不是最后一个量化系数
ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
uint32_t uiLevel;
uint8_t ctxOffset = cctx.ctxOffsetAbs ();
uint32_t uiParCtx = cctx.parityCtxIdAbs ( ctxOffset );
uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset );
uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset );
uint32_t goRiceZero = 0;
if( remRegBins < 4 )
unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ];
goRiceZero = g_auiGoRicePosCoeff0(0, goRiceParam);
const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
if( iScanPos == iLastScanPos )//如果是最后一个量化系数
//在xGetCodedLevel中获取当前系数的最优量化值,放在uiLevel中
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
else//不是最后一个位置的量化系数
DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \\n", uiLevel );
DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\\n", (int64_t)( pdCostCoeff[iScanPos] ) );
deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
if( uiLevel > 0 )//量化值大于0的,计算附近3个值的rate
int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
else // uiLevel == 0
if( remRegBins < 4 )
int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
else
rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
piDstCoeff[ uiBlkPos ] = uiLevel;//当前量化值为uiLevel
d64BaseCost += pdCostCoeff [ iScanPos ];//计算cost和,d64BaseCost最后是整个tu的cost
if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
goRiceParam = 0;
else if( remRegBins >= 4 )
int sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
goRiceParam = g_auiGoRiceParsCoeff[sumAll];
remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
else//如果还不存在非0量化值
d64BaseCost += pdCostCoeff0[ iScanPos ];//那就加上量化为0 的cost
rdStats.d64SigCost += pdCostSig[ iScanPos ];//加上编码这个位置是否为0的cost
if (iScanPosinCG == 0 )//如果是cg中的左上角第一个点
rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];//编码cg中的左上角第一个点一个符号位的cost
if (piDstCoeff[ uiBlkPos ] )//如果当前系数非0
cctx.setSigGroup();
//d64CodedLevelandDist就加上(编码这个量化值的cost-符号的cost),d64CodedLevelandDist为量化值的cost(不加上符号的)
rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];//应该是只算error的cost和码率,不包括符号
rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];//d64UncodedDist为量化为全0的cost,只包括error和码率
if ( iScanPosinCG != 0 )//如果不是cg左上角第一个点,但是量化系数非0
rdStats.iNNZbeforePos0++;
//end for (iScanPosinCG)//当前cg内部的遍历
if (iCGLastScanPos >= 0)//如果目前tu块中已存在非0的量化系数
if( cctx.subSetId() )
if( !cctx.isSigGroup() )
const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
else
//跳过最后一个含有非0系数的cg,在下面的步骤(确定最后一个非0系数)时会处理它
if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
if ( rdStats.iNNZbeforePos0 == 0 )//说明除了左上角第一个点,其余量化值都为0
d64BaseCost -= rdStats.d64SigCost_0;//减去标识这个位置有非0系数的cost
rdStats.d64SigCost -= rdStats.d64SigCost_0;//rdStats.d64SigCost也减去标识这个位置有非0系数的cost
// rd-cost if SigCoeffGroupFlag = 0, initialization
//如果SigCoeffGroupFlag=0,则rd-cost初始化
double d64CostZeroCG = d64BaseCost; //这里获得的是目前算的tu中的cost
const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
if (cctx.subSetId() < iCGLastScanPos)
d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup,1);//1表示标识当前cg中含有非0系数的cost
d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);//0表示标识当前cg为全0的cost
pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);//标识当前cg中含有非0系数的cost
// try to convert the current coeff group from non-zero to all-zero
//计算把非0 的全部变为0的cost
//加上把非0变成0的失真
d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
//减去保持非0系数的成本
d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
//减去标识所有0和非0的cost
d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
// if we can save cost, change this block to all-zero block
if ( d64CostZeroCG < d64BaseCost )//如果当前cg量化为0的cost小于 保持量化值不变的cost
cctx.resetSigGroup();
d64BaseCost = d64CostZeroCG;
if (cctx.subSetId() < iCGLastScanPos)
pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
// reset coeffs to 0 in this block
for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
//遍历当前cg中的每个位置
iScanPos = cctx.minSubPos() + iScanPosinCG;
uint32_t uiBlkPos = cctx.blockPos( iScanPos );
if (piDstCoeff[ uiBlkPos ])//如果量化系数不为0
piDstCoeff [ uiBlkPos ] = 0;//将该处的量化系数置0
pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];//cost为置0的cost
pdCostSig [ iScanPos ] = 0;//标识是否含有非0系数的cost也为0
// end if ( d64CostAllZeros < d64BaseCost )
// end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
else
cctx.setSigGroup();
//end for (cctx.subSetId)//当前cg全部结束,进入下一个cg
//===== estimate last position =====第4步,确定最后一个位置
if ( iLastScanPos < 0 )//如果当前tu中所有cg遍历结束后,全是0,那么返回
return;
double d64BestCost = 0;
int iBestLastIdxP1 = 0;
if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
d64BestCost = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
else
bool previousCbf = tu.cbf[COMPONENT_Cb];
bool lastCbfIsInferred = false;
if( useIntraSubPartitions )
bool rootCbfSoFar = false;
bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth());
if( isLastSubPartition )
TransformUnit* tuPointer = tu.cu->firstTU;
for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, tu.depth);
tuPointer = tuPointer->next;
if( !rootCbfSoFar )
lastCbfIsInferred = true;
if( !lastCbfIsInferred )
previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
if( !lastCbfIsInferred )
d64BestCost = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
else
d64BestCost = d64BlockUncodedCost;//d64BlockUncodedCost为量化为全0的cost
int lastBitsX[LAST_SIGNIFICANT_GROUPS] = 0 ;
int lastBitsY[LAST_SIGNIFICANT_GROUPS] = 0 ;
int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
int bitsX = 0;
int bitsY = 0;
int ctxId;
//X-coordinate
for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
lastBitsX[ ctxId ] = bitsX + fB.intBits[ 0 ];
bitsX += fB.intBits[ 1 ];
lastBitsX[ctxId] = bitsX;
//Y-coordinate
for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
lastBitsY[ ctxId ] = bitsY + fB.intBits[ 0 ];
bitsY += fB.intBits[ 1 ];
lastBitsY[ctxId] = bitsY;
bool bFoundLast = false;
for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
//遍历cg(从最后一个有非0系数的cg开始)
d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];//先减去当前cg标识为(存在非0系数)的cost
if (cctx.isSigGroup( iCGScanPos ) )
uint32_t maxNonZeroPosInCG = iCGSizeM1;
if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
maxNonZeroPosInCG = 7;
for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
//遍历cg中的系数
iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
if (iScanPos > iLastScanPos)//如果iScanPos > 最后一个非0系数的位置(也就是还没遍历到最后一个非0系数哪里,则continue)
continue;
uint32_t uiBlkPos = cctx.blockPos( iScanPos );
if( piDstCoeff[ uiBlkPos ] )//如果当前量化值不为0,把当前系数作为最后一个量化系数
uint32_t uiPosY = uiBlkPos >> uiLog2BlockWidth;
uint32_t uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
double d64CostLast = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );//得到编码最后一个系数位置 的bits
//加上编码这一个系数的bits,减去编码标识这个系数为非0系数的cost
double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
if( totalCost < d64BestCost )//如果总的cost小于d64BestCost(d64BestCost为量化为全0的cost)
iBestLastIdxP1 = iScanPos + 1;
d64BestCost = totalCost;
if( piDstCoeff[ uiBlkPos ] > 1 )//如果遇到大于1的系数,那么跳出循环
bFoundLast = true;
break;
d64BaseCost -= pdCostCoeff[ iScanPos ];//减去保持量化值不变的cost
d64BaseCost += pdCostCoeff0[ iScanPos ];//加上量化为0的cost (因为要遍历下一个点了,当前的点应该置0了)
else//如果量化值为0
d64BaseCost -= pdCostSig[ iScanPos ];//那就减去标识这个位置为0的cost
//end for
if (bFoundLast)
break;
// end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\\n", (int64_t)( d64BlockUncodedCost ) );
DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded =%d\\n", (int64_t)( d64BaseCost ) );
// end for
for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
//对整个tu遍历,从刚刚选出的最后一个非0系数开始
//记录当前点的量化值,放在piDstCoeff中
int blkPos = cctx.blockPos( scanPos );
TCoeff level = piDstCoeff[ blkPos ];
uiAbsSum += level;
piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
//===== clean uncoded coefficients =====清除未编码的系数
for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
//SDH技术
if( cctx.signHiding() && uiAbsSum>=2)//如果使用SDH技术,并且系数绝对值之和大于等于2
const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
/ (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
+ 0.5);
int lastCG = -1;
int absSum = 0 ;
int n ;
for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
int subPos = subSet << cctx.log2CGSize();
int firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
absSum = 0 ;
for( n = iCGSizeM1; n >= 0; --n )
if( piDstCoeff[ cctx.blockPos( n + subPos )] )
lastNZPosInCG = n;
break;
for( n = 0; n <= iCGSizeM1; n++ )
if( piDstCoeff[ cctx.blockPos( n + subPos )] )
firstNZPosInCG = n;
break;
for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
if(lastNZPosInCG>=0 && lastCG==-1)
lastCG = 1;
if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
if( signbit!=(absSum&0x1) ) // hide but need tune
// calculate the cost
int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
int minPos = -1, finalChange = 0, curChange = 0;
for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
uint32_t uiBlkPos = cctx.blockPos( n + subPos );
if(piDstCoeff[ uiBlkPos ] != 0 )
int64_t costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
int64_t costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
costDown -= (4<<SCALE_BITS);
if(costUp<costDown)
curCost = costUp;
curChange = 1;
else
curChange = -1;
if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
curCost = std::numeric_limits<int64_t>::max();
else
curCost = costDown;
else
curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
curChange = 1 ;
if(n<firstNZPosInCG)
uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
if(thissignbit != signbit )
curCost = std::numeric_limits<int64_t>::max();
if( curCost<minCostInc)
minCostInc = curCost;
finalChange = curChange;
minPos = uiBlkPos;
if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
finalChange = -1;
if(plSrcCoeff[minPos]>=0)
piDstCoeff[minPos] += finalChange ;
else
piDstCoeff[minPos] -= finalChange ;
if(lastCG==1)
lastCG=0 ;
最后一段为SDH技术
SDH技术为:首先计算CG内所有非零系数幅值绝对值之和;然后对和值进行奇偶判断,若和值为偶数,则最后一个非零系数的符号被判为“+”,若和值为奇数,则最后一个非零系数的符号被判为“-”。使用SDH 技术,解码端直接判断CG中最后一个非零系数的符号,因此编码端可以省略它的语法元素coeff_sign_flag的嫡编码。然而,若SDH 的最终结果与CG中最后一个非零系数的真实符号不一致,需要对CG中的系数进行调整以使其保持一致,可以采用以下两种方法。
一种方法是编码过程中采用率失真优化量化 (RDOQ)的方法,即编码器允许使用SDH技术,通过调整量化系数,来使SDH判决结果与CG中最后一个非零系数的真实符号保持一致。具体哪个系数修改以及怎样修改,则根据率失真代价来决定。这种方法是基于RDOQ进行的,无须增加额外的运算量,因此编码复杂度增加不多。
对于不进行RDOQ的编码器,引入下面的方法。在一个CG中,计算原始系数值和反量化系数值之间的差值,对差值最大的量化值进行修正:若差值为正,则量化值加1,若差值为负,则量化值减1。由于差值最大的系数最接近其可行量化值,因此这种量化值的调整所产生的影响较小,且复杂度很低。
是否采用SDH技术需要显式标识,图像参数集中的语法元素sign_data_hiding_enabled_flag 置为1表示允许编码器应用SDH技术。具体使用方法规定:当编码器允许使用SDH技术且当前编码的CG中第一个非零系数和最后一个非零系数之间的间隔大于等于4时,则该CG才能省略最后一个非零系数符号的嫡编码。
以上是关于VTM10.0量化之RDOQ技术的主要内容,如果未能解决你的问题,请参考以下文章