// Forked from mirror/libbpg (C++ source, 1709 lines, 55 KiB).
/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *          Min Chen <chenm003@163.com>
 *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
|
|
|
|
#include "common.h"
#include "frame.h"
#include "framedata.h"
#include "picyuv.h"
#include "sao.h"

#include <cstring>
#include <utility>
|
|
|
|
namespace {
|
|
|
|
/* Divide num by den and round to the nearest integer, with exact halves
 * rounded away from zero (for a positive den).
 *
 * The intermediate terms are evaluated in 64 bits so that doubling num and
 * negating it cannot overflow a 32-bit int.
 * NOTE(review): den is assumed to be non-zero and positive; the only call
 * visible in this file passes a sample count guarded by `if (count)`. */
inline int32_t roundIBDI(int32_t num, int32_t den)
{
    const int64_t n = num;
    const int64_t d = den;
    const int64_t q = n >= 0 ? ((n * 2 + d) / (d * 2)) : -((-n * 2 + d) / (d * 2));

    return (int32_t)q;
}
|
|
|
|
/* get the sign of input variable (TODO: this is a dup, make common)
 *
 * Returns -1 for negative x, 0 for zero and +1 for positive x.
 * Written with comparisons so that it neither negates x (undefined for the
 * most negative int) nor relies on how a negative value is right-shifted. */
inline int8_t signOf(int x)
{
    return (int8_t)((x > 0) - (x < 0));
}
|
|
|
|
/* Three-way comparison of a and b: returns -1 when a < b, +1 when a > b and
 * 0 when they are equal. */
inline int signOf2(const int a, const int b)
{
    // NOTE: don't reorder the comparisons below; ICL, VC and GCC all optimize
    // this well only when they appear in this order.
    int r = 0;

    if (a < b)
        r = -1;
    if (a > b)
        r = 1;
    return r;
}
|
|
|
|
/* Evaluate (count * offset - 2 * offsetOrg) * offset.
 *
 * The operands are widened to 64 bits before multiplying: the function
 * already returns int64_t, but the original evaluated the whole expression in
 * 32-bit arithmetic, so a large product overflowed before it was widened. */
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
{
    const int64_t off = offset;

    return ((int64_t)count * off - (int64_t)offsetOrg * 2) * off;
}
|
|
} // end anonymous namespace
|
|
|
|
|
|
namespace X265_NS {
|
|
|
|
/* Maps an edge type (sum of the two neighbour signs plus 2, i.e. 0..4) to the
 * class index used for the statistics and offset arrays in this file.
 * Edge type 2 maps to class 0, for which processSaoUnitRow() stores a zero
 * offset. */
const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
{
    1, // 0
    2, // 1
    0, // 2
    3, // 3
    4  // 4
};
|
|
|
|
/* Puts the object into an empty state: every buffer pointer is NULL and the
 * lambdas, reference depth and per-depth SAO rates are zero. Buffers are
 * allocated later by create(). */
SAO::SAO()
{
    m_count = NULL;
    m_offset = NULL;
    m_offsetOrg = NULL;
    m_countPreDblk = NULL;
    m_offsetOrgPreDblk = NULL;
    m_refDepth = 0;
    m_lumaLambda = 0;
    m_chromaLambda = 0;
    m_param = NULL;
    m_clipTable = NULL;
    m_clipTableBase = NULL;

    for (int i = 0; i < 3; i++)
    {
        m_tmpU1[i] = NULL;
        m_tmpU2[i] = NULL;
    }

    m_tmpL1 = NULL;
    m_tmpL2 = NULL;

    // [0] = luma, [1] = chroma (see rdoSaoUnitRowEnd); second index is m_refDepth
    for (int i = 0; i < 2; i++)
        for (int depth = 0; depth < 4; depth++)
            m_depthSaoRate[i][depth] = 0;
}
|
|
|
|
/* Allocates all working buffers for the picture size described by param and
 * builds the clipping table.
 *
 * Returns true on success and false when an allocation fails
 * (CHECKED_MALLOC is assumed to jump to the `fail` label in that case).
 * Buffers allocated before the failure are left in place for destroy(). */
bool SAO::create(x265_param* param)
{
    m_param = param;
    if (param->internalCsp != X265_CSP_I400)
    {
        m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
        m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
        m_numPlanes = 3;
    }
    else
    {
        // no chroma planes: give the shifts a defined value instead of
        // leaving them uninitialized
        m_hChromaShift = 0;
        m_vChromaShift = 0;
        m_numPlanes = 1;
    }

    // picture size in CTUs, rounded up
    m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
    m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;

    const pixel maxY = (1 << X265_DEPTH) - 1;
    const pixel rangeExt = maxY >> 1;
    int numCtu = m_numCuInWidth * m_numCuInHeight;

    CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);

    CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
    CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);

    for (int i = 0; i < 3; i++)
    {
        // SAO asm code will read 1 pixel before and after, so pad by 2
        CHECKED_MALLOC(m_tmpU1[i], pixel, m_param->sourceWidth + 2);
        m_tmpU1[i] += 1;
        CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth + 2);
        m_tmpU2[i] += 1;
    }

    CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
    CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
    CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);

    CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu);
    CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu);

    // m_clipTable[v] clamps v to [0, maxY] for v in [-rangeExt, maxY + rangeExt)
    m_clipTable = &(m_clipTableBase[rangeExt]);

    for (int i = 0; i < rangeExt; i++)
        m_clipTableBase[i] = 0;

    for (int i = 0; i < maxY; i++)
        m_clipTable[i] = (pixel)i;

    for (int i = maxY; i < maxY + rangeExt; i++)
        m_clipTable[i] = maxY;

    return true;

fail:
    return false;
}
|
|
|
|
void SAO::destroy()
|
|
{
|
|
X265_FREE(m_clipTableBase);
|
|
|
|
X265_FREE(m_tmpL1);
|
|
X265_FREE(m_tmpL2);
|
|
|
|
for (int i = 0; i < 3; i++)
|
|
{
|
|
if (m_tmpU1[i]) X265_FREE(m_tmpU1[i] - 1);
|
|
if (m_tmpU2[i]) X265_FREE(m_tmpU2[i] - 1);
|
|
}
|
|
|
|
X265_FREE(m_count);
|
|
X265_FREE(m_offset);
|
|
X265_FREE(m_offsetOrg);
|
|
X265_FREE(m_countPreDblk);
|
|
X265_FREE(m_offsetOrgPreDblk);
|
|
}
|
|
|
|
/* allocate memory for SAO parameters */
|
|
void SAO::allocSaoParam(SAOParam* saoParam) const
|
|
{
|
|
saoParam->numCuInWidth = m_numCuInWidth;
|
|
|
|
saoParam->ctuParam[0] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
|
|
saoParam->ctuParam[1] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
|
|
saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
|
|
}
|
|
|
|
/* Prepares the SAO state for encoding one frame/slice.
 *
 * frame     - frame being processed; stored in m_frame
 * initState - entropy state loaded into the coder and the RD contexts
 * qp        - slice QP used to select the luma and chroma lambdas
 *
 * Also allocates the frame's SAOParam on first use, resets the statistics
 * and decides whether SAO is enabled for luma and chroma. */
void SAO::startSlice(Frame* frame, Entropy& initState, int qp)
{
    Slice* slice = frame->m_encData->m_slice;
    int qpCb;
    if (m_param->internalCsp == X265_CSP_I420)
        qpCb = x265_clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + slice->m_pps->chromaQpOffset[0]]);
    else
        qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], QP_MAX_SPEC);
    m_lumaLambda = x265_lambda2_tab[qp];
    m_chromaLambda = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
    m_frame = frame;

    // an unlisted slice type leaves m_refDepth at its previous value
    switch (slice->m_sliceType)
    {
    case I_SLICE:
        m_refDepth = 0;
        break;
    case P_SLICE:
        m_refDepth = 1;
        break;
    case B_SLICE:
        m_refDepth = 2 + !IS_REFERENCED(frame);
        break;
    }

    resetStats();

    m_entropyCoder.load(initState);
    m_rdContexts.next.load(initState);
    m_rdContexts.cur.load(initState);

    SAOParam* saoParam = frame->m_encData->m_saoParam;
    if (!saoParam)
    {
        saoParam = new SAOParam;
        allocSaoParam(saoParam);
        frame->m_encData->m_saoParam = saoParam;
    }

    saoParam->bSaoFlag[0] = true;
    saoParam->bSaoFlag[1] = (m_numPlanes > 1);

    m_numNoSao[0] = 0; // Luma
    m_numNoSao[1] = 0; // Chroma

    // NOTE: Allow SAO automatic turn-off only when frame parallelism is disabled.
    if (m_param->frameNumThreads == 1)
    {
        // disable SAO when the rate recorded for the previous depth
        // (see rdoSaoUnitRowEnd) exceeds the threshold
        if (m_refDepth > 0 && m_depthSaoRate[0][m_refDepth - 1] > SAO_ENCODING_RATE)
            saoParam->bSaoFlag[0] = false;
        if (m_refDepth > 0 && m_depthSaoRate[1][m_refDepth - 1] > SAO_ENCODING_RATE_CHROMA)
            saoParam->bSaoFlag[1] = false;
    }
}
|
|
|
|
// CTU-based SAO process without slice granularity
|
|
void SAO::processSaoCu(int addr, int typeIdx, int plane)
|
|
{
|
|
int x, y;
|
|
PicYuv* reconPic = m_frame->m_reconPic;
|
|
pixel* rec = reconPic->getPlaneAddr(plane, addr);
|
|
intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
|
|
uint32_t picWidth = m_param->sourceWidth;
|
|
uint32_t picHeight = m_param->sourceHeight;
|
|
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
|
|
int ctuWidth = g_maxCUSize;
|
|
int ctuHeight = g_maxCUSize;
|
|
uint32_t lpelx = cu->m_cuPelX;
|
|
uint32_t tpely = cu->m_cuPelY;
|
|
if (plane)
|
|
{
|
|
picWidth >>= m_hChromaShift;
|
|
picHeight >>= m_vChromaShift;
|
|
ctuWidth >>= m_hChromaShift;
|
|
ctuHeight >>= m_vChromaShift;
|
|
lpelx >>= m_hChromaShift;
|
|
tpely >>= m_vChromaShift;
|
|
}
|
|
uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
|
|
uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
|
|
ctuWidth = rpelx - lpelx;
|
|
ctuHeight = bpely - tpely;
|
|
|
|
int startX;
|
|
int startY;
|
|
int endX;
|
|
int endY;
|
|
pixel* tmpL;
|
|
pixel* tmpU;
|
|
|
|
int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1, signLeft1[2];
|
|
int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
|
|
|
|
memset(_upBuff1 + MAX_CU_SIZE, 0, 2 * sizeof(int8_t)); /* avoid valgrind uninit warnings */
|
|
|
|
{
|
|
const pixel* recR = &rec[ctuWidth - 1];
|
|
for (int i = 0; i < ctuHeight + 1; i++)
|
|
{
|
|
m_tmpL2[i] = *recR;
|
|
recR += stride;
|
|
}
|
|
|
|
tmpL = m_tmpL1;
|
|
tmpU = &(m_tmpU1[plane][lpelx]);
|
|
}
|
|
|
|
switch (typeIdx)
|
|
{
|
|
case SAO_EO_0: // dir: -
|
|
{
|
|
pixel firstPxl = 0, lastPxl = 0, row1FirstPxl = 0, row1LastPxl = 0;
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
|
|
if (ctuWidth & 15)
|
|
{
|
|
for (y = 0; y < ctuHeight; y++)
|
|
{
|
|
int signLeft = signOf(rec[startX] - tmpL[y]);
|
|
for (x = startX; x < endX; x++)
|
|
{
|
|
int signRight = signOf(rec[x] - rec[x + 1]);
|
|
int edgeType = signRight + signLeft + 2;
|
|
signLeft = -signRight;
|
|
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
}
|
|
|
|
rec += stride;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (y = 0; y < ctuHeight; y += 2)
|
|
{
|
|
signLeft1[0] = signOf(rec[startX] - tmpL[y]);
|
|
signLeft1[1] = signOf(rec[stride + startX] - tmpL[y + 1]);
|
|
|
|
if (!lpelx)
|
|
{
|
|
firstPxl = rec[0];
|
|
row1FirstPxl = rec[stride];
|
|
}
|
|
|
|
if (rpelx == picWidth)
|
|
{
|
|
lastPxl = rec[ctuWidth - 1];
|
|
row1LastPxl = rec[stride + ctuWidth - 1];
|
|
}
|
|
|
|
primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, signLeft1, stride);
|
|
|
|
if (!lpelx)
|
|
{
|
|
rec[0] = firstPxl;
|
|
rec[stride] = row1FirstPxl;
|
|
}
|
|
|
|
if (rpelx == picWidth)
|
|
{
|
|
rec[ctuWidth - 1] = lastPxl;
|
|
rec[stride + ctuWidth - 1] = row1LastPxl;
|
|
}
|
|
|
|
rec += 2 * stride;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case SAO_EO_1: // dir: |
|
|
{
|
|
startY = !tpely;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
|
|
if (!tpely)
|
|
rec += stride;
|
|
|
|
if (ctuWidth & 15)
|
|
{
|
|
for (x = 0; x < ctuWidth; x++)
|
|
upBuff1[x] = signOf(rec[x] - tmpU[x]);
|
|
|
|
for (y = startY; y < endY; y++)
|
|
{
|
|
for (x = 0; x < ctuWidth; x++)
|
|
{
|
|
int8_t signDown = signOf(rec[x] - rec[x + stride]);
|
|
int edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x] = -signDown;
|
|
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
}
|
|
|
|
rec += stride;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
primitives.sign(upBuff1, rec, tmpU, ctuWidth);
|
|
|
|
int diff = (endY - startY) % 2;
|
|
for (y = startY; y < endY - diff; y += 2)
|
|
{
|
|
primitives.saoCuOrgE1_2Rows(rec, upBuff1, m_offsetEo, stride, ctuWidth);
|
|
rec += 2 * stride;
|
|
}
|
|
if (diff & 1)
|
|
primitives.saoCuOrgE1(rec, upBuff1, m_offsetEo, stride, ctuWidth);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case SAO_EO_2: // dir: 135
|
|
{
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
|
|
|
|
startY = !tpely;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
|
|
|
|
if (!tpely)
|
|
rec += stride;
|
|
|
|
if (!(ctuWidth & 15))
|
|
{
|
|
int8_t firstSign, lastSign;
|
|
|
|
if (!lpelx)
|
|
firstSign = upBuff1[0];
|
|
|
|
if (rpelx == picWidth)
|
|
lastSign = upBuff1[ctuWidth - 1];
|
|
|
|
primitives.sign(upBuff1, rec, &tmpU[- 1], ctuWidth);
|
|
|
|
if (!lpelx)
|
|
upBuff1[0] = firstSign;
|
|
|
|
if (rpelx == picWidth)
|
|
upBuff1[ctuWidth - 1] = lastSign;
|
|
}
|
|
else
|
|
{
|
|
for (x = startX; x < endX; x++)
|
|
upBuff1[x] = signOf(rec[x] - tmpU[x - 1]);
|
|
}
|
|
|
|
if (ctuWidth & 15)
|
|
{
|
|
for (y = startY; y < endY; y++)
|
|
{
|
|
upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
|
|
for (x = startX; x < endX; x++)
|
|
{
|
|
int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
|
|
int edgeType = signDown + upBuff1[x] + 2;
|
|
upBufft[x + 1] = -signDown;
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
}
|
|
|
|
std::swap(upBuff1, upBufft);
|
|
|
|
rec += stride;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (y = startY; y < endY; y++)
|
|
{
|
|
int8_t iSignDown2 = signOf(rec[stride + startX] - tmpL[y]);
|
|
|
|
primitives.saoCuOrgE2[endX > 16](rec + startX, upBufft + startX, upBuff1 + startX, m_offsetEo, endX - startX, stride);
|
|
|
|
upBufft[startX] = iSignDown2;
|
|
|
|
std::swap(upBuff1, upBufft);
|
|
rec += stride;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case SAO_EO_3: // dir: 45
|
|
{
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
|
|
|
|
startY = !tpely;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
|
|
|
|
if (!tpely)
|
|
rec += stride;
|
|
|
|
if (ctuWidth & 15)
|
|
{
|
|
for (x = startX - 1; x < endX; x++)
|
|
upBuff1[x] = signOf(rec[x] - tmpU[x + 1]);
|
|
|
|
for (y = startY; y < endY; y++)
|
|
{
|
|
x = startX;
|
|
int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
|
|
int edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x - 1] = -signDown;
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
|
|
for (x = startX + 1; x < endX; x++)
|
|
{
|
|
signDown = signOf(rec[x] - rec[x + stride - 1]);
|
|
edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x - 1] = -signDown;
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
}
|
|
|
|
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
|
|
|
|
rec += stride;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int8_t firstSign, lastSign;
|
|
|
|
if (lpelx)
|
|
firstSign = signOf(rec[-1] - tmpU[0]);
|
|
if (rpelx == picWidth)
|
|
lastSign = upBuff1[ctuWidth - 1];
|
|
|
|
primitives.sign(upBuff1, rec, &tmpU[1], ctuWidth);
|
|
|
|
if (lpelx)
|
|
upBuff1[-1] = firstSign;
|
|
if (rpelx == picWidth)
|
|
upBuff1[ctuWidth - 1] = lastSign;
|
|
|
|
for (y = startY; y < endY; y++)
|
|
{
|
|
x = startX;
|
|
int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
|
|
int edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x - 1] = -signDown;
|
|
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
|
|
|
|
primitives.saoCuOrgE3[endX > 16](rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
|
|
|
|
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
|
|
|
|
rec += stride;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
case SAO_BO:
|
|
{
|
|
const int8_t* offsetBo = m_offsetBo;
|
|
|
|
if (ctuWidth & 15)
|
|
{
|
|
#define SAO_BO_BITS 5
|
|
const int boShift = X265_DEPTH - SAO_BO_BITS;
|
|
for (y = 0; y < ctuHeight; y++)
|
|
{
|
|
for (x = 0; x < ctuWidth; x++)
|
|
{
|
|
int val = rec[x] + offsetBo[rec[x] >> boShift];
|
|
if (val < 0)
|
|
val = 0;
|
|
else if (val > ((1 << X265_DEPTH) - 1))
|
|
val = ((1 << X265_DEPTH) - 1);
|
|
rec[x] = (pixel)val;
|
|
}
|
|
rec += stride;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
primitives.saoCuOrgB0(rec, offsetBo, ctuWidth, ctuHeight, stride);
|
|
}
|
|
break;
|
|
}
|
|
default: break;
|
|
}
|
|
|
|
// if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
|
|
std::swap(m_tmpL1, m_tmpL2);
|
|
}
|
|
|
|
/* Process SAO all units */
|
|
void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
|
|
{
|
|
PicYuv* reconPic = m_frame->m_reconPic;
|
|
intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
|
|
uint32_t picWidth = m_param->sourceWidth;
|
|
int ctuWidth = g_maxCUSize;
|
|
int ctuHeight = g_maxCUSize;
|
|
if (plane)
|
|
{
|
|
picWidth >>= m_hChromaShift;
|
|
ctuWidth >>= m_hChromaShift;
|
|
ctuHeight >>= m_vChromaShift;
|
|
}
|
|
|
|
if (!idxY)
|
|
{
|
|
pixel* rec = reconPic->m_picOrg[plane];
|
|
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
|
|
}
|
|
|
|
int addr = idxY * m_numCuInWidth;
|
|
pixel* rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
|
|
|
|
for (int i = 0; i < ctuHeight + 1; i++)
|
|
{
|
|
m_tmpL1[i] = rec[0];
|
|
rec += stride;
|
|
}
|
|
|
|
rec -= (stride << 1);
|
|
|
|
memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth);
|
|
|
|
for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
|
|
{
|
|
addr = idxY * m_numCuInWidth + idxX;
|
|
|
|
bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
|
|
int typeIdx = ctuParam[addr].typeIdx;
|
|
|
|
if (typeIdx >= 0)
|
|
{
|
|
if (!mergeLeftFlag)
|
|
{
|
|
if (typeIdx == SAO_BO)
|
|
{
|
|
memset(m_offsetBo, 0, sizeof(m_offsetBo));
|
|
|
|
for (int i = 0; i < SAO_NUM_OFFSET; i++)
|
|
m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
|
|
}
|
|
else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
|
|
{
|
|
int offset[NUM_EDGETYPE];
|
|
offset[0] = 0;
|
|
for (int i = 0; i < SAO_NUM_OFFSET; i++)
|
|
offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
|
|
|
|
for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
|
|
m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
|
|
}
|
|
}
|
|
processSaoCu(addr, typeIdx, plane);
|
|
}
|
|
else if (idxX != (m_numCuInWidth - 1))
|
|
{
|
|
rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
|
|
|
|
for (int i = 0; i < ctuHeight + 1; i++)
|
|
{
|
|
m_tmpL1[i] = rec[ctuWidth - 1];
|
|
rec += stride;
|
|
}
|
|
}
|
|
}
|
|
|
|
std::swap(m_tmpU1[plane], m_tmpU2[plane]);
|
|
}
|
|
|
|
/* Resets one CTU's SAO parameters to "SAO off": no merge, typeIdx -1,
 * band position 0 and all offsets zero. */
void SAO::resetSaoUnit(SaoCtuParam* saoUnit)
{
    saoUnit->mergeMode = SAO_MERGE_NONE;
    saoUnit->typeIdx = -1;
    saoUnit->bandPos = 0;

    for (int i = 0; i < SAO_NUM_OFFSET; i++)
        saoUnit->offset[i] = 0;
}
|
|
|
|
/* Copies the merge mode, type index, band position and all offsets from
 * saoUnitSrc to saoUnitDst. */
void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
{
    saoUnitDst->mergeMode = saoUnitSrc->mergeMode;
    saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
    saoUnitDst->bandPos = saoUnitSrc->bandPos;

    for (int i = 0; i < SAO_NUM_OFFSET; i++)
        saoUnitDst->offset[i] = saoUnitSrc->offset[i];
}
|
|
|
|
/* Calculate SAO statistics for current CTU without non-crossing slice */
|
|
void SAO::calcSaoStatsCu(int addr, int plane)
|
|
{
|
|
const PicYuv* reconPic = m_frame->m_reconPic;
|
|
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
|
|
const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
|
|
const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
|
|
const pixel* fenc;
|
|
const pixel* rec;
|
|
intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
|
|
uint32_t picWidth = m_param->sourceWidth;
|
|
uint32_t picHeight = m_param->sourceHeight;
|
|
int ctuWidth = g_maxCUSize;
|
|
int ctuHeight = g_maxCUSize;
|
|
uint32_t lpelx = cu->m_cuPelX;
|
|
uint32_t tpely = cu->m_cuPelY;
|
|
if (plane)
|
|
{
|
|
picWidth >>= m_hChromaShift;
|
|
picHeight >>= m_vChromaShift;
|
|
ctuWidth >>= m_hChromaShift;
|
|
ctuHeight >>= m_vChromaShift;
|
|
lpelx >>= m_hChromaShift;
|
|
tpely >>= m_vChromaShift;
|
|
}
|
|
uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
|
|
uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
|
|
ctuWidth = rpelx - lpelx;
|
|
ctuHeight = bpely - tpely;
|
|
|
|
int startX;
|
|
int startY;
|
|
int endX;
|
|
int endY;
|
|
|
|
int skipB = plane ? 2 : 4;
|
|
int skipR = plane ? 3 : 5;
|
|
|
|
int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
|
|
int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
|
|
|
|
// SAO_BO:
|
|
{
|
|
if (m_param->bSaoNonDeblocked)
|
|
{
|
|
skipB = plane ? 1 : 3;
|
|
skipR = plane ? 2 : 4;
|
|
}
|
|
|
|
endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
|
|
endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
|
|
|
|
primitives.saoCuStatsBO(fenc0, rec0, stride, endX, endY, m_offsetOrg[plane][SAO_BO], m_count[plane][SAO_BO]);
|
|
}
|
|
|
|
{
|
|
// SAO_EO_0: // dir: -
|
|
{
|
|
if (m_param->bSaoNonDeblocked)
|
|
{
|
|
skipB = plane ? 1 : 3;
|
|
skipR = plane ? 3 : 5;
|
|
}
|
|
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
|
|
|
|
primitives.saoCuStatsE0(fenc0 + startX, rec0 + startX, stride, endX - startX, ctuHeight - skipB, m_offsetOrg[plane][SAO_EO_0], m_count[plane][SAO_EO_0]);
|
|
}
|
|
|
|
// SAO_EO_1: // dir: |
|
|
{
|
|
if (m_param->bSaoNonDeblocked)
|
|
{
|
|
skipB = plane ? 2 : 4;
|
|
skipR = plane ? 2 : 4;
|
|
}
|
|
|
|
fenc = fenc0;
|
|
rec = rec0;
|
|
|
|
startY = !tpely;
|
|
endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
|
|
if (!tpely)
|
|
{
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
|
|
primitives.sign(upBuff1, rec, &rec[- stride], ctuWidth);
|
|
|
|
primitives.saoCuStatsE1(fenc0 + startY * stride, rec0 + startY * stride, stride, upBuff1, endX, endY - startY, m_offsetOrg[plane][SAO_EO_1], m_count[plane][SAO_EO_1]);
|
|
}
|
|
|
|
// SAO_EO_2: // dir: 135
|
|
{
|
|
if (m_param->bSaoNonDeblocked)
|
|
{
|
|
skipB = plane ? 2 : 4;
|
|
skipR = plane ? 3 : 5;
|
|
}
|
|
|
|
fenc = fenc0;
|
|
rec = rec0;
|
|
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
|
|
|
|
startY = !tpely;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
|
|
if (!tpely)
|
|
{
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
|
|
primitives.sign(&upBuff1[startX], &rec[startX], &rec[startX - stride - 1], (endX - startX));
|
|
|
|
primitives.saoCuStatsE2(fenc0 + startX + startY * stride, rec0 + startX + startY * stride, stride, upBuff1 + startX, upBufft + startX, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_2], m_count[plane][SAO_EO_2]);
|
|
}
|
|
|
|
// SAO_EO_3: // dir: 45
|
|
{
|
|
if (m_param->bSaoNonDeblocked)
|
|
{
|
|
skipB = plane ? 2 : 4;
|
|
skipR = plane ? 3 : 5;
|
|
}
|
|
|
|
fenc = fenc0;
|
|
rec = rec0;
|
|
|
|
startX = !lpelx;
|
|
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
|
|
|
|
startY = !tpely;
|
|
endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
|
|
|
|
if (!tpely)
|
|
{
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
|
|
primitives.sign(&upBuff1[startX - 1], &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
|
|
|
|
primitives.saoCuStatsE3(fenc0 + startX + startY * stride, rec0 + startX + startY * stride, stride, upBuff1 + startX, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_3], m_count[plane][SAO_EO_3]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Collects SAO statistics for the CTU at (idxX, idxY) into
 * m_offsetOrgPreDblk[addr] / m_countPreDblk[addr], for every coded plane and
 * for the band offset type plus the four edge offset types.
 *
 * Only the skipR right-most columns and skipB bottom rows that
 * calcSaoStatsCu() leaves out in bSaoNonDeblocked mode are visited here
 * (the inner loops start at startX while y < startY). Both per-CTU
 * accumulators are cleared first.
 *
 * The plane loop is bounded by m_numPlanes, not NUM_PLANE, so that no chroma
 * plane is touched when create() configured a single-plane (I400) picture.
 *
 * NOTE(review): the CTU is taken from the `frame` argument while the pictures
 * come from m_frame; confirm both always refer to the same frame. */
void SAO::calcSaoStatsCu_BeforeDblk(Frame* frame, int idxX, int idxY)
{
    int addr = idxX + m_numCuInWidth * idxY;

    int x, y;
    const CUData* cu = frame->m_encData->getPicCTU(addr);
    const PicYuv* reconPic = m_frame->m_reconPic;
    const pixel* fenc;
    const pixel* rec;
    intptr_t stride = reconPic->m_stride;
    uint32_t picWidth = m_param->sourceWidth;
    uint32_t picHeight = m_param->sourceHeight;
    int ctuWidth = g_maxCUSize;
    int ctuHeight = g_maxCUSize;
    uint32_t lpelx = cu->m_cuPelX;
    uint32_t tpely = cu->m_cuPelY;
    uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
    uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
    ctuWidth = rpelx - lpelx;
    ctuHeight = bpely - tpely;

    int startX;
    int startY;
    int endX;
    int endY;
    int firstX, firstY;
    int32_t* stats;
    int32_t* count;

    int skipB, skipR;

    // the sign buffers are indexed from -1, hence the one-element offset
    int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
    int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;

    const int boShift = X265_DEPTH - SAO_BO_BITS;

    memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
    memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));

    for (int plane = 0; plane < m_numPlanes; plane++)
    {
        if (plane == 1)
        {
            // switch the geometry to chroma once; plane 2 reuses it
            stride = reconPic->m_strideC;
            picWidth >>= m_hChromaShift;
            picHeight >>= m_vChromaShift;
            ctuWidth >>= m_hChromaShift;
            ctuHeight >>= m_vChromaShift;
            lpelx >>= m_hChromaShift;
            tpely >>= m_vChromaShift;
            rpelx >>= m_hChromaShift;
            bpely >>= m_vChromaShift;
        }

        // SAO_BO:

        skipB = plane ? 1 : 3;
        skipR = plane ? 2 : 4;

        stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
        count = m_countPreDblk[addr][plane][SAO_BO];

        const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
        const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
        fenc = fenc0;
        rec = rec0;

        startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
        startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;

        for (y = 0; y < ctuHeight; y++)
        {
            for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
            {
                int classIdx = 1 + (rec[x] >> boShift);
                stats[classIdx] += (fenc[x] - rec[x]);
                count[classIdx]++;
            }

            fenc += stride;
            rec += stride;
        }

        // SAO_EO_0: // dir: -
        {
            skipB = plane ? 1 : 3;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
            count = m_countPreDblk[addr][plane][SAO_EO_0];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
            firstX = !lpelx;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            endX = ctuWidth - 1; // not refer right CTU

            for (y = 0; y < ctuHeight; y++)
            {
                x = (y < startY ? startX : firstX);
                int signLeft = signOf(rec[x] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signRight = signOf(rec[x] - rec[x + 1]);
                    int edgeType = signRight + signLeft + 2;
                    signLeft = -signRight;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_1: // dir: |
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 2 : 4;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
            count = m_countPreDblk[addr][plane][SAO_EO_1];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstY = !tpely;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = startX; x < ctuWidth; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride]);

            for (y = firstY; y < endY; y++)
            {
                // the row just above startY is scanned in full so that
                // upBuff1 is valid for the rows that follow
                for (x = (y < startY - 1 ? startX : 0); x < ctuWidth; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x] = -signDown;

                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                fenc += stride;
                rec += stride;
            }
        }

        // SAO_EO_2: // dir: 135
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
            count = m_countPreDblk[addr][plane][SAO_EO_2];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = startX; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);

            for (y = firstY; y < endY; y++)
            {
                x = (y < startY - 1 ? startX : firstX);
                upBufft[x] = signOf(rec[x + stride] - rec[x - 1]);
                for (; x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBufft[x + 1] = -signDown;

                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                std::swap(upBuff1, upBufft);

                rec += stride;
                fenc += stride;
            }
        }

        // SAO_EO_3: // dir: 45
        {
            skipB = plane ? 2 : 4;
            skipR = plane ? 3 : 5;

            stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
            count = m_countPreDblk[addr][plane][SAO_EO_3];

            fenc = fenc0;
            rec = rec0;

            startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
            startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
            firstX = !lpelx;
            firstY = !tpely;
            // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
            // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
            endX = ctuWidth - 1; // not refer right CTU
            endY = ctuHeight - 1; // not refer below CTU
            if (!tpely)
            {
                fenc += stride;
                rec += stride;
            }

            for (x = startX - 1; x < endX; x++)
                upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);

            for (y = firstY; y < endY; y++)
            {
                for (x = (y < startY - 1 ? startX : firstX); x < endX; x++)
                {
                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
                    int edgeType = signDown + upBuff1[x] + 2;
                    upBuff1[x - 1] = -signDown;

                    if (x < startX && y < startY)
                        continue;

                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
                    count[s_eoTable[edgeType]]++;
                }

                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);

                rec += stride;
                fenc += stride;
            }
        }
    }
}
|
|
|
|
/* reset offset statistics */
|
|
void SAO::resetStats()
|
|
{
|
|
memset(m_count, 0, sizeof(PerClass) * NUM_PLANE);
|
|
memset(m_offset, 0, sizeof(PerClass) * NUM_PLANE);
|
|
memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE);
|
|
}
|
|
|
|
void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
|
|
{
|
|
if (!saoParam->bSaoFlag[0])
|
|
m_depthSaoRate[0][m_refDepth] = 1.0;
|
|
else
|
|
m_depthSaoRate[0][m_refDepth] = m_numNoSao[0] / ((double)numctus);
|
|
|
|
if (!saoParam->bSaoFlag[1])
|
|
m_depthSaoRate[1][m_refDepth] = 1.0;
|
|
else
|
|
m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus);
|
|
}
|
|
|
|
/* Chooses the SAO parameters for every CTU of CTU row idxY.
 *
 * For each CTU the statistics are reset (or seeded from the pre-deblock
 * statistics when bSaoNonDeblocked is set) and gathered, the new-parameter
 * candidate is evaluated by saoComponentParamDist()/sao2ChromaParamDist(),
 * and its cost is compared with merging from the left and from the upper CTU.
 * The cheapest choice is written to saoParam->ctuParam and the matching
 * entropy state becomes m_rdContexts.cur for the next CTU. */
void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
{
    SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
    double mergeDist[NUM_MERGE_MODE];
    bool allowMerge[2]; // left, up
    allowMerge[1] = (idxY > 0);

    for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
    {
        int addr = idxX + idxY * m_numCuInWidth;
        int addrUp = idxY ? addr - m_numCuInWidth : -1;
        int addrLeft = idxX ? addr - 1 : -1;
        allowMerge[0] = (idxX > 0);

        // entropy state after coding "no merge" flags, kept in temp
        m_entropyCoder.load(m_rdContexts.cur);
        if (allowMerge[0])
            m_entropyCoder.codeSaoMerge(0);
        if (allowMerge[1])
            m_entropyCoder.codeSaoMerge(0);
        m_entropyCoder.store(m_rdContexts.temp);
        // reset stats Y, Cb, Cr
        for (int plane = 0; plane < m_numPlanes; plane++)
        {
            for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
            {
                for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
                {
                    m_offset[plane][j][k] = 0;
                    if (m_param->bSaoNonDeblocked)
                    {
                        m_count[plane][j][k] = m_countPreDblk[addr][plane][j][k];
                        m_offsetOrg[plane][j][k] = m_offsetOrgPreDblk[addr][plane][j][k];
                    }
                    else
                    {
                        m_count[plane][j][k] = 0;
                        m_offsetOrg[plane][j][k] = 0;
                    }
                }
            }

            saoParam->ctuParam[plane][addr].mergeMode = SAO_MERGE_NONE;
            saoParam->ctuParam[plane][addr].typeIdx = -1;
            saoParam->ctuParam[plane][addr].bandPos = 0;
            if (saoParam->bSaoFlag[plane > 0])
                calcSaoStatsCu(addr, plane);
        }

        saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);

        if (m_numPlanes > 1)
        {
            sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
        }

        if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
        {
            // Cost of new SAO_params
            m_entropyCoder.load(m_rdContexts.cur);
            m_entropyCoder.resetBits();
            if (allowMerge[0])
                m_entropyCoder.codeSaoMerge(0);
            if (allowMerge[1])
                m_entropyCoder.codeSaoMerge(0);
            for (int plane = 0; plane < m_numPlanes; plane++)
            {
                if (saoParam->bSaoFlag[plane > 0])
                    m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
            }

            uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
            double bestCost = mergeDist[0] + (double)rate;
            m_entropyCoder.store(m_rdContexts.temp);

            // Cost of Merge
            for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
            {
                if (!allowMerge[mergeIdx])
                    continue;

                m_entropyCoder.load(m_rdContexts.cur);
                m_entropyCoder.resetBits();
                // merge-left flag is 1 for mergeIdx 0 and 0 for mergeIdx 1;
                // the merge-up flag is only coded for mergeIdx 1
                if (allowMerge[0])
                    m_entropyCoder.codeSaoMerge(1 - mergeIdx);
                if (allowMerge[1] && (mergeIdx == 1))
                    m_entropyCoder.codeSaoMerge(1);

                rate = m_entropyCoder.getNumberOfWrittenBits();
                double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
                if (mergeCost < bestCost)
                {
                    SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
                    bestCost = mergeCost;
                    m_entropyCoder.store(m_rdContexts.temp);
                    for (int plane = 0; plane < m_numPlanes; plane++)
                    {
                        mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
                        if (saoParam->bSaoFlag[plane > 0])
                            copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeIdx]);
                    }
                }
            }

            if (saoParam->ctuParam[0][addr].typeIdx < 0)
                m_numNoSao[0]++;
            // ctuParam[1] is only written by this function when chroma
            // planes are processed, so it is only inspected in that case
            if (m_numPlanes > 1 && saoParam->ctuParam[1][addr].typeIdx < 0)
                m_numNoSao[1]++;
            // the winning candidate's entropy state becomes the current one
            m_entropyCoder.load(m_rdContexts.temp);
            m_entropyCoder.store(m_rdContexts.cur);
        }
    }
}
|
|
|
|
/** rate distortion optimization of SAO unit */
|
|
inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
|
|
{
|
|
int64_t estDist = 0;
|
|
|
|
for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
|
|
{
|
|
int32_t count = m_count[plane][typeIdx][classIdx];
|
|
int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
|
|
int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
|
|
|
|
if (typeIdx == SAO_BO)
|
|
{
|
|
currentDistortionTableBo[classIdx - 1] = 0;
|
|
currentRdCostTableBo[classIdx - 1] = lambda;
|
|
}
|
|
if (count)
|
|
{
|
|
int offset = roundIBDI(offsetOrg << (X265_DEPTH - 8), count);
|
|
offset = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
|
|
if (typeIdx < SAO_BO)
|
|
{
|
|
if (classIdx < 3)
|
|
offset = X265_MAX(offset, 0);
|
|
else
|
|
offset = X265_MIN(offset, 0);
|
|
}
|
|
offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
|
|
}
|
|
else
|
|
{
|
|
offsetOrg = 0;
|
|
offsetOut = 0;
|
|
}
|
|
if (typeIdx != SAO_BO)
|
|
estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
|
|
}
|
|
|
|
return estDist;
|
|
}
|
|
|
|
inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
|
|
{
|
|
int offsetOut = 0;
|
|
|
|
// Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
|
|
double tempMinCost = lambda;
|
|
while (offset != 0)
|
|
{
|
|
// Calculate the bits required for signalling the offset
|
|
int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : (abs(offset) + 1);
|
|
if (abs(offset) == OFFSET_THRESH - 1)
|
|
tempRate--;
|
|
|
|
// Do the dequntization before distorion calculation
|
|
int tempOffset = offset << SAO_BIT_INC;
|
|
int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg);
|
|
double tempCost = ((double)tempDist + lambda * (double)tempRate);
|
|
if (tempCost < tempMinCost)
|
|
{
|
|
tempMinCost = tempCost;
|
|
offsetOut = offset;
|
|
if (typeIdx == SAO_BO)
|
|
{
|
|
currentDistortionTableBo[classIdx - 1] = (int)tempDist;
|
|
currentRdCostTableBo[classIdx - 1] = tempCost;
|
|
}
|
|
}
|
|
offset = (offset > 0) ? (offset - 1) : (offset + 1);
|
|
}
|
|
|
|
return offsetOut;
|
|
}
|
|
|
|
/* Choose the luma (plane 0) SAO parameters of CTU `addr` and cost the merge
 * candidates.
 *
 * The CTU's parameter unit is first reset and its coded size, times the luma
 * lambda, becomes the cost to beat.  Each SAO type is then estimated with
 * estSaoTypeDist(); for SAO_BO the run of SAO_BO_LEN consecutive bands with the
 * lowest summed RD cost is picked.  The cheapest candidate is copied into
 * saoParam->ctuParam[0][addr] and coded into m_rdContexts.temp.
 *
 * On return:
 *   mergeDist[0]      - distortion of the chosen parameters divided by lambda
 *   mergeDist[1], [2] - same for merge-left / merge-up, written only when the
 *                       corresponding neighbour address is non-negative
 *   mergeSaoParam[0], [1] - copy of the left / up neighbour's parameters with
 *                       mergeMode set, or a reset unit when there is no neighbour */
void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist)
{
    SaoCtuParam* lumaParam = &saoParam->ctuParam[0][addr];

    int     bandDist[MAX_NUM_SAO_CLASS];
    double  bandCost[MAX_NUM_SAO_CLASS];
    double  bestWindowCost = MAX_DOUBLE;
    int     bestBandPos = 0;
    int64_t bestDist = 0;

    // reference cost: bits needed to code the freshly reset unit
    resetSaoUnit(lumaParam);
    m_entropyCoder.load(m_rdContexts.temp);
    m_entropyCoder.resetBits();
    m_entropyCoder.codeSaoOffset(*lumaParam, 0);
    double bestCost = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;

    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
    {
        const bool isBandType = (typeIdx == SAO_BO);
        int64_t typeDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, bandDist, bandCost);

        if (isBandType)
        {
            // find the start of the cheapest window of SAO_BO_LEN bands
            for (int first = 0; first <= SAO_NUM_BO_CLASSES - SAO_BO_LEN; first++)
            {
                double windowCost = 0.0;
                for (int k = 0; k < SAO_BO_LEN; k++)
                    windowCost += bandCost[first + k];

                if (windowCost < bestWindowCost)
                {
                    bestWindowCost = windowCost;
                    bestBandPos = first;
                }
            }

            // distortion of the band type is that of the selected window only
            typeDist = 0;
            for (int k = 0; k < SAO_BO_LEN; k++)
                typeDist += bandDist[bestBandPos + k];
        }

        SaoCtuParam candidate;
        candidate.mergeMode = SAO_MERGE_NONE;
        candidate.typeIdx = typeIdx;
        candidate.bandPos = isBandType ? bestBandPos : 0;
        for (int i = 0; i < SAO_NUM_OFFSET; i++)
            candidate.offset[i] = (int)m_offset[0][typeIdx][i + candidate.bandPos + 1];

        // measure the rate of the candidate from the saved context
        m_entropyCoder.load(m_rdContexts.temp);
        m_entropyCoder.resetBits();
        m_entropyCoder.codeSaoOffset(candidate, 0);
        const uint32_t candidateRate = m_entropyCoder.getNumberOfWrittenBits();

        const double candidateCost = (double)typeDist + m_lumaLambda * (double)candidateRate;
        if (candidateCost < bestCost)
        {
            bestCost = candidateCost;
            copySaoUnit(lumaParam, &candidate);
            bestDist = typeDist;
        }
    }

    mergeDist[0] = ((double)bestDist / m_lumaLambda);

    // advance the temp context past the parameters that were selected
    m_entropyCoder.load(m_rdContexts.temp);
    m_entropyCoder.codeSaoOffset(*lumaParam, 0);
    m_entropyCoder.store(m_rdContexts.temp);

    // merge candidates: index 0 is the left neighbour, index 1 the one above
    for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
    {
        const int srcAddr = mergeIdx ? addrUp : addrLeft;
        if (srcAddr < 0)
        {
            resetSaoUnit(&mergeSaoParam[mergeIdx]);
            continue;
        }

        SaoCtuParam* src = &(saoParam->ctuParam[0][srcAddr]);
        const int srcType = src->typeIdx;
        int64_t srcDist = 0;

        if (srcType >= 0)
        {
            // apply the neighbour's offsets to this CTU's statistics
            int bandPos = (srcType == SAO_BO) ? src->bandPos : 0;
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
            {
                int mergeOffset = src->offset[i];
                srcDist += estSaoDist(m_count[0][srcType][i + bandPos + 1], mergeOffset, m_offsetOrg[0][srcType][i + bandPos + 1]);
            }
        }

        copySaoUnit(&mergeSaoParam[mergeIdx], src);
        mergeSaoParam[mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;

        mergeDist[mergeIdx + 1] = ((double)srcDist / m_lumaLambda);
    }
}
|
|
|
|
/* Choose the SAO parameters of both chroma planes (1 and 2) of CTU `addr`
 * jointly and add their contribution to the merge candidate costs.
 *
 * Both planes are always given the same SAO type; for SAO_BO each plane gets
 * its own band position.  The two units are first reset and their coded size,
 * times the chroma lambda, becomes the cost to beat.  The cheapest candidate
 * pair is copied into saoParam->ctuParam[1..2][addr] and coded into
 * m_rdContexts.temp.
 *
 * On return the chroma distortion (divided by the chroma lambda) has been
 * ADDED to mergeDist[0] and, for every available neighbour, to
 * mergeDist[1] (left) / mergeDist[2] (up).  mergeSaoParam[plane][mergeIdx]
 * holds a copy of the neighbour's parameters with mergeMode set, or a reset
 * unit when that neighbour does not exist. */
void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist)
{
    SaoCtuParam* chromaParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };

    int     bandDist[MAX_NUM_SAO_CLASS];
    double  bandCost[MAX_NUM_SAO_CLASS];
    int     bestBandPos[2] = { 0, 0 };
    int64_t bestDist = 0;

    // reference cost: bits needed to code the two freshly reset units
    resetSaoUnit(chromaParam[0]);
    resetSaoUnit(chromaParam[1]);
    m_entropyCoder.load(m_rdContexts.temp);
    m_entropyCoder.resetBits();
    m_entropyCoder.codeSaoOffset(*chromaParam[0], 1);
    m_entropyCoder.codeSaoOffset(*chromaParam[1], 2);

    double bestCost = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;

    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
    {
        const bool isBandType = (typeIdx == SAO_BO);
        int64_t compDist[2];

        for (int compIdx = 0; compIdx < 2; compIdx++)
        {
            compDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, bandDist, bandCost);
            if (!isBandType)
                continue;

            // The band tables are shared by both components, so the window
            // search has to run before the next component overwrites them.
            double bestWindowCost = MAX_DOUBLE;
            for (int first = 0; first <= SAO_NUM_BO_CLASSES - SAO_BO_LEN; first++)
            {
                double windowCost = 0.0;
                for (int k = 0; k < SAO_BO_LEN; k++)
                    windowCost += bandCost[first + k];

                if (windowCost < bestWindowCost)
                {
                    bestWindowCost = windowCost;
                    bestBandPos[compIdx] = first;
                }
            }

            // distortion of the band type is that of the selected window only
            compDist[compIdx] = 0;
            for (int k = 0; k < SAO_BO_LEN; k++)
                compDist[compIdx] += bandDist[bestBandPos[compIdx] + k];
        }

        // measure the rate of the candidate pair from the saved context
        m_entropyCoder.load(m_rdContexts.temp);
        m_entropyCoder.resetBits();

        SaoCtuParam candidate[2];
        for (int compIdx = 0; compIdx < 2; compIdx++)
        {
            SaoCtuParam& cand = candidate[compIdx];
            cand.mergeMode = SAO_MERGE_NONE;
            cand.typeIdx = typeIdx;
            cand.bandPos = isBandType ? bestBandPos[compIdx] : 0;
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
                cand.offset[i] = (int)m_offset[compIdx + 1][typeIdx][i + cand.bandPos + 1];

            m_entropyCoder.codeSaoOffset(cand, compIdx + 1);
        }

        const uint32_t candidateRate = m_entropyCoder.getNumberOfWrittenBits();
        const double candidateCost = (double)(compDist[0] + compDist[1]) + m_chromaLambda * (double)candidateRate;

        if (candidateCost < bestCost)
        {
            bestCost = candidateCost;
            copySaoUnit(chromaParam[0], &candidate[0]);
            copySaoUnit(chromaParam[1], &candidate[1]);
            bestDist = (compDist[0] + compDist[1]);
        }
    }

    mergeDist[0] += ((double)bestDist / m_chromaLambda);

    // advance the temp context past the parameters that were selected
    m_entropyCoder.load(m_rdContexts.temp);
    m_entropyCoder.codeSaoOffset(*chromaParam[0], 1);
    m_entropyCoder.codeSaoOffset(*chromaParam[1], 2);
    m_entropyCoder.store(m_rdContexts.temp);

    // merge candidates: index 0 is the left neighbour, index 1 the one above
    for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
    {
        const int srcAddr = mergeIdx ? addrUp : addrLeft;

        for (int compIdx = 0; compIdx < 2; compIdx++)
        {
            const int plane = compIdx + 1;
            if (srcAddr < 0)
            {
                resetSaoUnit(&mergeSaoParam[plane][mergeIdx]);
                continue;
            }

            SaoCtuParam* src = &(saoParam->ctuParam[plane][srcAddr]);
            const int srcType = src->typeIdx;
            int64_t srcDist = 0;

            if (srcType >= 0)
            {
                // apply the neighbour's offsets to this CTU's statistics
                int bandPos = (srcType == SAO_BO) ? src->bandPos : 0;
                for (int i = 0; i < SAO_NUM_OFFSET; i++)
                {
                    int mergeOffset = src->offset[i];
                    srcDist += estSaoDist(m_count[plane][srcType][i + bandPos + 1], mergeOffset, m_offsetOrg[plane][srcType][i + bandPos + 1]);
                }
            }

            copySaoUnit(&mergeSaoParam[plane][mergeIdx], src);
            mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
            mergeDist[mergeIdx + 1] += ((double)srcDist / m_chromaLambda);
        }
    }
}
|
|
|
|
// NOTE: the functions below must stay inside namespace X265_NS because they reference class SAO
|
|
void saoCuStatsBO_c(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
|
|
{
|
|
int x, y;
|
|
const int boShift = X265_DEPTH - SAO_BO_BITS;
|
|
|
|
for (y = 0; y < endY; y++)
|
|
{
|
|
for (x = 0; x < endX; x++)
|
|
{
|
|
int classIdx = 1 + (rec[x] >> boShift);
|
|
stats[classIdx] += (fenc[x] - rec[x]);
|
|
count[classIdx]++;
|
|
}
|
|
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
}
|
|
|
|
void saoCuStatsE0_c(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
|
|
{
|
|
int x, y;
|
|
int32_t tmp_stats[SAO::NUM_EDGETYPE];
|
|
int32_t tmp_count[SAO::NUM_EDGETYPE];
|
|
|
|
memset(tmp_stats, 0, sizeof(tmp_stats));
|
|
memset(tmp_count, 0, sizeof(tmp_count));
|
|
|
|
for (y = 0; y < endY; y++)
|
|
{
|
|
int signLeft = signOf(rec[0] - rec[-1]);
|
|
for (x = 0; x < endX; x++)
|
|
{
|
|
int signRight = signOf2(rec[x], rec[x + 1]);
|
|
X265_CHECK(signRight == signOf(rec[x] - rec[x + 1]), "signDown check failure\n");
|
|
uint32_t edgeType = signRight + signLeft + 2;
|
|
signLeft = -signRight;
|
|
|
|
X265_CHECK(edgeType <= 4, "edgeType check failure\n");
|
|
tmp_stats[edgeType] += (fenc[x] - rec[x]);
|
|
tmp_count[edgeType]++;
|
|
}
|
|
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
|
|
for (x = 0; x < SAO::NUM_EDGETYPE; x++)
|
|
{
|
|
stats[SAO::s_eoTable[x]] += tmp_stats[x];
|
|
count[SAO::s_eoTable[x]] += tmp_count[x];
|
|
}
|
|
}
|
|
|
|
void saoCuStatsE1_c(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count)
|
|
{
|
|
X265_CHECK(endX <= MAX_CU_SIZE, "endX check failure\n");
|
|
X265_CHECK(endY <= MAX_CU_SIZE, "endY check failure\n");
|
|
|
|
int x, y;
|
|
int32_t tmp_stats[SAO::NUM_EDGETYPE];
|
|
int32_t tmp_count[SAO::NUM_EDGETYPE];
|
|
|
|
memset(tmp_stats, 0, sizeof(tmp_stats));
|
|
memset(tmp_count, 0, sizeof(tmp_count));
|
|
|
|
for (y = 0; y < endY; y++)
|
|
{
|
|
for (x = 0; x < endX; x++)
|
|
{
|
|
int signDown = signOf2(rec[x], rec[x + stride]);
|
|
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride]), "signDown check failure\n");
|
|
uint32_t edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x] = (int8_t)(-signDown);
|
|
|
|
tmp_stats[edgeType] += (fenc[x] - rec[x]);
|
|
tmp_count[edgeType]++;
|
|
}
|
|
fenc += stride;
|
|
rec += stride;
|
|
}
|
|
|
|
for (x = 0; x < SAO::NUM_EDGETYPE; x++)
|
|
{
|
|
stats[SAO::s_eoTable[x]] += tmp_stats[x];
|
|
count[SAO::s_eoTable[x]] += tmp_count[x];
|
|
}
|
|
}
|
|
|
|
void saoCuStatsE2_c(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int8_t *upBufft, int endX, int endY, int32_t *stats, int32_t *count)
|
|
{
|
|
X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
|
|
X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
|
|
|
|
int x, y;
|
|
int32_t tmp_stats[SAO::NUM_EDGETYPE];
|
|
int32_t tmp_count[SAO::NUM_EDGETYPE];
|
|
|
|
memset(tmp_stats, 0, sizeof(tmp_stats));
|
|
memset(tmp_count, 0, sizeof(tmp_count));
|
|
|
|
for (y = 0; y < endY; y++)
|
|
{
|
|
upBufft[0] = signOf(rec[stride] - rec[-1]);
|
|
for (x = 0; x < endX; x++)
|
|
{
|
|
int signDown = signOf2(rec[x], rec[x + stride + 1]);
|
|
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride + 1]), "signDown check failure\n");
|
|
uint32_t edgeType = signDown + upBuff1[x] + 2;
|
|
upBufft[x + 1] = (int8_t)(-signDown);
|
|
tmp_stats[edgeType] += (fenc[x] - rec[x]);
|
|
tmp_count[edgeType]++;
|
|
}
|
|
|
|
std::swap(upBuff1, upBufft);
|
|
|
|
rec += stride;
|
|
fenc += stride;
|
|
}
|
|
|
|
for (x = 0; x < SAO::NUM_EDGETYPE; x++)
|
|
{
|
|
stats[SAO::s_eoTable[x]] += tmp_stats[x];
|
|
count[SAO::s_eoTable[x]] += tmp_count[x];
|
|
}
|
|
}
|
|
|
|
void saoCuStatsE3_c(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count)
|
|
{
|
|
X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
|
|
X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
|
|
|
|
int x, y;
|
|
int32_t tmp_stats[SAO::NUM_EDGETYPE];
|
|
int32_t tmp_count[SAO::NUM_EDGETYPE];
|
|
|
|
memset(tmp_stats, 0, sizeof(tmp_stats));
|
|
memset(tmp_count, 0, sizeof(tmp_count));
|
|
|
|
for (y = 0; y < endY; y++)
|
|
{
|
|
for (x = 0; x < endX; x++)
|
|
{
|
|
int signDown = signOf2(rec[x], rec[x + stride - 1]);
|
|
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride - 1]), "signDown check failure\n");
|
|
X265_CHECK(abs(upBuff1[x]) <= 1, "upBuffer1 check failure\n");
|
|
|
|
uint32_t edgeType = signDown + upBuff1[x] + 2;
|
|
upBuff1[x - 1] = (int8_t)(-signDown);
|
|
tmp_stats[edgeType] += (fenc[x] - rec[x]);
|
|
tmp_count[edgeType]++;
|
|
}
|
|
|
|
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
|
|
|
|
rec += stride;
|
|
fenc += stride;
|
|
}
|
|
|
|
for (x = 0; x < SAO::NUM_EDGETYPE; x++)
|
|
{
|
|
stats[SAO::s_eoTable[x]] += tmp_stats[x];
|
|
count[SAO::s_eoTable[x]] += tmp_count[x];
|
|
}
|
|
}
|
|
|
|
/* Point the SAO statistics entries of the primitive table at the C
 * implementations defined in this file. */
void setupSaoPrimitives_c(EncoderPrimitives &p)
{
    // TODO: move other sao functions to here

    // edge-offset statistics, one kernel per edge class
    p.saoCuStatsE0 = saoCuStatsE0_c;
    p.saoCuStatsE1 = saoCuStatsE1_c;
    p.saoCuStatsE2 = saoCuStatsE2_c;
    p.saoCuStatsE3 = saoCuStatsE3_c;

    // band-offset statistics
    p.saoCuStatsBO = saoCuStatsBO_c;
}
|
|
}
|
|
|