libbpg-0.9.6

This commit is contained in:
King_DuckZ 2015-10-27 11:46:00 +01:00
parent 3035b41edf
commit 35a8402710
248 changed files with 232891 additions and 100 deletions

View file

@ -0,0 +1,48 @@
# vim: syntax=cmake
# Build rules for the x265 "encoder" object library: per-compiler warning
# suppressions, per-file compile flags for api.cpp, and the source list.

# GCC: silence uninitialized-variable warnings for the sources added here.
if(GCC)
add_definitions(-Wno-uninitialized)
if(CC_HAS_NO_STRICT_OVERFLOW)
# GCC 4.9.2 gives warnings we know we can ignore in this file
set_source_files_properties(slicetype.cpp PROPERTIES COMPILE_FLAGS -Wno-strict-overflow)
endif(CC_HAS_NO_STRICT_OVERFLOW)
endif()
# MSVC: same intent as the GCC flag above, expressed as a warning-disable switch.
if(MSVC)
add_definitions(/wd4701) # potentially uninitialized local variable 'foo' used
endif()
# Collect the multilib / shared-library switches as preprocessor definitions.
# They are applied to api.cpp only (see set_source_files_properties below).
if(LINKED_8BIT)
list(APPEND APIFLAGS "-DLINKED_8BIT=1")
endif(LINKED_8BIT)
if(LINKED_10BIT)
list(APPEND APIFLAGS "-DLINKED_10BIT=1")
endif(LINKED_10BIT)
if(LINKED_12BIT)
list(APPEND APIFLAGS "-DLINKED_12BIT=1")
endif(LINKED_12BIT)
if(ENABLE_SHARED)
list(APPEND APIFLAGS "-DENABLE_SHARED=1")
endif(ENABLE_SHARED)
# APIFLAGS is a CMake list (";"-separated); COMPILE_FLAGS expects one
# space-separated string, so flatten it before attaching it to api.cpp.
string(REPLACE ";" " " APIFLAGSTR "${APIFLAGS}")
set_source_files_properties(api.cpp PROPERTIES COMPILE_FLAGS "${APIFLAGSTR}")
# OBJECT library: compiles these sources to object files without archiving or
# linking them. Headers are listed next to their sources.
add_library(encoder OBJECT ../x265.h
analysis.cpp analysis.h
search.cpp search.h
bitcost.cpp bitcost.h rdcost.h
motion.cpp motion.h
slicetype.cpp slicetype.h
frameencoder.cpp frameencoder.h
framefilter.cpp framefilter.h
level.cpp level.h
nal.cpp nal.h
sei.cpp sei.h
sao.cpp sao.h
entropy.cpp entropy.h
dpb.cpp dpb.h
ratecontrol.cpp ratecontrol.h
reference.cpp reference.h
encoder.cpp encoder.h
api.cpp
weightPrediction.cpp)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,171 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
* Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_ANALYSIS_H
#define X265_ANALYSIS_H
#include "common.h"
#include "predict.h"
#include "quant.h"
#include "yuv.h"
#include "shortyuv.h"
#include "cudata.h"
#include "entropy.h"
#include "search.h"
namespace X265_NS {
// private namespace
class Entropy;
/* CU mode-decision engine built on top of Search. This header only declares the
 * interface: the prediction-mode slots kept per CU depth, the PMODE task-group
 * helper, and the per-slice-type compress entry points. Apart from the inline
 * checkBestMode(), the definitions live outside this header. */
class Analysis : public Search
{
public:
/* Identifiers for the candidate prediction modes. MAX_PRED_TYPES sizes
 * ModeDepth::pred[] and PMODE::modes[] below. */
enum {
PRED_MERGE,
PRED_SKIP,
PRED_INTRA,
PRED_2Nx2N,
PRED_BIDIR,
PRED_Nx2N,
PRED_2NxN,
PRED_SPLIT,
PRED_2NxnU,
PRED_2NxnD,
PRED_nLx2N,
PRED_nRx2N,
PRED_INTRA_NxN, /* 4x4 intra PU blocks for 8x8 CU */
PRED_LOSSLESS, /* lossless encode of best mode */
MAX_PRED_TYPES
};
/* Working set kept for one CU depth (see m_modeDepth[NUM_CU_DEPTH]): one Mode
 * slot per prediction type, a pointer to the best mode found so far, and the
 * fencYuv / cuMemPool members used at this depth. */
struct ModeDepth
{
Mode pred[MAX_PRED_TYPES];
Mode* bestMode;
Yuv fencYuv;
CUDataMemPool cuMemPool;
};
/* BondedTaskGroup-derived helper that carries references to the owning
 * ("master") Analysis and to the CU geometry being analysed, plus a list of
 * mode identifiers. processTasks() is defined elsewhere. */
class PMODE : public BondedTaskGroup
{
public:
Analysis& master;
const CUGeom& cuGeom;
int modes[MAX_PRED_TYPES];
PMODE(Analysis& m, const CUGeom& g) : master(m), cuGeom(g) {}
void processTasks(int workerThreadId);
protected:
/* Declared without a definition in this header; presumably this exists only
 * to forbid assignment (the class holds reference members) -- TODO confirm. */
PMODE operator=(const PMODE&);
};
void processPmode(PMODE& pmode, Analysis& slave);
ModeDepth m_modeDepth[NUM_CU_DEPTH];
bool m_bTryLossless;
bool m_bChromaSa8d;
Analysis();
bool create(ThreadLocalData* tld);
void destroy();
Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
protected:
/* Analysis data for load/save modes, keeps getting incremented as CTU analysis proceeds and data is consumed or read */
analysis_intra_data* m_reuseIntraDataCTU;
analysis_inter_data* m_reuseInterDataCTU;
int32_t* m_reuseRef;
uint32_t* m_reuseBestMergeCand;
uint32_t m_splitRefIdx[4];
/* full analysis for an I-slice CU */
void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
/* full analysis for a P or B slice CU */
uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isShareMergeCand);
/* measure inter options */
void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
/* encode current bestMode losslessly, pick best RD cost */
void tryLossless(const CUGeom& cuGeom);
/* add the RD cost of coding a split flag (0 or 1) to the given mode */
void addSplitFlagCost(Mode& mode, uint32_t depth);
/* work-avoidance heuristics for RD levels < 5 */
uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom);
bool recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode);
/* generate residual and recon pixels for an entire CTU recursively (RD0) */
void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom);
/* check whether current mode is the new best */
/* Stores a pointer to `mode` in m_modeDepth[depth].bestMode when no best mode
 * is recorded yet or when mode.rdCost is strictly lower than the recorded
 * one; on equal cost the previously recorded mode is kept. Only the address
 * is stored, so `mode` must outlive the use of bestMode. */
inline void checkBestMode(Mode& mode, uint32_t depth)
{
X265_CHECK(mode.ok(), "mode costs are uninitialized\n");
ModeDepth& md = m_modeDepth[depth];
if (md.bestMode)
{
if (mode.rdCost < md.bestMode->rdCost)
md.bestMode = &mode;
}
else
md.bestMode = &mode;
}
};
/* Bundle of per-thread state; currently just wraps one Analysis instance. */
struct ThreadLocalData
{
    Analysis analysis;

    /* Tear down the embedded Analysis object. */
    void destroy()
    {
        analysis.destroy();
    }
};
}
#endif // ifndef X265_ANALYSIS_H

523
x265/source/encoder/api.cpp Normal file
View file

@ -0,0 +1,523 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "bitstream.h"
#include "param.h"
#include "encoder.h"
#include "entropy.h"
#include "level.h"
#include "nal.h"
#include "bitcost.h"
/* multilib namespace reflectors */
/* Forward declarations of the API getters that live in the other bit-depth
 * builds when they are linked into the same binary. x265_api_get() and
 * x265_api_query() below dispatch to these when the requested depth differs
 * from X265_DEPTH. Each block is compiled only when the matching LINKED_*BIT
 * definition is set. */
#if LINKED_8BIT
namespace x265_8bit {
const x265_api* x265_api_get(int bitDepth);
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
}
#endif
#if LINKED_10BIT
namespace x265_10bit {
const x265_api* x265_api_get(int bitDepth);
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
}
#endif
#if LINKED_12BIT
namespace x265_12bit {
const x265_api* x265_api_get(int bitDepth);
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
}
#endif
#if EXPORT_C_API
/* these functions are exported as C functions (default) */
using namespace X265_NS;
extern "C" {
#else
/* these functions exist within private namespace (multilib) */
namespace X265_NS {
#endif
/* Create and configure an encoder from the caller's parameter set.
 *
 * p is copied into a freshly allocated x265_param, so the caller's struct is
 * not retained. A second allocated copy (latestParam) is attached to the
 * encoder as m_latestParam.
 *
 * Returns the new encoder handle, or NULL when p is NULL, the build's bit
 * depth is inconsistent, an allocation fails, parameter or level validation
 * fails, the stream would be non-conformant while bAllowNonConformance is
 * unset, or the encoder reports m_aborted after create(). */
x265_encoder *x265_encoder_open(x265_param *p)
{
if (!p)
return NULL;
#if _MSC_VER
#pragma warning(disable: 4127) // conditional expression is constant, yes I know
#endif
/* compile-time sanity check: X265_DEPTH must agree with HIGH_BIT_DEPTH */
#if HIGH_BIT_DEPTH
if (X265_DEPTH != 10 && X265_DEPTH != 12)
#else
if (X265_DEPTH != 8)
#endif
{
x265_log(p, X265_LOG_ERROR, "Build error, internal bit depth mismatch\n");
return NULL;
}
/* all three are declared before the first goto so the fail path can release
 * them unconditionally */
Encoder* encoder = NULL;
x265_param* param = PARAM_NS::x265_param_alloc();
x265_param* latestParam = PARAM_NS::x265_param_alloc();
if (!param || !latestParam)
goto fail;
/* work on a private copy from here on */
memcpy(param, p, sizeof(x265_param));
x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", PFX(version_str));
x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str));
x265_setup_primitives(param);
if (x265_check_params(param))
goto fail;
if (x265_set_globals(param))
goto fail;
encoder = new Encoder;
if (!param->rc.bEnableSlowFirstPass)
PARAM_NS::x265_param_apply_fastfirstpass(param);
// may change params for auto-detect, etc
encoder->configure(param);
// may change rate control and CPB params
if (!enforceLevel(*param, encoder->m_vps))
goto fail;
// will detect and set profile/tier/level in VPS
determineLevel(*param, encoder->m_vps);
if (!param->bAllowNonConformance && encoder->m_vps.ptl.profileIdc == Profile::NONE)
{
x265_log(param, X265_LOG_INFO, "non-conformant bitstreams not allowed (--allow-non-conformance)\n");
goto fail;
}
encoder->create();
/* snapshot the final parameters; x265_encoder_reconfig() edits m_latestParam */
encoder->m_latestParam = latestParam;
memcpy(latestParam, param, sizeof(x265_param));
if (encoder->m_aborted)
goto fail;
x265_print_params(param);
return encoder;
fail:
/* encoder may still be NULL here; deleting a null pointer is a no-op */
delete encoder;
PARAM_NS::x265_param_free(param);
PARAM_NS::x265_param_free(latestParam);
return NULL;
}
/* Generate the stream headers into the encoder's NAL list.
 *
 * On success *pp_nal points at the first NAL of that list, *pi_nal (optional)
 * receives the NAL count, and the list's m_occupancy value is returned.
 * Returns -1 when enc or pp_nal is NULL. */
int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal)
{
    /* both the encoder handle and the output pointer are mandatory */
    if (!pp_nal || !enc)
        return -1;

    Encoder *impl = static_cast<Encoder*>(enc);
    Entropy sbacCoder;
    Bitstream bs;

    impl->getStreamHeaders(impl->m_nalList, sbacCoder, bs);
    *pp_nal = &impl->m_nalList.m_nal[0];
    if (pi_nal)
        *pi_nal = impl->m_nalList.m_numNal;

    return impl->m_nalList.m_occupancy;
}
/* Copy the encoder's active parameter set (m_param) into *out.
 * Does nothing when either pointer is NULL. */
void x265_encoder_parameters(x265_encoder *enc, x265_param *out)
{
    if (!enc || !out)
        return;

    Encoder *impl = static_cast<Encoder*>(enc);
    memcpy(out, impl->m_param, sizeof(x265_param));
}
/* Apply param_in to the encoder's m_latestParam via Encoder::reconfigureParam().
 *
 * Returns -1 when enc or param_in is NULL, otherwise the value returned by
 * reconfigureParam(). A non-zero result restores m_latestParam to its previous
 * contents; a zero result sets m_reconfigured and prints the changed values. */
int x265_encoder_reconfig(x265_encoder* enc, x265_param* param_in)
{
    if (!enc || !param_in)
        return -1;

    Encoder* impl = static_cast<Encoder*>(enc);

    /* keep a copy so a failed reconfigure can be rolled back */
    x265_param backup;
    memcpy(&backup, impl->m_latestParam, sizeof(x265_param));

    int status = impl->reconfigureParam(impl->m_latestParam, param_in);
    if (!status)
    {
        impl->m_reconfigured = true;
        x265_print_reconfigured_params(&backup, impl->m_latestParam);
        return status;
    }

    /* reconfigure failed, recover saved param set */
    memcpy(impl->m_latestParam, &backup, sizeof(x265_param));
    return status;
}
/* Submit one picture (or flush, when pic_in is NULL) and collect output.
 *
 * Returns -1 when enc is NULL, otherwise the value returned by
 * Encoder::encode(). When that value is positive and pp_nal is given, *pp_nal
 * points at the encoder's NAL list and *pi_nal (optional) receives its count;
 * otherwise *pi_nal, if given, is set to 0. */
int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
{
    if (!enc)
        return -1;

    Encoder *impl = static_cast<Encoder*>(enc);
    int numEncoded;

    // While flushing, we cannot return 0 until the entire stream is flushed:
    // keep calling encode() as long as nothing came out, no input was given
    // and the encoder still reports delayed pictures.
    for (;;)
    {
        numEncoded = impl->encode(pic_in, pic_out);
        if (numEncoded != 0 || pic_in || !impl->m_numDelayedPic)
            break;
    }

    // The encoder now owns the analysisData buffers; clear the caller's
    // pointers so they are not reused for more than one picture.
    if (pic_in)
    {
        pic_in->analysisData.intraData = NULL;
        pic_in->analysisData.interData = NULL;
    }

    if (pp_nal && numEncoded > 0)
    {
        *pp_nal = &impl->m_nalList.m_nal[0];
        if (pi_nal)
            *pi_nal = impl->m_nalList.m_numNal;
    }
    else if (pi_nal)
    {
        *pi_nal = 0;
    }

    return numEncoded;
}
/* Forward a statistics request to Encoder::fetchStats().
 * Does nothing when enc or outputStats is NULL. */
void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats, uint32_t statsSizeBytes)
{
    if (!enc || !outputStats)
        return;

    Encoder *impl = static_cast<Encoder*>(enc);
    impl->fetchStats(outputStats, statsSizeBytes);
}
/* Deprecated entry point: the int / char** arguments are ignored and the only
 * effect is a deprecation warning logged against the encoder's parameters.
 * Does nothing when enc is NULL. */
void x265_encoder_log(x265_encoder* enc, int, char **)
{
    if (!enc)
        return;

    Encoder *impl = static_cast<Encoder*>(enc);
    x265_log(impl->m_param, X265_LOG_WARNING, "x265_encoder_log is now deprecated\n");
}
/* Shut down and free an encoder: stop its jobs, print the summary, destroy
 * its resources, delete the object, then decrement g_ctuSizeConfigured.
 * Does nothing when enc is NULL. */
void x265_encoder_close(x265_encoder *enc)
{
    if (!enc)
        return;

    Encoder *impl = static_cast<Encoder*>(enc);
    impl->stopJobs();
    impl->printSummary();
    impl->destroy();
    delete impl;

    ATOMIC_DEC(&g_ctuSizeConfigured);
}
/* Release the shared tables, but only while g_ctuSizeConfigured is zero. */
void x265_cleanup(void)
{
    if (g_ctuSizeConfigured)
        return;

    BitCost::destroy();
    CUData::s_partSet[0] = NULL; /* allow CUData to adjust to new CTU size */
}
/* Allocate storage for one x265_picture with x265_malloc(). The memory is
 * returned as-is (this function does not initialise it); the result is
 * whatever x265_malloc() returned. */
x265_picture *x265_picture_alloc()
{
    x265_picture *pic = (x265_picture*)x265_malloc(sizeof(x265_picture));
    return pic;
}
/* Reset *pic to defaults derived from *param.
 *
 * The picture is zero-filled, then bitDepth and colorSpace are taken from the
 * parameter set's internal values and forceqp is set to X265_QP_AUTO. When
 * param->analysisMode is non-zero the analysis-data CU and partition counts
 * are filled in from the source dimensions and the global max CU size.
 *
 * A NULL param or pic makes the call a no-op instead of dereferencing a null
 * pointer, matching the NULL handling of the other public entry points in
 * this file. */
void x265_picture_init(x265_param *param, x265_picture *pic)
{
    if (!param || !pic)
        return;

    memset(pic, 0, sizeof(x265_picture));

    pic->bitDepth = param->internalBitDepth;
    pic->colorSpace = param->internalCsp;
    pic->forceqp = X265_QP_AUTO;
    pic->quantOffsets = NULL;

    if (param->analysisMode)
    {
        /* round the frame size up to whole CTUs */
        uint32_t widthInCU = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
        uint32_t heightInCU = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
        uint32_t numCUsInFrame = widthInCU * heightInCU;

        pic->analysisData.numCUsInFrame = numCUsInFrame;
        pic->analysisData.numPartitions = NUM_4x4_PARTITIONS;
    }
}
/* Release a picture by handing it to x265_free(). */
void x265_picture_free(x265_picture *p)
{
    x265_free(p);
}
/* Function/size table returned by x265_api_get() and x265_api_query() for this
 * build. This is a positional aggregate initializer, so the entries must stay
 * in exactly the order of the x265_api struct's members (declared outside this
 * file). The leading entries are version and struct-size values, followed by
 * the bit depth and version/build strings, then the param, picture and encoder
 * function pointers. */
static const x265_api libapi =
{
X265_MAJOR_VERSION,
X265_BUILD,
sizeof(x265_param),
sizeof(x265_picture),
sizeof(x265_analysis_data),
sizeof(x265_zone),
sizeof(x265_stats),
PFX(max_bit_depth),
PFX(version_str),
PFX(build_info_str),
&PARAM_NS::x265_param_alloc,
&PARAM_NS::x265_param_free,
&PARAM_NS::x265_param_default,
&PARAM_NS::x265_param_parse,
&PARAM_NS::x265_param_apply_profile,
&PARAM_NS::x265_param_default_preset,
&x265_picture_alloc,
&x265_picture_free,
&x265_picture_init,
&x265_encoder_open,
&x265_encoder_parameters,
&x265_encoder_reconfig,
&x265_encoder_headers,
&x265_encoder_encode,
&x265_encoder_get_stats,
&x265_encoder_log,
&x265_encoder_close,
&x265_cleanup,
sizeof(x265_frame_stats),
};
/* Signatures of the two entry points looked up by name in a dynamically
 * loaded library. */
typedef const x265_api* (*api_get_func)(int bitDepth);
typedef const x265_api* (*api_query_func)(int bitDepth, int apiVersion, int* err);
/* Two-level stringification: xstr() expands its argument first, so
 * xstr(X265_BUILD) yields the build number as a string literal. */
#define xstr(s) str(s)
#define str(s) #s
/* Shared-library file suffix used when composing library names below. */
#if _WIN32
#define ext ".dll"
#elif MACOS
#include <dlfcn.h>
#define ext ".dylib"
#else
#include <dlfcn.h>
#define ext ".so"
#endif
#if ENABLE_SHARED
/* Nesting counter for the dynamic-load path of x265_api_get() and
 * x265_api_query(): incremented before loading, decremented afterwards, and
 * a lookup is refused once it exceeds 1. It is a plain int with no
 * synchronisation visible here. */
static int g_recursion /* = 0 */;
#endif
/* Return the API table for the requested bit depth.
 *
 * bitDepth == 0 or bitDepth == X265_DEPTH returns this build's own table.
 * Any other depth is resolved, in order, by:
 *   1. a statically linked sibling build (LINKED_8BIT/10BIT/12BIT), else
 *   2. with ENABLE_SHARED, loading "libx265_main[10|12]" and, failing that,
 *      the generic "libx265", then calling its versioned x265_api_get_<build>
 *      symbol.
 * Returns NULL when no matching library/symbol is found, when the depth is
 * not 8, 10 or 12, when the nesting guard trips, or when the loaded table
 * reports a different bit depth. The library handle is not closed by this
 * function. */
const x265_api* x265_api_get(int bitDepth)
{
if (bitDepth && bitDepth != X265_DEPTH)
{
#if LINKED_8BIT
if (bitDepth == 8) return x265_8bit::x265_api_get(0);
#endif
#if LINKED_10BIT
if (bitDepth == 10) return x265_10bit::x265_api_get(0);
#endif
#if LINKED_12BIT
if (bitDepth == 12) return x265_12bit::x265_api_get(0);
#endif
#if ENABLE_SHARED
const char* libname = NULL;
/* the exported getter's name carries the build number */
const char* method = "x265_api_get_" xstr(X265_BUILD);
const char* multilibname = "libx265" ext;
if (bitDepth == 12)
libname = "libx265_main12" ext;
else if (bitDepth == 10)
libname = "libx265_main10" ext;
else if (bitDepth == 8)
libname = "libx265_main" ext;
else
return NULL;
const x265_api* api = NULL;
/* 0 asks a depth-specific library for its native table; when falling back to
 * the generic library the wanted depth is passed through instead */
int reqDepth = 0;
if (g_recursion > 1)
return NULL;
else
g_recursion++;
#if _WIN32
HMODULE h = LoadLibraryA(libname);
if (!h)
{
h = LoadLibraryA(multilibname);
reqDepth = bitDepth;
}
if (h)
{
api_get_func get = (api_get_func)GetProcAddress(h, method);
if (get)
api = get(reqDepth);
}
#else
void* h = dlopen(libname, RTLD_LAZY | RTLD_LOCAL);
if (!h)
{
h = dlopen(multilibname, RTLD_LAZY | RTLD_LOCAL);
reqDepth = bitDepth;
}
if (h)
{
api_get_func get = (api_get_func)dlsym(h, method);
if (get)
api = get(reqDepth);
}
#endif
g_recursion--;
/* reject a table whose bit depth is not the one asked for */
if (api && bitDepth != api->bit_depth)
{
x265_log(NULL, X265_LOG_WARNING, "%s does not support requested bitDepth %d\n", libname, bitDepth);
return NULL;
}
return api;
#else
return NULL;
#endif
}
return &libapi;
}
/* Version-checked variant of x265_api_get().
 *
 * bitDepth   0 or X265_DEPTH selects this build; any other value is resolved
 *            through a linked sibling build or, with ENABLE_SHARED, through a
 *            dynamically loaded library.
 * apiVersion the caller's build number; values below 51 are refused.
 * err        optional; receives an X265_API_QUERY_ERR_* code.
 *
 * Returns the API table or NULL. When a loaded library's query function is
 * found and called but fails, the error code that library reported is kept
 * rather than being overwritten with FUNC_NOT_FOUND. */
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err)
{
    if (apiVersion < 51)
    {
        /* builds before 1.6 had re-ordered public structs */
        if (err) *err = X265_API_QUERY_ERR_VER_REFUSED;
        return NULL;
    }

    if (err) *err = X265_API_QUERY_ERR_NONE;

    if (bitDepth && bitDepth != X265_DEPTH)
    {
#if LINKED_8BIT
        if (bitDepth == 8) return x265_8bit::x265_api_query(0, apiVersion, err);
#endif
#if LINKED_10BIT
        if (bitDepth == 10) return x265_10bit::x265_api_query(0, apiVersion, err);
#endif
#if LINKED_12BIT
        if (bitDepth == 12) return x265_12bit::x265_api_query(0, apiVersion, err);
#endif

#if ENABLE_SHARED
        const char* libname = NULL;
        const char* method = "x265_api_query";
        const char* multilibname = "libx265" ext;

        if (bitDepth == 12)
            libname = "libx265_main12" ext;
        else if (bitDepth == 10)
            libname = "libx265_main10" ext;
        else if (bitDepth == 8)
            libname = "libx265_main" ext;
        else
        {
            if (err) *err = X265_API_QUERY_ERR_LIB_NOT_FOUND;
            return NULL;
        }

        const x265_api* api = NULL;
        /* 0 asks a depth-specific library for its native table; the generic
         * library is asked for the wanted depth explicitly */
        int reqDepth = 0;
        /* error to report when the remote query function is never reached */
        int e = X265_API_QUERY_ERR_LIB_NOT_FOUND;
        /* set once the loaded library's query function has actually run */
        bool bQueried = false;

        if (g_recursion > 1)
        {
            if (err) *err = X265_API_QUERY_ERR_LIB_NOT_FOUND;
            return NULL;
        }
        else
            g_recursion++;

#if _WIN32
        HMODULE h = LoadLibraryA(libname);
        if (!h)
        {
            h = LoadLibraryA(multilibname);
            reqDepth = bitDepth;
        }
        if (h)
        {
            e = X265_API_QUERY_ERR_FUNC_NOT_FOUND;
            api_query_func query = (api_query_func)GetProcAddress(h, method);
            if (query)
            {
                bQueried = true;
                api = query(reqDepth, apiVersion, err);
            }
        }
#else
        void* h = dlopen(libname, RTLD_LAZY | RTLD_LOCAL);
        if (!h)
        {
            h = dlopen(multilibname, RTLD_LAZY | RTLD_LOCAL);
            reqDepth = bitDepth;
        }
        if (h)
        {
            e = X265_API_QUERY_ERR_FUNC_NOT_FOUND;
            api_query_func query = (api_query_func)dlsym(h, method);
            if (query)
            {
                bQueried = true;
                api = query(reqDepth, apiVersion, err);
            }
        }
#endif

        g_recursion--;

        /* reject a table whose bit depth is not the one asked for */
        if (api && bitDepth != api->bit_depth)
        {
            x265_log(NULL, X265_LOG_WARNING, "%s does not support requested bitDepth %d\n", libname, bitDepth);
            if (err) *err = X265_API_QUERY_ERR_WRONG_BITDEPTH;
            return NULL;
        }

        if (err)
        {
            if (api)
                *err = X265_API_QUERY_ERR_NONE;
            else if (!bQueried || *err == X265_API_QUERY_ERR_NONE)
                /* the library or symbol was missing, or the remote query
                 * failed without reporting a reason of its own */
                *err = e;
            /* otherwise keep the specific code the loaded library reported */
        }
        return api;
#else
        if (err) *err = X265_API_QUERY_ERR_WRONG_BITDEPTH;
        return NULL;
#endif
    }

    return &libapi;
}
} /* end namespace or extern "C" */

View file

@ -0,0 +1,91 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "primitives.h"
#include "bitcost.h"
using namespace X265_NS;
/* Select the motion-vector cost row for `qp`, building it on first use.
 *
 * Rows are shared through the static s_costs[] table. Each row holds
 * 4 * BC_MAX_MV + 1 entries and the stored pointer is offset to the middle so
 * it can be indexed with negative and positive MVDs alike. Creation is
 * serialised by s_costCalcLock; the first test is done without the lock.
 *
 * The row is filled through a local pointer and stored into s_costs[qp] only
 * once it is complete, so a caller that passes the unlocked first test is not
 * handed a pointer to a row that is still being written. (NOTE(review): no
 * atomics or fences are used here, so this narrows the window rather than
 * giving a formal cross-thread ordering guarantee.)
 *
 * qp is used as an index into s_costs[] and x265_lambda_tab[] without a range
 * check; callers must keep it below BC_MAX_QP. */
void BitCost::setQP(unsigned int qp)
{
    if (!s_costs[qp])
    {
        ScopedLock s(s_costCalcLock);

        // Now that we have acquired the lock, check again if another thread calculated
        // this row while we were blocked
        if (!s_costs[qp])
        {
            x265_emms(); // just to be safe

            CalculateLogs();
            uint16_t* costs = new uint16_t[4 * BC_MAX_MV + 1] + 2 * BC_MAX_MV;
            double lambda = x265_lambda_tab[qp];

            // estimate same cost for negative and positive MVD
            for (int i = 0; i <= 2 * BC_MAX_MV; i++)
                costs[i] = costs[-i] = (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 15) - 1);

            // publish the row only after every entry has been written
            s_costs[qp] = costs;
        }
    }

    m_cost = s_costs[qp];
}
/***
* Class static data and methods
*/
/* One lazily built cost row per QP (filled by setQP, freed by destroy). */
uint16_t *BitCost::s_costs[BC_MAX_QP];
/* Bit-size estimate per absolute MVD value (filled by CalculateLogs). */
float *BitCost::s_bitsizes;
/* Serialises the creation of cost rows in setQP. */
Lock BitCost::s_costCalcLock;
/* Build the shared s_bitsizes[] table once. It holds one entry for each value
 * in [0, 2 * BC_MAX_MV]: entry 0 is the constant 0.718 and every other entry
 * is 2 * log2(i + 1) + 1.718. Later calls are no-ops. */
void BitCost::CalculateLogs()
{
    if (s_bitsizes)
        return;

    s_bitsizes = new float[2 * BC_MAX_MV + 1];
    s_bitsizes[0] = 0.718f;

    float twoOverLn2 = 2.0f / log(2.0f); // 2 x 1/log(2)
    for (int mvd = 1; mvd <= 2 * BC_MAX_MV; mvd++)
        s_bitsizes[mvd] = log((float)(mvd + 1)) * twoOverLn2 + 1.718f;
}
/* Free every cost row and the bit-size table, resetting the static pointers so
 * the tables can be rebuilt on demand. */
void BitCost::destroy()
{
    for (int qp = 0; qp < BC_MAX_QP; qp++)
    {
        if (!s_costs[qp])
            continue;

        // rows are stored offset to their midpoint; undo that before freeing
        delete [] (s_costs[qp] - 2 * BC_MAX_MV);
        s_costs[qp] = 0;
    }

    delete [] s_bitsizes;
    s_bitsizes = 0;
}

View file

@ -0,0 +1,93 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_BITCOST_H
#define X265_BITCOST_H
#include "common.h"
#include "threading.h"
#include "mv.h"
namespace X265_NS {
// private x265 namespace
/* Motion-vector bit-cost estimator. A per-QP cost row is selected with
 * setQP(), a motion-vector predictor is installed with setMVP(), and the cost
 * of a candidate MV relative to that predictor is then read with mvcost() or
 * bitcost(). The cost rows and the bit-size table are static and shared by
 * all instances. */
class BitCost
{
public:
BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
void setQP(unsigned int qp);
/* Remember mvp and pre-offset the cost row by -mvp.x / -mvp.y so mvcost() can
 * index it directly with absolute MV components. m_cost is null until
 * setQP() has run, so this is only meaningful afterwards. */
void setMVP(const MV& mvp) { m_mvp = mvp; m_cost_mvx = m_cost - mvp.x; m_cost_mvy = m_cost - mvp.y; }
// return bit cost of motion vector difference, multiplied by lambda
inline uint16_t mvcost(const MV& mv) const { return m_cost_mvx[mv.x] + m_cost_mvy[mv.y]; }
// return bit cost of motion vector difference, without lambda
inline uint32_t bitcost(const MV& mv) const
{
return (uint32_t)(s_bitsizes[abs(mv.x - m_mvp.x)] +
s_bitsizes[abs(mv.y - m_mvp.y)] + 0.5f);
}
/* same as above, but against an explicit predictor instead of m_mvp */
static inline uint32_t bitcost(const MV& mv, const MV& mvp)
{
return (uint32_t)(s_bitsizes[abs(mv.x - mvp.x)] +
s_bitsizes[abs(mv.y - mvp.y)] + 0.5f);
}
static void destroy();
protected:
/* m_cost row offset by the predictor's x / y component (see setMVP) */
uint16_t *m_cost_mvx;
uint16_t *m_cost_mvy;
/* cost row for the current QP (points into s_costs) */
uint16_t *m_cost;
MV m_mvp;
/* declared without a definition in this header */
BitCost& operator =(const BitCost&);
private:
/* default log2_max_mv_length_horizontal and log2_max_mv_length_vertical
* are 15, specified in quarter-pel luma sample units. making the maximum
* signaled full-pel motion distance 4096, max qpel is 32768 */
enum { BC_MAX_MV = (1 << 15) };
enum { BC_MAX_QP = 82 };
static float *s_bitsizes;
static uint16_t *s_costs[BC_MAX_QP];
static Lock s_costCalcLock;
static void CalculateLogs();
};
}
#endif // ifndef X265_BITCOST_H

303
x265/source/encoder/dpb.cpp Normal file
View file

@ -0,0 +1,303 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "frame.h"
#include "framedata.h"
#include "picyuv.h"
#include "slice.h"
#include "dpb.h"
using namespace X265_NS;
/* Destroy and delete every frame still held in the free list and the picture
 * list, then walk the FrameData free list releasing each entry together with
 * its reconstructed picture. */
DPB::~DPB()
{
    // recycled frames
    while (!m_freeList.empty())
    {
        Frame* frame = m_freeList.popFront();
        frame->destroy();
        delete frame;
    }

    // frames still in the decoded picture buffer
    while (!m_picList.empty())
    {
        Frame* frame = m_picList.popFront();
        frame->destroy();
        delete frame;
    }

    // detached FrameData entries (singly linked through m_freeListNext)
    while (m_frameDataFreeList)
    {
        FrameData* doomed = m_frameDataFreeList;
        FrameData* following = doomed->m_freeListNext;

        doomed->destroy();
        doomed->m_reconPic->destroy();
        delete doomed->m_reconPic;
        delete doomed;

        m_frameDataFreeList = following;
    }
}
// move unreferenced pictures from picList to freeList for recycle
void DPB::recycleUnreferenced()
{
Frame *iterFrame = m_picList.first();
while (iterFrame)
{
Frame *curFrame = iterFrame;
iterFrame = iterFrame->m_next;
if (!curFrame->m_encData->m_bHasReferences && !curFrame->m_countRefEncoders)
{
curFrame->m_reconRowCount.set(0);
curFrame->m_bChromaExtended = false;
// iterator is invalidated by remove, restart scan
m_picList.remove(*curFrame);
iterFrame = m_picList.first();
m_freeList.pushBack(*curFrame);
curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
m_frameDataFreeList = curFrame->m_encData;
curFrame->m_encData = NULL;
curFrame->m_reconPic = NULL;
}
}
}
/* Set up the slice header state of newFrame before it is encoded and insert
 * the frame into the picture list.
 *
 * In order: picks the POC, NAL unit type and slice type; marks non-referenced
 * B pictures; pushes the frame onto m_picList; applies decoding-refresh
 * marking; builds the reference picture set and prunes pictures outside it;
 * sizes and fills the L0/L1 reference lists; sets the collocated-picture
 * flags; and finally bumps m_countRefEncoders on every frame the slice
 * references. */
void DPB::prepareEncode(Frame *newFrame)
{
Slice* slice = newFrame->m_encData->m_slice;
slice->m_poc = newFrame->m_poc;
int pocCurr = slice->m_poc;
int type = newFrame->m_lowres.sliceType;
bool bIsKeyFrame = newFrame->m_lowres.bKeyframe;
slice->m_nalUnitType = getNalUnitType(pocCurr, bIsKeyFrame);
/* remember the POC of the most recent IDR */
if (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
m_lastIDR = pocCurr;
slice->m_lastIDR = m_lastIDR;
slice->m_sliceType = IS_X265_TYPE_B(type) ? B_SLICE : (type == X265_TYPE_P) ? P_SLICE : I_SLICE;
if (type == X265_TYPE_B)
{
newFrame->m_encData->m_bHasReferences = false;
// Adjust NAL type for unreferenced B frames (change from _R "referenced"
// to _N "non-referenced" NAL unit type)
switch (slice->m_nalUnitType)
{
case NAL_UNIT_CODED_SLICE_TRAIL_R:
slice->m_nalUnitType = m_bTemporalSublayer ? NAL_UNIT_CODED_SLICE_TSA_N : NAL_UNIT_CODED_SLICE_TRAIL_N;
break;
case NAL_UNIT_CODED_SLICE_RADL_R:
slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RADL_N;
break;
case NAL_UNIT_CODED_SLICE_RASL_R:
slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RASL_N;
break;
default:
break;
}
}
else
{
/* m_bHasReferences starts out as true for non-B pictures, and is set to false
* once no more pictures reference it */
newFrame->m_encData->m_bHasReferences = true;
}
m_picList.pushFront(*newFrame);
// Do decoding refresh marking if any
decodingRefreshMarking(pocCurr, slice->m_nalUnitType);
computeRPS(pocCurr, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering);
// Mark pictures in m_piclist as unreferenced if they are not included in RPS
applyReferencePictureSet(&slice->m_rps, pocCurr);
slice->m_numRefIdx[0] = X265_MIN(m_maxRefL0, slice->m_rps.numberOfNegativePictures); // Ensuring L0 contains just the -ve POC
slice->m_numRefIdx[1] = X265_MIN(m_maxRefL1, slice->m_rps.numberOfPositivePictures);
slice->setRefPicList(m_picList);
X265_CHECK(slice->m_sliceType != B_SLICE || slice->m_numRefIdx[1], "B slice without L1 references (non-fatal)\n");
if (slice->m_sliceType == B_SLICE)
{
/* TODO: the lookahead should be able to tell which reference picture
* had the least motion residual. We should be able to use that here to
* select a colocation reference list and index */
slice->m_colFromL0Flag = false;
slice->m_colRefIdx = 0;
slice->m_bCheckLDC = false;
}
else
{
slice->m_bCheckLDC = true;
slice->m_colFromL0Flag = true;
slice->m_colRefIdx = 0;
}
/* test the bit of SLFASE_CONSTANT selected by the POC modulo 31 */
slice->m_sLFaseFlag = (SLFASE_CONSTANT & (1 << (pocCurr % 31))) > 0;
/* Increment reference count of all motion-referenced frames to prevent them
* from being recycled. These counts are decremented at the end of
* compressFrame() */
int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
for (int l = 0; l < numPredDir; l++)
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
{
Frame *refpic = slice->m_refFrameList[l][ref];
ATOMIC_INC(&refpic->m_countRefEncoders);
}
}
}
void DPB::computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer)
{
unsigned int poci = 0, numNeg = 0, numPos = 0;
Frame* iterPic = m_picList.first();
while (iterPic && (poci < maxDecPicBuffer - 1))
{
if ((iterPic->m_poc != curPoc) && iterPic->m_encData->m_bHasReferences)
{
rps->poc[poci] = iterPic->m_poc;
rps->deltaPOC[poci] = rps->poc[poci] - curPoc;
(rps->deltaPOC[poci] < 0) ? numNeg++ : numPos++;
rps->bUsed[poci] = !isRAP;
poci++;
}
iterPic = iterPic->m_next;
}
rps->numberOfPictures = poci;
rps->numberOfPositivePictures = numPos;
rps->numberOfNegativePictures = numNeg;
rps->sortDeltaPOC();
}
/* Marking reference pictures when an IDR/CRA is encountered. */
void DPB::decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType)
{
if (nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
{
/* If the nal_unit_type is IDR, all pictures in the reference picture
* list are marked as "unused for reference" */
Frame* iterFrame = m_picList.first();
while (iterFrame)
{
if (iterFrame->m_poc != pocCurr)
iterFrame->m_encData->m_bHasReferences = false;
iterFrame = iterFrame->m_next;
}
}
else // CRA or No DR
{
if (m_bRefreshPending && pocCurr > m_pocCRA)
{
/* If the bRefreshPending flag is true (a deferred decoding refresh
* is pending) and the current temporal reference is greater than
* the temporal reference of the latest CRA picture (pocCRA), mark
* all reference pictures except the latest CRA picture as "unused
* for reference" and set the bRefreshPending flag to false */
Frame* iterFrame = m_picList.first();
while (iterFrame)
{
if (iterFrame->m_poc != pocCurr && iterFrame->m_poc != m_pocCRA)
iterFrame->m_encData->m_bHasReferences = false;
iterFrame = iterFrame->m_next;
}
m_bRefreshPending = false;
}
if (nalUnitType == NAL_UNIT_CODED_SLICE_CRA)
{
/* If the nal_unit_type is CRA, set the bRefreshPending flag to true
* and pocCRA to the temporal reference of the current picture */
m_bRefreshPending = true;
m_pocCRA = pocCurr;
}
}
/* Note that the current picture is already placed in the reference list and
* its marking is not changed. If the current picture has a nal_ref_idc
* that is not 0, it will remain marked as "used for reference" */
}
/** Clear m_bHasReferences on every buffered picture (other than the current
 *  one) whose POC does not appear in the given Reference Picture Set. */
void DPB::applyReferencePictureSet(RPS *rps, int curPoc)
{
    // walk every picture in the reference picture buffer
    for (Frame* pic = m_picList.first(); pic; pic = pic->m_next)
    {
        // the current picture and already-unreferenced pictures are left alone
        if (pic->m_poc == curPoc || !pic->m_encData->m_bHasReferences)
            continue;

        // search the Reference Picture Set for this picture's POC
        bool inRps = false;
        for (int i = 0; i < rps->numberOfPositivePictures + rps->numberOfNegativePictures; i++)
        {
            if (pic->m_poc == curPoc + rps->deltaPOC[i])
            {
                inRps = true;
                break;
            }
        }

        if (!inRps)
            pic->m_encData->m_bHasReferences = false;
    }
}
/* Choose the NAL unit type for the picture with the given POC.
 * POC 0 is always IDR; key frames are CRA with open GOP and IDR otherwise;
 * pictures preceding the last CRA are RASL, pictures preceding the last IDR
 * are RADL, and everything else is a trailing picture. */
NalUnitType DPB::getNalUnitType(int curPOC, bool bIsKeyFrame)
{
    if (curPOC == 0)
        return NAL_UNIT_CODED_SLICE_IDR_W_RADL;

    if (bIsKeyFrame)
    {
        if (m_bOpenGOP)
            return NAL_UNIT_CODED_SLICE_CRA;
        return NAL_UNIT_CODED_SLICE_IDR_W_RADL;
    }

    // All leading pictures are being marked as TFD pictures here since
    // current encoder uses all reference pictures while encoding leading
    // pictures. An encoder can ensure that a leading picture can be still
    // decodable when random accessing to a CRA/CRANT/BLA/BLANT picture by
    // controlling the reference pictures used for encoding that leading
    // picture. Such a leading picture need not be marked as a TFD picture.
    const bool precedesCRA = m_pocCRA && curPOC < m_pocCRA;
    if (precedesCRA)
        return NAL_UNIT_CODED_SLICE_RASL_R;

    const bool precedesIDR = m_lastIDR && curPOC < m_lastIDR;
    if (precedesIDR)
        return NAL_UNIT_CODED_SLICE_RADL_R;

    return NAL_UNIT_CODED_SLICE_TRAIL_R;
}

80
x265/source/encoder/dpb.h Normal file
View file

@ -0,0 +1,80 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_DPB_H
#define X265_DPB_H
#include "piclist.h"
namespace X265_NS {
// private namespace for x265
class Frame;
class FrameData;
class Slice;
/* Decoded picture buffer bookkeeping: holds the list of pictures, the free
 * lists, and the IDR/CRA state used when choosing NAL unit types and applying
 * reference picture marking. */
class DPB
{
public:

    int        m_lastIDR;            // POC of the last IDR; 0 until one is recorded
    int        m_pocCRA;             // POC of the last CRA; 0 until one is recorded
    int        m_maxRefL0;           // taken from param->maxNumReferences
    int        m_maxRefL1;           // 2 when B-pyramid is enabled, otherwise 1
    int        m_bOpenGOP;           // copy of param->bOpenGOP
    bool       m_bRefreshPending;    // set when a CRA is seen, cleared once the refresh is applied
    bool       m_bTemporalSublayer;  // param->bEnableTemporalSubLayers normalised to bool
    PicList    m_picList;
    PicList    m_freeList;
    FrameData* m_frameDataFreeList;

    /* Capture the encoder settings this class needs and reset the IDR/CRA
     * state. Initializers are listed in member declaration order. */
    DPB(x265_param *param)
        : m_lastIDR(0)
        , m_pocCRA(0)
        , m_maxRefL0(param->maxNumReferences)
        , m_maxRefL1(param->bBPyramid ? 2 : 1)
        , m_bOpenGOP(param->bOpenGOP)
        , m_bRefreshPending(false)
        , m_bTemporalSublayer(!!param->bEnableTemporalSubLayers)
        , m_frameDataFreeList(NULL)
    {
    }

    ~DPB();

    void prepareEncode(Frame*);
    void recycleUnreferenced();

protected:

    void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);

    void applyReferencePictureSet(RPS *rps, int curPoc);
    void decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType);

    NalUnitType getNalUnitType(int curPoc, bool bIsKeyFrame);
};
}
#endif // X265_DPB_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,179 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_ENCODER_H
#define X265_ENCODER_H
#include "common.h"
#include "slice.h"
#include "scalinglist.h"
#include "x265.h"
#include "nal.h"
/* Empty definition of the public x265_encoder handle type; the Encoder class
 * declared below derives from it. */
struct x265_encoder {};
namespace X265_NS {
// private namespace
extern const char g_sliceTypeToChar[3];
class Entropy;
/* Running totals of quality and size statistics. All counters start at zero;
 * the add*() methods (defined elsewhere) accumulate into them. */
struct EncStats
{
    double   m_psnrSumY;
    double   m_psnrSumU;
    double   m_psnrSumV;
    double   m_globalSsim;
    double   m_totalQp;
    uint64_t m_accBits;
    uint32_t m_numPics;
    uint16_t m_maxCLL;
    double   m_maxFALL;

    /* Zero every accumulator (initializers follow member declaration order) */
    EncStats()
        : m_psnrSumY(0)
        , m_psnrSumU(0)
        , m_psnrSumV(0)
        , m_globalSsim(0)
        , m_totalQp(0)
        , m_accBits(0)
        , m_numPics(0)
        , m_maxCLL(0)
        , m_maxFALL(0)
    {
    }

    void addQP(double aveQp);

    void addPsnr(double psnrY, double psnrU, double psnrV);

    void addBits(uint64_t bits);

    void addSsim(double ssim);
};
class FrameEncoder;
class DPB;
class Lookahead;
class RateControl;
class ThreadPool;
/* Top-level encoder object. It derives from the public x265_encoder handle
 * type and aggregates the per-frame encoders, DPB, lookahead, rate control,
 * parameter sets (VPS/SPS/PPS) and global statistics. Only declarations are
 * given here; the method bodies live elsewhere. */
class Encoder : public x265_encoder
{
public:
int m_pocLast; // time index (POC)
int m_encodedFrameNum;
int m_outputCount;
int m_bframeDelay;
int64_t m_firstPts;
int64_t m_bframeDelayTime;
int64_t m_prevReorderedPts[2];
ThreadPool* m_threadPool;
/* one slot per possible frame thread (X265_MAX_FRAME_THREADS entries) */
FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS];
DPB* m_dpb;
Frame* m_exportedPic;
int m_numPools;
int m_curEncoder;
/* Collect statistics globally */
/* one accumulator for all frames plus, presumably, one per slice type (I/P/B) */
EncStats m_analyzeAll;
EncStats m_analyzeI;
EncStats m_analyzeP;
EncStats m_analyzeB;
int64_t m_encodeStartTime;
// weighted prediction
int m_numLumaWPFrames; // number of P frames with weighted luma reference
int m_numChromaWPFrames; // number of P frames with weighted chroma reference
int m_numLumaWPBiFrames; // number of B frames with weighted luma reference
int m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
FILE* m_analysisFile;
int m_conformanceMode;
/* parameter sets, filled by the protected initVPS/initSPS/initPPS helpers below */
VPS m_vps;
SPS m_sps;
PPS m_pps;
NALList m_nalList;
ScalingList m_scalingList; // quantization matrix information
bool m_emitCLLSEI;
int m_lastBPSEI;
uint32_t m_numDelayedPic;
x265_param* m_param;
x265_param* m_latestParam;
RateControl* m_rateControl;
Lookahead* m_lookahead;
Window m_conformanceWindow;
bool m_bZeroLatency; // x265_encoder_encode() returns NALs for the input picture, zero lag
bool m_aborted; // fatal error detected
bool m_reconfigured; // reconfigure of encoder detected
/* "emergency" tables indexed by [transform category][coefficient];
 * NOTE(review): their exact use is not visible in this header - confirm in encoder.cpp */
uint32_t m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
/* pointer to an array of [MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS] tables */
uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
Encoder();
/* the destructor is empty; NOTE(review): teardown presumably happens in destroy() - confirm */
~Encoder() {}
void create();
void stopJobs();
void destroy();
int encode(const x265_picture* pic, x265_picture *pic_out);
int reconfigureParam(x265_param* encParam, x265_param* param);
void getStreamHeaders(NALList& list, Entropy& sbacCoder, Bitstream& bs);
void fetchStats(x265_stats* stats, size_t statsSizeBytes);
void printSummary();
char* statsString(EncStats&, char*);
void configure(x265_param *param);
void updateVbvPlan(RateControl* rc);
void allocAnalysis(x265_analysis_data* analysis);
void freeAnalysis(x265_analysis_data* analysis);
void readAnalysisFile(x265_analysis_data* analysis, int poc);
void writeAnalysisFile(x265_analysis_data* pic);
void finishFrameStats(Frame* pic, FrameEncoder *curEncoder, uint64_t bits, x265_frame_stats* frameStats);
protected:
void initVPS(VPS *vps);
void initSPS(SPS *sps);
void initPPS(PPS *pps);
};
}
#endif // ifndef X265_ENCODER_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,255 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_ENTROPY_H
#define X265_ENTROPY_H
#include "common.h"
#include "bitstream.h"
#include "frame.h"
#include "cudata.h"
#include "contexts.h"
#include "slice.h"
namespace X265_NS {
// private namespace
struct SaoCtuParam;
struct EstBitsSbac;
class ScalingList;
/* How a transform unit is divided; the enumerator value is also the log2 of
 * the number of sections produced (1, 2 or 4). */
enum SplitType
{
    DONT_SPLIT            = 0,
    VERTICAL_SPLIT        = 1,
    QUAD_SPLIT            = 2,
    NUMBER_OF_SPLIT_MODES = 3
};

/* Cursor that walks the sections of a split transform unit: tracks the
 * current section number and the partition index of that section. */
struct TURecurse
{
    uint32_t section;
    uint32_t splitMode;
    uint32_t absPartIdxTURelCU;
    uint32_t absPartIdxStep;

    /* Start at section 0 positioned on _absPartIdxTU; the per-section step is
     * _absPartIdxStep divided by the number of sections of the split mode. */
    TURecurse(SplitType splitType, uint32_t _absPartIdxStep, uint32_t _absPartIdxTU)
        : section(0)
        , splitMode((uint32_t)splitType)
        , absPartIdxTURelCU(_absPartIdxTU)
    {
        static const uint32_t partIdxStepShift[NUMBER_OF_SPLIT_MODES] = { 0, 1, 2 };

        absPartIdxStep = _absPartIdxStep >> partIdxStepShift[splitMode];
    }

    /* Advance to the next section. Returns true while the new section is
     * still inside the TU; an unsplit TU always returns false and does not
     * move the partition index. */
    bool isNextSection()
    {
        const bool isSplit = (splitMode != DONT_SPLIT);

        if (isSplit)
            absPartIdxTURelCU += absPartIdxStep;

        ++section;
        return isSplit && section < (uint32_t)(1 << splitMode);
    }

    /* True when the current section is the final one for this split mode */
    bool isLastSection() const
    {
        const uint32_t numSections = (uint32_t)(1 << splitMode);

        return (section + 1) >= numSections;
    }
};
/* Tables of integer bit-cost estimates for coefficient-coding syntax elements.
 * Presumably populated by the Entropy::est*Bit() methods, which take this
 * struct by non-const reference - confirm in entropy.cpp. Most tables have one
 * dimension sized by a context count and one of size 2. */
struct EstBitsSbac
{
int significantCoeffGroupBits[NUM_SIG_CG_FLAG_CTX][2];
int significantBits[2][NUM_SIG_FLAG_CTX];
int lastBits[2][10];
int greaterOneBits[NUM_ONE_FLAG_CTX][2];
int levelAbsBits[NUM_ABS_FLAG_CTX][2];
int blockCbpBits[NUM_QT_CBF_CTX][2];
int blockRootCbpBits[2];
};
/* Entropy coder: holds the context-model state and CABAC engine state, and
 * exposes code*() methods that write syntax elements plus est*/bits*() methods
 * that only estimate their cost. Extends SyntaxElementWriter, whose m_bitIf
 * member is the output bitstream (set through setBitstream()). */
class Entropy : public SyntaxElementWriter
{
public:
uint64_t m_pad;
uint8_t m_contextState[160]; // MAX_OFF_CTX_MOD + padding
/* CABAC state */
uint32_t m_low;
uint32_t m_range;
uint32_t m_bufferedByte;
int m_numBufferedBytes;
int m_bitsLeft;
uint64_t m_fracBits;
EstBitsSbac m_estBitsSbac;
Entropy();
void setBitstream(Bitstream* p) { m_bitIf = p; }
/* Returns m_fracBits scaled down by 2^15. The check asserts that no bitstream
 * is attached, i.e. the coder is being used for bit counting only. */
uint32_t getNumberOfWrittenBits()
{
X265_CHECK(!m_bitIf, "bit counting mode expected\n");
return (uint32_t)(m_fracBits >> 15);
}
/* Validity flag exists only in checked/debug builds. Note that markInvalid()
 * is not declared at all in the other configuration, while markValid()
 * becomes a no-op. */
#if CHECKED_BUILD || _DEBUG
bool m_valid;
void markInvalid() { m_valid = false; }
void markValid() { m_valid = true; }
#else
void markValid() { }
#endif
void zeroFract() { m_fracBits = 0; }
void resetBits();
void resetEntropy(const Slice& slice);
// SBAC RD
/* load/store are thin wrappers over the private copyFrom(); loadContexts
 * wraps copyContextsFrom() */
void load(const Entropy& src) { copyFrom(src); }
void store(Entropy& dest) const { dest.copyFrom(*this); }
void loadContexts(const Entropy& src) { copyContextsFrom(src); }
void loadIntraDirModeLuma(const Entropy& src);
void copyState(const Entropy& other);
void codeVPS(const VPS& vps);
void codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl);
void codePPS(const PPS& pps);
void codeVUI(const VUI& vui, int maxSubTLayers);
void codeAUD(const Slice& slice);
void codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers);
void codeSliceHeader(const Slice& slice, FrameData& encData);
void codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset);
void codeShortTermRefPicSet(const RPS& rps);
/* Writes the terminating bin, flushes the arithmetic coder, then byte-aligns
 * the output. NOTE(review): the dynamic_cast result is dereferenced without a
 * NULL check, so m_bitIf must really point at a Bitstream when this is called. */
void finishSlice() { encodeBinTrm(1); finish(); dynamic_cast<Bitstream*>(m_bitIf)->writeByteAlignment(); }
void encodeCTU(const CUData& cu, const CUGeom& cuGeom);
void codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple);
void codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode);
void codeMergeIndex(const CUData& cu, uint32_t absPartIdx);
void codeMvd(const CUData& cu, uint32_t absPartIdx, int list);
void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
void codePredInfo(const CUData& cu, uint32_t absPartIdx);
/* convenience overload: looks up the luma cbf in the CU and forwards to the (cbf, tuDepth) overload */
inline void codeQtCbfLuma(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth) { codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth), tuDepth); }
void codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel);
void codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2]);
void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
/* The inline helpers below each encode one bin with the context model found
 * at the named OFF_*_CTX offset (plus a per-call context index where shown) */
inline void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
inline void codeMVPIdx(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]); }
inline void codeMergeFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.m_mergeFlag[absPartIdx], m_contextState[OFF_MERGE_FLAG_EXT_CTX]); }
inline void codeSkipFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.isSkipped(absPartIdx), m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); }
inline void codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth) { encodeBin(cu.m_cuDepth[absPartIdx] > depth, m_contextState[OFF_SPLIT_FLAG_CTX + cu.getCtxSplitFlag(absPartIdx, depth)]); }
inline void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx) { encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]); }
inline void codePredMode(int predMode) { encodeBin(predMode == MODE_INTRA ? 1 : 0, m_contextState[OFF_PRED_MODE_CTX]); }
inline void codeCUTransquantBypassFlag(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]); }
/* luma cbf context index is 1 at tuDepth 0 and 0 at any deeper level */
inline void codeQtCbfLuma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + !tuDepth]); }
/* chroma cbf contexts start two entries after the luma ones and are indexed by tuDepth */
inline void codeQtCbfChroma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + 2 + tuDepth]); }
inline void codeQtRootCbf(uint32_t cbf) { encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
/* any non-zero ttype selects the second group of transform-skip contexts */
inline void codeTransformSkipFlags(uint32_t transformSkip, TextType ttype) { encodeBin(transformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]); }
void codeDeltaQP(const CUData& cu, uint32_t absPartIdx);
void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
/* RDO functions */
void estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
void estCBFBit(EstBitsSbac& estBitsSbac) const;
void estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
void estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
void estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
/* cost of the context-coded flag (value 0) plus a fixed 5 bits */
inline uint32_t bitsIntraModeNonMPM() const { return bitsCodeBin(0, m_contextState[OFF_ADI_CTX]) + 5; }
/* cost of the context-coded flag (value 1) plus 1 bit when dir equals preds[0], otherwise 2 bits */
inline uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const { return bitsCodeBin(1, m_contextState[OFF_ADI_CTX]) + (dir == preds[0] ? 1 : 2); }
inline uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t tuDepth) const { return bitsCodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][tuDepth]]); }
uint32_t bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const;
uint32_t bitsIntraMode(const CUData& cu, uint32_t absPartIdx) const
{
return bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]) + /* not skip */
bitsCodeBin(1, m_contextState[OFF_PRED_MODE_CTX]); /* intra */
}
/* these functions are only used to estimate the bits when cbf is 0 and will never be called when writing the bitstream. */
inline void codeQtRootCbfZero() { encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
private:
/* CABAC private methods */
void start();
void finish();
void encodeBin(uint32_t binValue, uint8_t& ctxModel);
void encodeBinEP(uint32_t binValue);
void encodeBinsEP(uint32_t binValues, int numBins);
void encodeBinTrm(uint32_t binValue);
/* return the bits of encoding the context bin without updating */
/* Adds the entropy cost of the bin to the low 15 bits of m_fracBits and
 * returns the sum shifted right by 15; neither m_fracBits nor the context
 * model is modified. */
inline uint32_t bitsCodeBin(uint32_t binValue, uint32_t ctxModel) const
{
uint64_t fracBits = (m_fracBits & 32767) + sbacGetEntropyBits(ctxModel, binValue);
return (uint32_t)(fracBits >> 15);
}
void encodeCU(const CUData& ctu, const CUGeom &cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP);
void finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP);
void writeOut();
/* SBac private methods */
void writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol);
void writeEpExGolomb(uint32_t symbol, uint32_t count);
void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers);
void codeScalingList(const ScalingList&);
void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
void codePredWeightTable(const Slice& slice);
void codeInterDir(const CUData& cu, uint32_t absPartIdx);
void codePUWise(const CUData& cu, uint32_t absPartIdx);
void codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list);
void codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list);
void codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol);
void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx);
void encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
bool& bCodeDQP, const uint32_t depthRange[2]);
void copyFrom(const Entropy& src);
void copyContextsFrom(const Entropy& src);
};
}
#endif // ifndef X265_ENTROPY_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,234 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Shin Yee <shinyee@multicorewareinc.com>
* Min Chen <chenm003@163.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_FRAMEENCODER_H
#define X265_FRAMEENCODER_H
#include "common.h"
#include "wavefront.h"
#include "bitstream.h"
#include "frame.h"
#include "picyuv.h"
#include "md5.h"
#include "analysis.h"
#include "sao.h"
#include "entropy.h"
#include "framefilter.h"
#include "ratecontrol.h"
#include "reference.h"
#include "nal.h"
namespace X265_NS {
// private x265 namespace
class ThreadPool;
class Encoder;
#define ANGULAR_MODE_ID 2
#define AMP_ID 3
/* Plain counters describing how CUs were coded. Most arrays have a first
 * dimension of 4 (presumably one entry per CU depth - confirm against the
 * code that fills them). */
struct StatisticLog
{
    uint64_t cntInter[4];
    uint64_t cntIntra[4];
    uint64_t cuInterDistribution[4][INTER_MODES];
    uint64_t cuIntraDistribution[4][INTRA_MODES];
    uint64_t cntIntraNxN;
    uint64_t cntSkipCu[4];
    uint64_t cntTotalCu[4];
    uint64_t totalCu;

    /* Every member is an integer counter, so the whole object is cleared in one go */
    StatisticLog()
    {
        memset(this, 0, sizeof(*this));
    }
};
/* manages the state of encoding one row of CTU blocks. When
 * WPP is active, several rows will be simultaneously encoded. */
struct CTURow
{
Entropy bufferedEntropy; /* store CTU2 context for next row CTU0 */
Entropy rowGoOnCoder; /* store context between CTUs, code bitstream if !SAO */
FrameStats rowStats;
/* Threading variables */
/* This lock must be acquired when reading or writing active or busy */
Lock lock;
/* row is ready to run, has no neighbor dependencies. The row may have
 * external dependencies (reference frame pixels) that prevent it from being
 * processed, so it may stay with active=true for some time before it is
 * encoded by a worker thread. */
volatile bool active;
/* row is being processed by a worker thread. This flag is only true when a
 * worker thread is within the context of FrameEncoder::processRow(). This
 * flag is used to detect multiple possible wavefront problems. */
volatile bool busy;
/* count of completed CUs in this row */
volatile uint32_t completed;
/* called at the start of each frame to initialize state */
/* Clears the flags, the completed-CU count and rowStats, then loads
 * initContext into rowGoOnCoder. bufferedEntropy is not touched here. */
void init(Entropy& initContext)
{
active = false;
busy = false;
completed = 0;
memset(&rowStats, 0, sizeof(rowStats));
rowGoOnCoder.load(initContext);
}
};
// Manages the wave-front processing of a single encoding frame
// Derives from WaveFront (row job scheduling) and Thread (threadMain() is the
// thread entry point declared below). Only declarations appear in this header.
class FrameEncoder : public WaveFront, public Thread
{
public:
FrameEncoder();
virtual ~FrameEncoder() {}
virtual bool init(Encoder *top, int numRows, int numCols);
void destroy();
/* triggers encode of a new frame by the worker thread */
bool startCompressFrame(Frame* curFrame);
/* blocks until worker thread is done, returns access unit */
Frame *getEncodedPicture(NALList& list);
Event m_enable;
Event m_done;
Event m_completionEvent;
int m_localTldIdx;
volatile bool m_threadActive;
volatile bool m_bAllRowsStop;
volatile int m_completionCount;
volatile int m_vbvResetTriggerRow;
uint32_t m_numRows;
uint32_t m_numCols;
uint32_t m_filterRowDelay;
uint32_t m_filterRowDelayCus;
uint32_t m_refLagRows;
/* per-row encode state; presumably an array with one CTURow per CTU row - confirm in init() */
CTURow* m_rows;
RateControlEntry m_rce;
SEIDecodedPictureHash m_seiReconPictureDigest;
/* per-plane distortion sums and SSIM totals; FrameFilter::processRowPost adds to these */
uint64_t m_SSDY;
uint64_t m_SSDU;
uint64_t m_SSDV;
double m_ssim;
uint64_t m_accessUnitBits;
uint32_t m_ssimCnt;
/* reconstructed-picture hash state, three entries each (presumably one per plane) */
MD5Context m_state[3];
uint32_t m_crc[3];
uint32_t m_checksum[3];
volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs
volatile int m_totalActiveWorkerCount; // sum of m_activeWorkerCount sampled at end of each CTU
volatile int m_activeWorkerCountSamples; // count of times m_activeWorkerCount was sampled (think vbv restarts)
volatile int m_countRowBlocks; // count of workers forced to abandon a row because of top dependency
int64_t m_startCompressTime; // timestamp when frame encoder is given a frame
int64_t m_row0WaitTime; // timestamp when row 0 is allowed to start
int64_t m_allRowsAvailableTime; // timestamp when all reference dependencies are resolved
int64_t m_endCompressTime; // timestamp after all CTUs are compressed
int64_t m_endFrameTime; // timestamp after RCEnd, NR updates, etc
int64_t m_stallStartTime; // timestamp when worker count becomes 0
int64_t m_prevOutputTime; // timestamp when prev frame was retrieved by API thread
int64_t m_slicetypeWaitTime; // total elapsed time waiting for decided frame
int64_t m_totalWorkerElapsedTime; // total elapsed time spent by worker threads processing CTUs
int64_t m_totalNoWorkerTime; // total elapsed time without any active worker threads
#if DETAILED_CU_STATS
CUStats m_cuStats;
#endif
Encoder* m_top;
x265_param* m_param;
Frame* m_frame;
NoiseReduction* m_nr;
ThreadLocalData* m_tld; /* for --no-wpp */
Bitstream* m_outStreams;
uint32_t* m_substreamSizes;
CUGeom* m_cuGeoms;
uint32_t* m_ctuGeomMap;
Bitstream m_bs;
/* two reference lists, each with MAX_NUM_REF + 1 slots */
MotionReference m_mref[2][MAX_NUM_REF + 1];
Entropy m_entropyCoder;
Entropy m_initSliceContext;
FrameFilter m_frameFilter;
NALList m_nalList;
/* Bonded task group that holds a reference back to its owning FrameEncoder */
class WeightAnalysis : public BondedTaskGroup
{
public:
FrameEncoder& master;
WeightAnalysis(FrameEncoder& fe) : master(fe) {}
void processTasks(int workerThreadId);
protected:
/* declared protected and not defined here; presumably only to forbid
 * assignment, since the class holds a reference member */
WeightAnalysis operator=(const WeightAnalysis&);
};
protected:
bool initializeGeoms();
/* analyze / compress frame, can be run in parallel within reference constraints */
void compressFrame();
/* called by compressFrame to generate final per-row bitstreams */
void encodeSlice();
void threadMain();
int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
void noiseReductionUpdate();
/* Called by WaveFront::findJob() */
virtual void processRow(int row, int threadId);
virtual void processRowEncoder(int row, ThreadLocalData& tld);
/* WaveFront job indices interleave the two stages of each CTU row:
 * index row * 2 is the encode job, index row * 2 + 1 is the filter job */
void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); }
void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); }
void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); }
void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); }
};
}
#endif // ifndef X265_FRAMEENCODER_H

View file

@ -0,0 +1,494 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
* Min Chen <chenm003@163.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "frame.h"
#include "framedata.h"
#include "encoder.h"
#include "framefilter.h"
#include "frameencoder.h"
#include "wavefront.h"
using namespace X265_NS;
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);
/* Construct an unbound filter: all pointer members start out NULL and are
 * filled in later by init() (m_param, m_frameEncoder, m_ssimBuf) and
 * start() (m_frame). */
FrameFilter::FrameFilter()
    : m_param(NULL), m_frame(NULL), m_frameEncoder(NULL), m_ssimBuf(NULL)
{}
/* Release the resources acquired by init().
 *
 * Safe to call on an instance whose init() never ran: the constructor leaves
 * m_param NULL, so the SAO teardown is skipped in that case instead of
 * dereferencing a NULL pointer. Also safe to call more than once, because the
 * SSIM buffer pointer is cleared after it is freed. */
void FrameFilter::destroy()
{
    if (m_param && m_param->bEnableSAO)
        m_sao.destroy();

    X265_FREE(m_ssimBuf);
    m_ssimBuf = NULL; // prevent a double free if destroy() is called again
}
/* Bind the filter to its encoder and frame encoder and derive the per-stream
 * constants used while filtering.
 *
 * top      top-level encoder; supplies m_param and the SPS conformance window
 * frame    owning frame encoder
 * numRows  number of CTU rows in a picture
 *
 * If SAO is enabled but its buffers cannot be created, SAO is switched off in
 * the shared parameter set. The SSIM scratch buffer is only allocated when
 * SSIM reporting is enabled. */
void FrameFilter::init(Encoder *top, FrameEncoder *frame, int numRows)
{
    m_param = top->m_param;
    m_frameEncoder = frame;
    m_numRows = numRows;

    m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
    m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);

    m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
    m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;

    // SAO trails by one row only when deblocking is enabled
    if (m_param->bEnableLoopFilter)
        m_saoRowDelay = 1;
    else
        m_saoRowDelay = 0;

    // height of the last CTU row: the remainder, or a full CTU when the
    // picture height is an exact multiple of the CTU size
    if (m_param->sourceHeight % g_maxCUSize)
        m_lastHeight = m_param->sourceHeight % g_maxCUSize;
    else
        m_lastHeight = g_maxCUSize;

    if (m_param->bEnableSAO && !m_sao.create(m_param))
        m_param->bEnableSAO = 0;

    if (m_param->bEnableSsim)
        m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
}
/* Begin filtering a new frame: remember the frame and, when SAO is enabled,
 * hand the frame, initial entropy state and QP to the SAO stage. */
void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
{
    m_frame = frame;

    if (!m_param->bEnableSAO)
        return;

    m_sao.startSlice(frame, initState, qp);
}
void FrameFilter::processRow(int row)
{
ProfileScopeEvent(filterCTURow);
#if DETAILED_CU_STATS
ScopedElapsedTime filterPerfScope(m_frameEncoder->m_cuStats.loopFilterElapsedTime);
m_frameEncoder->m_cuStats.countLoopFilter++;
#endif
if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO)
{
processRowPost(row);
return;
}
FrameData& encData = *m_frame->m_encData;
const uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
const uint32_t lineStartCUAddr = row * numCols;
if (m_param->bEnableLoopFilter)
{
const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
for (uint32_t col = 0; col < numCols; col++)
{
uint32_t cuAddr = lineStartCUAddr + col;
const CUData* ctu = encData.getPicCTU(cuAddr);
deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
if (col > 0)
{
const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
}
}
const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[lineStartCUAddr + numCols - 1]], Deblock::EDGE_HOR);
}
// SAO
SAOParam* saoParam = encData.m_saoParam;
if (m_param->bEnableSAO)
{
m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext);
m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext);
m_sao.rdoSaoUnitRow(saoParam, row);
// NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
if (row >= m_saoRowDelay)
processSao(row - m_saoRowDelay);
}
// this row of CTUs has been encoded
if (row > 0)
processRowPost(row - 1);
if (row == m_numRows - 1)
{
if (m_param->bEnableSAO)
{
m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
for (int i = m_numRows - m_saoRowDelay; i < m_numRows; i++)
processSao(i);
}
processRowPost(row);
}
}
/* Height in pixels of the given CTU row: the (possibly partial) last-row
 * height for the final row, otherwise the full CTU size. */
uint32_t FrameFilter::getCUHeight(int rowNum) const
{
    const bool isLastRow = (rowNum == m_numRows - 1);

    if (isLastRow)
        return m_lastHeight;

    return g_maxCUSize;
}
void FrameFilter::processRowPost(int row)
{
PicYuv *reconPic = m_frame->m_reconPic;
const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
const uint32_t lineStartCUAddr = row * numCols;
const int realH = getCUHeight(row);
// Border extend Left and Right
primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
if (reconPic->m_picCsp != X265_CSP_I400) {
primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
}
// Border extend Top
if (!row)
{
const intptr_t stride = reconPic->m_stride;
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
if (reconPic->m_picCsp != X265_CSP_I400) {
const intptr_t strideC = reconPic->m_strideC;
pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
{
memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
}
}
}
// Border extend Bottom
if (row == m_numRows - 1)
{
const intptr_t stride = reconPic->m_stride;
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
if (reconPic->m_picCsp != X265_CSP_I400) {
const intptr_t strideC = reconPic->m_strideC;
pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
{
memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
}
}
}
// Notify other FrameEncoders that this row of reconstructed pixels is available
m_frame->m_reconRowCount.incr();
uint32_t cuAddr = lineStartCUAddr;
if (m_param->bEnablePsnr)
{
PicYuv* fencPic = m_frame->m_fencPic;
intptr_t stride = reconPic->m_stride;
uint32_t width = reconPic->m_picWidth - m_pad[0];
uint32_t height = getCUHeight(row);
uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
m_frameEncoder->m_SSDY += ssdY;
if (reconPic->m_picCsp != X265_CSP_I400) {
height >>= m_vChromaShift;
width >>= m_hChromaShift;
stride = reconPic->m_strideC;
uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
m_frameEncoder->m_SSDU += ssdU;
m_frameEncoder->m_SSDV += ssdV;
}
}
if (m_param->bEnableSsim && m_ssimBuf)
{
pixel *rec = reconPic->m_picOrg[0];
pixel *fenc = m_frame->m_fencPic->m_picOrg[0];
intptr_t stride1 = reconPic->m_stride;
intptr_t stride2 = m_frame->m_fencPic->m_stride;
uint32_t bEnd = ((row + 1) == (this->m_numRows - 1));
uint32_t bStart = (row == 0);
uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
uint32_t maxPixY = (row + 1) * g_maxCUSize - 4 * !bEnd;
uint32_t ssim_cnt;
x265_emms();
/* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right
* to avoid alignment of ssim blocks with DCT blocks. */
minPixY += bStart ? 2 : -6;
m_frameEncoder->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, fenc + 2 + minPixY * stride2, stride2,
m_param->sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, ssim_cnt);
m_frameEncoder->m_ssimCnt += ssim_cnt;
}
if (m_param->decodedPictureHashSEI == 1)
{
uint32_t height = getCUHeight(row);
uint32_t width = reconPic->m_picWidth;
intptr_t stride = reconPic->m_stride;
if (!row)
{
for (int i = 0; i < 3; i++)
MD5Init(&m_frameEncoder->m_state[i]);
}
updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
if (reconPic->m_picCsp != X265_CSP_I400) {
width >>= m_hChromaShift;
height >>= m_vChromaShift;
stride = reconPic->m_strideC;
updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
}
}
else if (m_param->decodedPictureHashSEI == 2)
{
uint32_t height = getCUHeight(row);
uint32_t width = reconPic->m_picWidth;
intptr_t stride = reconPic->m_stride;
if (!row)
m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff;
updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride);
if (reconPic->m_picCsp != X265_CSP_I400) {
width >>= m_hChromaShift;
height >>= m_vChromaShift;
stride = reconPic->m_strideC;
updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
}
}
else if (m_param->decodedPictureHashSEI == 3)
{
uint32_t width = reconPic->m_picWidth;
uint32_t height = getCUHeight(row);
intptr_t stride = reconPic->m_stride;
uint32_t cuHeight = g_maxCUSize;
if (!row)
m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0;
updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
if (reconPic->m_picCsp != X265_CSP_I400) {
width >>= m_hChromaShift;
height >>= m_vChromaShift;
stride = reconPic->m_strideC;
cuHeight >>= m_vChromaShift;
updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
}
}
if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 * (int)m_frameEncoder->m_numRows)
m_frameEncoder->m_completionEvent.trigger();
}
/* Sum of squared differences between two pixel planes covering a region of
 * width x height samples. Both planes are stepped with the same stride.
 *
 * fenc, rec  - top-left sample of the region in each plane
 * stride     - distance in samples between consecutive rows (shared by both)
 * width      - region width in samples
 * height     - region height in samples
 *
 * Returns the accumulated SSD as a 64-bit value. */
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height)
{
uint64_t ssd = 0;
/* The block primitives used below only tile regions whose width and height
 * are both multiples of 4; anything else is summed sample by sample. */
if ((width | height) & 3)
{
/* Slow Path */
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
{
int diff = (int)(fenc[x] - rec[x]);
ssd += diff * diff;
}
fenc += stride;
rec += stride;
}
return ssd;
}
uint32_t y = 0;
/* Consume rows in ever narrower chunks of height */
for (int size = BLOCK_64x64; size >= BLOCK_4x4 && y < height; size--)
{
/* Band height for this block size: 1 << (size + 2), which is 4 for the
 * smallest size if BLOCK_4x4 is 0 -- TODO confirm enum values */
uint32_t rowHeight = 1 << (size + 2);
for (; y + rowHeight <= height; y += rowHeight)
{
/* x is shared by all of the loops below: each block size continues from
 * the column where the previous (larger) size stopped */
uint32_t y1, x = 0;
/* Consume each row using the largest square blocks possible */
/* 64x64 is only used when the stride is a multiple of 32 (presumably an
 * alignment requirement of the optimized primitive -- confirm) */
if (size == BLOCK_64x64 && !(stride & 31))
for (; x + 64 <= width; x += 64)
ssd += primitives.cu[BLOCK_64x64].sse_pp(fenc + x, stride, rec + x, stride);
/* 32x32 is only used when the stride is a multiple of 16; the inner y1
 * loop stacks blocks vertically to fill the current band height */
if (size >= BLOCK_32x32 && !(stride & 15))
for (; x + 32 <= width; x += 32)
for (y1 = 0; y1 + 32 <= rowHeight; y1 += 32)
ssd += primitives.cu[BLOCK_32x32].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
if (size >= BLOCK_16x16)
for (; x + 16 <= width; x += 16)
for (y1 = 0; y1 + 16 <= rowHeight; y1 += 16)
ssd += primitives.cu[BLOCK_16x16].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
if (size >= BLOCK_8x8)
for (; x + 8 <= width; x += 8)
for (y1 = 0; y1 + 8 <= rowHeight; y1 += 8)
ssd += primitives.cu[BLOCK_8x8].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
/* 4x4 blocks mop up whatever columns remain (width is a multiple of 4 here) */
for (; x + 4 <= width; x += 4)
for (y1 = 0; y1 + 4 <= rowHeight; y1 += 4)
ssd += primitives.cu[BLOCK_4x4].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
/* advance both planes past the band that was just consumed */
fenc += stride * rowHeight;
rec += stride * rowHeight;
}
}
return ssd;
}
/* Accumulate SSIM over one band of rows.
 *
 * pix1/stride1, pix2/stride2 - the two planes being compared
 * width, height              - size of the band in samples; both are reduced
 *                              to counts of 4-sample units before use
 * buf                        - scratch memory holding two rows of int[4] sums
 * cnt                        - out: (height/4 - 1) * (width/4 - 1)
 *
 * Returns the sum of the values produced by ssim_end_4 for the band. */
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt)
{
    uint32_t unitsWide = width >> 2;
    uint32_t unitsHigh = height >> 2;

    /* Two row buffers carved out of buf; they swap roles each time a new
     * row of sums is produced */
    int (*rowA)[4] = (int(*)[4])buf;
    int (*rowB)[4] = rowA + unitsWide + 3;

    float total = 0.0;
    uint32_t nextRow = 0;   /* next row index whose sums still have to be computed */

    for (uint32_t y = 1; y < unitsHigh; y++)
    {
        /* Make sure sums exist for every row up to and including y */
        while (nextRow <= y)
        {
            std::swap(rowA, rowB);
            for (uint32_t x = 0; x < unitsWide; x += 2)
            {
                primitives.ssim_4x4x2_core(&pix1[(4 * x + (nextRow * stride1))], stride1,
                                           &pix2[(4 * x + (nextRow * stride2))], stride2,
                                           &rowA[x]);
            }
            nextRow++;
        }

        /* Combine the two most recent rows of sums, up to four units per call */
        for (uint32_t x = 0; x < unitsWide - 1; x += 4)
            total += primitives.ssim_end_4(rowA + x, rowB + x, X265_MIN(4, unitsWide - x - 1));
    }

    cnt = (unitsHigh - 1) * (unitsWide - 1);
    return total;
}
/* restore original YUV samples to recon after SAO (if lossless) */
static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx)
{
int size = cu->m_log2CUSize[absPartIdx] - 2;
uint32_t cuAddr = cu->m_cuAddr;
PicYuv* reconPic = frame.m_reconPic;
PicYuv* fencPic = frame.m_fencPic;
pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
pixel* src = fencPic->getLumaAddr(cuAddr, absPartIdx);
primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
int csp = fencPic->m_picCsp;
if (csp != X265_CSP_I400) {
pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
}
}
/* Original YUV restoration for CU in lossless coding */
static void origCUSampleRestoration(const CUData* cu, const CUGeom& cuGeom, Frame& frame)
{
uint32_t absPartIdx = cuGeom.absPartIdx;
if (cu->m_cuDepth[absPartIdx] > cuGeom.depth)
{
for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
if (childGeom.flags & CUGeom::PRESENT)
origCUSampleRestoration(cu, childGeom, frame);
}
return;
}
// restore original YUV samples
if (cu->m_tqBypass[absPartIdx])
restoreOrigLosslessYuv(cu, frame, absPartIdx);
}
void FrameFilter::processSao(int row)
{
FrameData& encData = *m_frame->m_encData;
SAOParam* saoParam = encData.m_saoParam;
if (saoParam->bSaoFlag[0])
m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
if (saoParam->bSaoFlag[1])
{
m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1);
m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
}
if (encData.m_slice->m_pps->bTransquantBypassEnabled)
{
uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
uint32_t lineStartCUAddr = row * numCols;
const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
for (uint32_t col = 0; col < numCols; col++)
{
uint32_t cuAddr = lineStartCUAddr + col;
const CUData* ctu = encData.getPicCTU(cuAddr);
origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
}
}
}

View file

@ -0,0 +1,74 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
* Min Chen <chenm003@163.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_FRAMEFILTER_H
#define X265_FRAMEFILTER_H
#include "common.h"
#include "frame.h"
#include "deblock.h"
#include "sao.h"
namespace X265_NS {
// private x265 namespace
class Encoder;
class Entropy;
class FrameEncoder;
struct ThreadLocalData;
// Manages the in-loop filtering of a single frame. Derives from Deblock and
// owns an SAO instance; work is driven one CTU row at a time through the
// process*() methods.
class FrameFilter : public Deblock
{
public:
x265_param* m_param;
Frame* m_frame;
FrameEncoder* m_frameEncoder;
// Right-shifts applied to luma widths / heights to get chroma dimensions
int m_hChromaShift;
int m_vChromaShift;
// m_pad[0] is subtracted from the picture width when computing PSNR;
// presumably the encoder's horizontal/vertical padding -- TODO confirm
int m_pad[2];
SAO m_sao;
// Number of CTU rows in the frame
int m_numRows;
// NOTE(review): not used in the code visible here; presumably the number of
// rows SAO lags behind deblocking -- confirm against processRow()
int m_saoRowDelay;
// NOTE(review): presumably the sample height of the last CTU row -- confirm
int m_lastHeight;
void* m_ssimBuf; /* Temp storage for ssim computation */
FrameFilter();
void init(Encoder *top, FrameEncoder *frame, int numRows);
void destroy();
void start(Frame *pic, Entropy& initState, int qp);
void processRow(int row);
// Post-filter work for one row: border extension, PSNR/SSIM accumulation and
// decoded-picture hash updates, then signals row completion
void processRowPost(int row);
// Runs SAO on one CTU row and restores source samples of transquant-bypass CUs
void processSao(int row);
// Height of the given CTU row; used as a sample count by processRowPost()
uint32_t getCUHeight(int rowNum) const;
};
}
#endif // ifndef X265_FRAMEFILTER_H

View file

@ -0,0 +1,496 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "slice.h"
#include "level.h"
namespace X265_NS {
/* Limits that define one decoder level. Field order must match the aggregate
 * initializers of the levels[] table. A value of MAX_UINT in a High tier field
 * means the level has no High tier. */
typedef struct
{
uint32_t maxLumaSamples;          /* compared against sourceWidth * sourceHeight */
uint32_t maxLumaSamplesPerSecond; /* compared against luma samples * frame rate */
uint32_t maxBitrateMain;          /* Main tier bitrate cap, same units as rc.bitrate / rc.vbvMaxBitrate */
uint32_t maxBitrateHigh;          /* High tier bitrate cap, or MAX_UINT if there is no High tier */
uint32_t maxCpbSizeMain;          /* Main tier cap for rc.vbvBufferSize */
uint32_t maxCpbSizeHigh;          /* High tier cap for rc.vbvBufferSize, or MAX_UINT */
uint32_t minCompressionRatio;     /* copied to vps.ptl.minCrForLevel */
Level::Name levelEnum;            /* value stored in vps.ptl.levelIdc */
const char* name;                 /* printable level name, e.g. "4.1" */
int levelIdc;                     /* numeric form matched against param.levelIdc, e.g. 41 */
} LevelSpec;
/* Level limit table, ordered from the lowest level to the highest so that
 * determineLevel() can pick the first entry that fits. Columns follow the
 * LevelSpec field order:
 *   maxLumaSamples, maxLumaSamplesPerSecond, maxBitrateMain, maxBitrateHigh,
 *   maxCpbSizeMain, maxCpbSizeHigh, minCompressionRatio, levelEnum, name, levelIdc
 * Levels below 4 have no High tier (MAX_UINT in the High columns). The final
 * 8.5 entry is unbounded in every column and is the one chosen for lossless
 * encodes. */
LevelSpec levels[] =
{
{ 36864, 552960, 128, MAX_UINT, 350, MAX_UINT, 2, Level::LEVEL1, "1", 10 },
{ 122880, 3686400, 1500, MAX_UINT, 1500, MAX_UINT, 2, Level::LEVEL2, "2", 20 },
{ 245760, 7372800, 3000, MAX_UINT, 3000, MAX_UINT, 2, Level::LEVEL2_1, "2.1", 21 },
{ 552960, 16588800, 6000, MAX_UINT, 6000, MAX_UINT, 2, Level::LEVEL3, "3", 30 },
{ 983040, 33177600, 10000, MAX_UINT, 10000, MAX_UINT, 2, Level::LEVEL3_1, "3.1", 31 },
{ 2228224, 66846720, 12000, 30000, 12000, 30000, 4, Level::LEVEL4, "4", 40 },
{ 2228224, 133693440, 20000, 50000, 20000, 50000, 4, Level::LEVEL4_1, "4.1", 41 },
{ 8912896, 267386880, 25000, 100000, 25000, 100000, 6, Level::LEVEL5, "5", 50 },
{ 8912896, 534773760, 40000, 160000, 40000, 160000, 8, Level::LEVEL5_1, "5.1", 51 },
{ 8912896, 1069547520, 60000, 240000, 60000, 240000, 8, Level::LEVEL5_2, "5.2", 52 },
{ 35651584, 1069547520, 60000, 240000, 60000, 240000, 8, Level::LEVEL6, "6", 60 },
{ 35651584, 2139095040, 120000, 480000, 120000, 480000, 8, Level::LEVEL6_1, "6.1", 61 },
{ 35651584, 4278190080U, 240000, 800000, 240000, 800000, 6, Level::LEVEL6_2, "6.2", 62 },
{ MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, 1, Level::LEVEL8_5, "8.5", 85 },
};
/* Determine the minimum decoder level required to decode the described video.
 *
 * Fills in the profile/tier/level fields of vps.ptl (profileIdc,
 * profileCompatibilityFlag, levelIdc, tierFlag, minCrForLevel,
 * maxLumaSrForLevel and the constraint flags) and vps.maxTempSubLayers from
 * the encoder parameters, then logs the detected profile, level and tier.
 *
 * Reads vps.maxDecPicBuffering and vps.numReorderPics, which enforceLevel()
 * sets, so that function is expected to have run first.
 *
 * If a matching level is found but the CTU size or reference count violates
 * one of its constraints, profile and level are both set to NONE and the
 * function returns early. */
void determineLevel(const x265_param &param, VPS& vps)
{
    vps.ptl.onePictureOnlyConstraintFlag = param.totalFrames == 1;
    vps.ptl.intraConstraintFlag = param.keyframeMax <= 1 || vps.ptl.onePictureOnlyConstraintFlag;
    vps.ptl.bitDepthConstraint = param.internalBitDepth;
    vps.ptl.chromaFormatConstraint = param.internalCsp;

    /* TODO: figure out HighThroughput signaling, aka: HbrFactor in section A.4.2, only available
     * for intra-only profiles (vps.ptl.intraConstraintFlag) */
    vps.ptl.lowerBitRateConstraintFlag = true;

    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;

    if (param.internalCsp == X265_CSP_I420 && param.internalBitDepth <= 10)
    {
        /* Probably an HEVC v1 profile, but must check to be sure */
        if (param.internalBitDepth <= 8)
        {
            if (vps.ptl.onePictureOnlyConstraintFlag)
                vps.ptl.profileIdc = Profile::MAINSTILLPICTURE;
            else if (vps.ptl.intraConstraintFlag)
                vps.ptl.profileIdc = Profile::MAINREXT; /* Main Intra */
            else
                vps.ptl.profileIdc = Profile::MAIN;
        }
        else if (param.internalBitDepth <= 10)
        {
            /* note there is no 10bit still picture profile */
            if (vps.ptl.intraConstraintFlag)
                vps.ptl.profileIdc = Profile::MAINREXT; /* Main10 Intra */
            else
                vps.ptl.profileIdc = Profile::MAIN10;
        }
    }
    else
        vps.ptl.profileIdc = Profile::MAINREXT;

    /* determine which profiles are compatible with this stream */
    memset(vps.ptl.profileCompatibilityFlag, 0, sizeof(vps.ptl.profileCompatibilityFlag));
    vps.ptl.profileCompatibilityFlag[vps.ptl.profileIdc] = true;
    if (vps.ptl.profileIdc == Profile::MAIN10 && param.internalBitDepth == 8)
        vps.ptl.profileCompatibilityFlag[Profile::MAIN] = true;
    else if (vps.ptl.profileIdc == Profile::MAIN)
        vps.ptl.profileCompatibilityFlag[Profile::MAIN10] = true;
    else if (vps.ptl.profileIdc == Profile::MAINSTILLPICTURE)
    {
        vps.ptl.profileCompatibilityFlag[Profile::MAIN] = true;
        vps.ptl.profileCompatibilityFlag[Profile::MAIN10] = true;
    }
    else if (vps.ptl.profileIdc == Profile::MAINREXT)
        vps.ptl.profileCompatibilityFlag[Profile::MAINREXT] = true;

    uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
    uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
    uint32_t bitrate = param.rc.vbvMaxBitrate ? param.rc.vbvMaxBitrate : param.rc.bitrate;

    const uint32_t MaxDpbPicBuf = 6;
    vps.ptl.levelIdc = Level::NONE;
    vps.ptl.tierFlag = Level::MAIN;

    const size_t NumLevels = sizeof(levels) / sizeof(levels[0]);
    uint32_t i;
    if (param.bLossless)
    {
        /* Lossless streams are signaled with the unbounded level 8.5, which is
         * the last table entry; derive the index from the table size rather
         * than hard-coding it so the two cannot drift apart */
        i = (uint32_t)(NumLevels - 1);
        vps.ptl.minCrForLevel = 1;
        vps.ptl.maxLumaSrForLevel = MAX_UINT;
        vps.ptl.levelIdc = Level::LEVEL8_5;
        vps.ptl.tierFlag = Level::MAIN;
    }
    else for (i = 0; i < NumLevels; i++)
    {
        /* skip levels whose picture size, sample rate or bitrate limits are exceeded */
        if (lumaSamples > levels[i].maxLumaSamples)
            continue;
        else if (samplesPerSec > levels[i].maxLumaSamplesPerSecond)
            continue;
        else if (bitrate > levels[i].maxBitrateMain && levels[i].maxBitrateHigh == MAX_UINT)
            continue;
        else if (bitrate > levels[i].maxBitrateHigh)
            continue;
        else if (param.sourceWidth > sqrt(levels[i].maxLumaSamples * 8.0f))
            continue;
        else if (param.sourceHeight > sqrt(levels[i].maxLumaSamples * 8.0f))
            continue;

        /* smaller pictures are allowed a larger decoded picture buffer */
        uint32_t maxDpbSize = MaxDpbPicBuf;
        if (lumaSamples <= (levels[i].maxLumaSamples >> 2))
            maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16);
        else if (lumaSamples <= (levels[i].maxLumaSamples >> 1))
            maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16);
        else if (lumaSamples <= ((3 * levels[i].maxLumaSamples) >> 2))
            maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16);

        /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than
         * or equal to MaxDpbSize */
        if (vps.maxDecPicBuffering > maxDpbSize)
            continue;

        /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
        if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32)
        {
            x265_log(&param, X265_LOG_WARNING, "level %s detected, but CTU size 16 is non-compliant\n", levels[i].name);
            vps.ptl.profileIdc = Profile::NONE;
            vps.ptl.levelIdc = Level::NONE;
            vps.ptl.tierFlag = Level::MAIN;
            x265_log(&param, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
            return;
        }

        /* The value of NumPocTotalCurr shall be less than or equal to 8 */
        int numPocTotalCurr = param.maxNumReferences + vps.numReorderPics;
        if (numPocTotalCurr > 8)
        {
            x265_log(&param, X265_LOG_WARNING, "level %s detected, but NumPocTotalCurr (total references) is non-compliant\n", levels[i].name);
            vps.ptl.profileIdc = Profile::NONE;
            vps.ptl.levelIdc = Level::NONE;
            vps.ptl.tierFlag = Level::MAIN;
            x265_log(&param, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
            return;
        }

#define CHECK_RANGE(value, main, high) (high != MAX_UINT && value > main && value <= high)

        if (CHECK_RANGE(bitrate, levels[i].maxBitrateMain, levels[i].maxBitrateHigh) ||
            CHECK_RANGE((uint32_t)param.rc.vbvBufferSize, levels[i].maxCpbSizeMain, levels[i].maxCpbSizeHigh))
        {
            /* The bitrate or buffer size are out of range for Main tier, but in
             * range for High tier. If the user requested High tier then give
             * them High tier at this level.  Otherwise allow the loop to
             * progress to the Main tier of the next level */
            if (param.bHighTier)
                vps.ptl.tierFlag = Level::HIGH;
            else
                continue;
        }
        else
            vps.ptl.tierFlag = Level::MAIN;
#undef CHECK_RANGE

        vps.ptl.levelIdc = levels[i].levelEnum;
        vps.ptl.minCrForLevel = levels[i].minCompressionRatio;
        vps.ptl.maxLumaSrForLevel = levels[i].maxLumaSamplesPerSecond;
        break;
    }

    static const char *profiles[] = { "None", "Main", "Main 10", "Main Still Picture", "RExt" };
    static const char *tiers[]    = { "Main", "High" };

    /* Build a printable profile name; RExt is refined by chroma format and bit depth */
    char profbuf[64];
    strcpy(profbuf, profiles[vps.ptl.profileIdc]);

    bool bStillPicture = false;
    if (vps.ptl.profileIdc == Profile::MAINREXT)
    {
        if (vps.ptl.bitDepthConstraint > 12 && vps.ptl.intraConstraintFlag)
        {
            if (vps.ptl.onePictureOnlyConstraintFlag)
            {
                strcpy(profbuf, "Main 4:4:4 16 Still Picture");
                bStillPicture = true;
            }
            else
                strcpy(profbuf, "Main 4:4:4 16");
        }
        else if (param.internalCsp == X265_CSP_I420)
        {
            X265_CHECK(vps.ptl.intraConstraintFlag || vps.ptl.bitDepthConstraint > 10, "rext fail\n");
            if (vps.ptl.bitDepthConstraint <= 8)
                strcpy(profbuf, "Main");
            else if (vps.ptl.bitDepthConstraint <= 10)
                strcpy(profbuf, "Main 10");
            else if (vps.ptl.bitDepthConstraint <= 12)
                strcpy(profbuf, "Main 12");
        }
        else if (param.internalCsp == X265_CSP_I422)
        {
            /* there is no Main 4:2:2 profile, so it must be signaled as Main10 4:2:2 */
            if (param.internalBitDepth <= 10)
                strcpy(profbuf, "Main 4:2:2 10");
            else if (vps.ptl.bitDepthConstraint <= 12)
                strcpy(profbuf, "Main 4:2:2 12");
        }
        else if (param.internalCsp == X265_CSP_I444)
        {
            if (vps.ptl.bitDepthConstraint <= 8)
            {
                if (vps.ptl.onePictureOnlyConstraintFlag)
                {
                    strcpy(profbuf, "Main 4:4:4 Still Picture");
                    bStillPicture = true;
                }
                else
                    strcpy(profbuf, "Main 4:4:4");
            }
            else if (vps.ptl.bitDepthConstraint <= 10)
                strcpy(profbuf, "Main 4:4:4 10");
            else if (vps.ptl.bitDepthConstraint <= 12)
                strcpy(profbuf, "Main 4:4:4 12");
        }
        else
            strcpy(profbuf, "Unknown");

        if (vps.ptl.intraConstraintFlag && !bStillPicture)
            strcat(profbuf, " Intra");
    }

    /* i equals NumLevels only if the search loop ran off the end of the table
     * without matching; never index levels[] out of bounds in that case */
    const char *levelName = (i < NumLevels) ? levels[i].name : "NONE";
    x265_log(&param, X265_LOG_INFO, "%s profile, Level-%s (%s tier)\n",
             profbuf, levelName, tiers[vps.ptl.tierFlag]);
}
/* enforce a maximum decoder level requirement, in other words assure that a
 * decoder of the specified level may decode the video about to be created.
 * Lower parameters where necessary to ensure the video will be decodable by a
 * decoder meeting this level of requirement.  Some parameters (resolution and
 * frame rate) are non-negotiable and thus this function may fail. In those
 * circumstances it will be quite noisy.
 *
 * Always initializes vps.numReorderPics and vps.maxDecPicBuffering, even when
 * no level was requested (param.levelIdc <= 0).
 *
 * May lower param.rc.vbvMaxBitrate, param.rc.vbvBufferSize, param.rc.bitrate
 * and param.maxNumReferences, and may raise param.maxCUSize to 32.
 *
 * Returns true if no level was requested or the parameters now fit the
 * requested level; false if the level does not exist, the picture size or
 * frame rate exceed it, or the rate control mode cannot be bounded. */
bool enforceLevel(x265_param& param, VPS& vps)
{
    vps.numReorderPics = (param.bBPyramid && param.bframes > 1) ? 2 : !!param.bframes;
    vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 2, (uint32_t)param.maxNumReferences) + vps.numReorderPics);

    /* no level specified by user, just auto-detect from the configuration */
    if (param.levelIdc <= 0)
        return true;

    /* linear search of the level table; stops on the last entry if nothing matches */
    uint32_t level = 0;
    while (levels[level].levelIdc != param.levelIdc && level + 1 < sizeof(levels) / sizeof(levels[0]))
        level++;
    if (levels[level].levelIdc != param.levelIdc)
    {
        x265_log(&param, X265_LOG_WARNING, "specified level %d does not exist\n", param.levelIdc);
        return false;
    }

    LevelSpec& l = levels[level];
    bool highTier = !!param.bHighTier;
    if (highTier && l.maxBitrateHigh == MAX_UINT)
    {
        highTier = false;
        x265_log(&param, X265_LOG_WARNING, "Level %s has no High tier, using Main tier\n", l.name);
    }

    /* Limits of the tier actually in effect. highTier is only true when the
     * level defines High tier limits, so a High tier sentinel of MAX_UINT is
     * never selected here. */
    const uint32_t maxBitrate = highTier ? l.maxBitrateHigh : l.maxBitrateMain;
    const uint32_t maxCpbSize = highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain;

    uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
    uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
    bool ok = true;
    if (lumaSamples > l.maxLumaSamples)
        ok = false;
    else if (param.sourceWidth > sqrt(l.maxLumaSamples * 8.0f))
        ok = false;
    else if (param.sourceHeight > sqrt(l.maxLumaSamples * 8.0f))
        ok = false;
    if (!ok)
    {
        x265_log(&param, X265_LOG_WARNING, "picture dimensions are out of range for specified level\n");
        return false;
    }
    else if (samplesPerSec > l.maxLumaSamplesPerSecond)
    {
        x265_log(&param, X265_LOG_WARNING, "frame rate is out of range for specified level\n");
        return false;
    }

    if ((uint32_t)param.rc.vbvMaxBitrate > maxBitrate)
    {
        param.rc.vbvMaxBitrate = maxBitrate;
        x265_log(&param, X265_LOG_INFO, "lowering VBV max bitrate to %dKbps\n", param.rc.vbvMaxBitrate);
    }
    if ((uint32_t)param.rc.vbvBufferSize > maxCpbSize)
    {
        param.rc.vbvBufferSize = maxCpbSize;
        x265_log(&param, X265_LOG_INFO, "lowering VBV buffer size to %dKb\n", param.rc.vbvBufferSize);
    }

    switch (param.rc.rateControlMode)
    {
    case X265_RC_ABR:
        /* Clamp to the limit of the tier in effect. Clamping to the High tier
         * limit unconditionally would leave a Main tier encode above its cap
         * and, for levels with no High tier, would store MAX_UINT into the
         * signed bitrate field. */
        if ((uint32_t)param.rc.bitrate > maxBitrate)
        {
            param.rc.bitrate = maxBitrate;
            x265_log(&param, X265_LOG_INFO, "lowering target bitrate to %s tier limit of %dKbps\n",
                     highTier ? "High" : "Main", param.rc.bitrate);
        }
        break;

    case X265_RC_CQP:
        x265_log(&param, X265_LOG_WARNING, "Constant QP is inconsistent with specifying a decoder level, no bitrate guarantee is possible.\n");
        return false;

    case X265_RC_CRF:
        /* CRF has no bitrate bound of its own, so VBV is switched on at the level limits */
        if (!param.rc.vbvBufferSize || !param.rc.vbvMaxBitrate)
        {
            if (!param.rc.vbvMaxBitrate)
                param.rc.vbvMaxBitrate = maxBitrate;
            if (!param.rc.vbvBufferSize)
                param.rc.vbvBufferSize = maxCpbSize;
            x265_log(&param, X265_LOG_WARNING, "Specifying a decoder level with constant rate factor rate-control requires\n");
            x265_log(&param, X265_LOG_WARNING, "enabling VBV with vbv-bufsize=%dkb vbv-maxrate=%dkbps. VBV outputs are non-deterministic!\n",
                     param.rc.vbvBufferSize, param.rc.vbvMaxBitrate);
        }
        break;

    default:
        x265_log(&param, X265_LOG_ERROR, "Unknown rate control mode is inconsistent with specifying a decoder level\n");
        return false;
    }

    /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than or equal to MaxDpbSize */
    const uint32_t MaxDpbPicBuf = 6;
    uint32_t maxDpbSize = MaxDpbPicBuf;
    if (lumaSamples <= (l.maxLumaSamples >> 2))
        maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16);
    else if (lumaSamples <= (l.maxLumaSamples >> 1))
        maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16);
    else if (lumaSamples <= ((3 * l.maxLumaSamples) >> 2))
        maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16);

    /* Drop references one at a time until the DPB fits.
     * NOTE(review): the initial computation above uses numReorderPics + 2
     * while this recomputation uses numReorderPics + 1 -- confirm intended. */
    int savedRefCount = param.maxNumReferences;
    while (vps.maxDecPicBuffering > maxDpbSize && param.maxNumReferences > 1)
    {
        param.maxNumReferences--;
        vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 1, (uint32_t)param.maxNumReferences) + vps.numReorderPics);
    }
    if (param.maxNumReferences != savedRefCount)
        x265_log(&param, X265_LOG_INFO, "Lowering max references to %d to meet level requirement\n", param.maxNumReferences);

    /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
    if (param.levelIdc >= 50 && param.maxCUSize < 32)
    {
        param.maxCUSize = 32;
        x265_log(&param, X265_LOG_INFO, "Levels 5.0 and above require a maximum CTU size of at least 32, using --ctu 32\n");
    }

    /* The value of NumPocTotalCurr shall be less than or equal to 8. Use the
     * same estimate as determineLevel() (references + reorder pictures) so a
     * configuration accepted here is not rejected there as non-compliant. */
    int numPocTotalCurr = param.maxNumReferences + (int)vps.numReorderPics;
    if (numPocTotalCurr > 8)
    {
        param.maxNumReferences = 8 - (int)vps.numReorderPics;
        x265_log(&param, X265_LOG_INFO, "Lowering max references to %d to meet numPocTotalCurr requirement\n", param.maxNumReferences);
    }

    return true;
}
}
#if EXPORT_C_API
/* these functions are exported as C functions (default) */
using namespace X265_NS;
extern "C" {
#else
/* these functions exist within private namespace (multilib) */
namespace X265_NS {
#endif
/* Validate param against a named profile and apply the profile's constraints.
 *
 * Returns 0 when param or profile is NULL (nothing to do) or when the profile
 * is accepted; returns -1 and logs an error when the profile is unknown, when
 * its bit depth is below this build's X265_DEPTH, or when param->internalCsp
 * is not allowed by the profile.
 *
 * Intra and "mainstillpicture"/"msp" profiles force param->keyframeMax to 1;
 * this happens before the color space check, so it sticks even on a -1 return
 * from that check. */
int x265_param_apply_profile(x265_param *param, const char *profile)
{
    if (!param || !profile)
        return 0;

    /* Check if profile bit-depth requirement is exceeded by internal bit depth */
    bool depthTooHigh = false;
#if X265_DEPTH > 8
    if (strcmp(profile, "main") == 0 || strcmp(profile, "mainstillpicture") == 0 || strcmp(profile, "msp") == 0 ||
        strcmp(profile, "main444-8") == 0 || strcmp(profile, "main-intra") == 0 ||
        strcmp(profile, "main444-intra") == 0 || strcmp(profile, "main444-stillpicture") == 0)
        depthTooHigh = true;
#endif
#if X265_DEPTH > 10
    if (strcmp(profile, "main10") == 0 || strcmp(profile, "main422-10") == 0 || strcmp(profile, "main444-10") == 0 ||
        strcmp(profile, "main10-intra") == 0 || strcmp(profile, "main422-10-intra") == 0 || strcmp(profile, "main444-10-intra") == 0)
        depthTooHigh = true;
#endif
#if X265_DEPTH > 12
    if (strcmp(profile, "main12") == 0 || strcmp(profile, "main422-12") == 0 || strcmp(profile, "main444-12") == 0 ||
        strcmp(profile, "main12-intra") == 0 || strcmp(profile, "main422-12-intra") == 0 || strcmp(profile, "main444-12-intra") == 0)
        depthTooHigh = true;
#endif
    if (depthTooHigh)
    {
        x265_log(param, X265_LOG_ERROR, "%s profile not supported, internal bit depth %d.\n", profile, X265_DEPTH);
        return -1;
    }

    const size_t len = strlen(profile);
    const bool hasIntraSuffix = len > 6 && strcmp(profile + len - 6, "-intra") == 0;
    const bool isMainStill = strcmp(profile, "mainstillpicture") == 0 || strcmp(profile, "msp") == 0;

    /* The profile may be detected as still picture if param->totalFrames is 1 */
    if (hasIntraSuffix || isMainStill)
        param->keyframeMax = 1;

    /* Classify the profile by the color spaces it admits */
    const bool only420 =
        strcmp(profile, "main") == 0 || strcmp(profile, "main-intra") == 0 ||
        strcmp(profile, "main10") == 0 || strcmp(profile, "main10-intra") == 0 ||
        strcmp(profile, "main12") == 0 || strcmp(profile, "main12-intra") == 0 ||
        isMainStill;

    const bool upTo422 =
        strcmp(profile, "main422-10") == 0 || strcmp(profile, "main422-10-intra") == 0 ||
        strcmp(profile, "main422-12") == 0 || strcmp(profile, "main422-12-intra") == 0;

    /* any color space allowed */
    const bool anyCsp =
        strcmp(profile, "main444-8") == 0 ||
        strcmp(profile, "main444-intra") == 0 || strcmp(profile, "main444-stillpicture") == 0 ||
        strcmp(profile, "main444-10") == 0 || strcmp(profile, "main444-10-intra") == 0 ||
        strcmp(profile, "main444-12") == 0 || strcmp(profile, "main444-12-intra") == 0 ||
        strcmp(profile, "main444-16-intra") == 0 || strcmp(profile, "main444-16-stillpicture") == 0;

    if (!only420 && !upTo422 && !anyCsp)
    {
        x265_log(param, X265_LOG_ERROR, "unknown profile <%s>\n", profile);
        return -1;
    }

    /* check that input color space is supported by profile */
    bool cspAllowed = true;
    if (only420)
        cspAllowed = param->internalCsp == X265_CSP_I420;
    else if (upTo422)
        cspAllowed = param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422;

    if (!cspAllowed)
    {
        x265_log(param, X265_LOG_ERROR, "%s profile not compatible with %s input color space.\n",
                 profile, x265_source_csp_names[param->internalCsp]);
        return -1;
    }

    return 0;
}
}

View file

@ -0,0 +1,39 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_LEVEL_H
#define X265_LEVEL_H 1
#include "common.h"
#include "x265.h"
namespace X265_NS {
// encoder private namespace
struct VPS;
/* Derive the profile, tier and minimum level for the given parameters, store
 * them in vps.ptl and log the result. */
void determineLevel(const x265_param &param, VPS& vps);
/* Adjust param so the stream fits the level requested in param.levelIdc (a
 * no-op when levelIdc <= 0). Returns false when the level cannot be met. */
bool enforceLevel(x265_param& param, VPS& vps);
}
#endif // ifndef X265_LEVEL_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,110 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_MOTIONESTIMATE_H
#define X265_MOTIONESTIMATE_H
#include "primitives.h"
#include "reference.h"
#include "mv.h"
#include "bitcost.h"
#include "yuv.h"
namespace X265_NS {
// private x265 namespace
/* Motion search state for one prediction unit (PU). Derives from BitCost and
 * caches the source (fenc) pixels of the PU in fencPUYuv so the buf*() helpers
 * can compare them against candidate reference blocks. */
class MotionEstimate : public BitCost
{
protected:
intptr_t blockOffset;
int ctuAddr;
int absPartIdx; // part index of PU, including CU offset within CTU
int searchMethod;
int subpelRefine;
int blockwidth;
int blockheight;
// Pixel comparison primitives; presumably selected for the current PU size
// by setSourcePU() -- confirm in motion.cpp
pixelcmp_t sad;
pixelcmp_x3_t sad_x3;
pixelcmp_x4_t sad_x4;
pixelcmp_t satd;
pixelcmp_t chromaSatd;
// Declared protected with no definition visible here; presumably to prevent
// assignment -- confirm
MotionEstimate& operator =(const MotionEstimate&);
public:
static const int COST_MAX = 1 << 28;
Yuv fencPUYuv; // cached source pixels of the current PU
int partEnum;
bool bChromaSATD;
MotionEstimate();
~MotionEstimate();
static void initScales();
static int hpelIterationCount(int subme);
void init(int method, int refine, int csp);
/* Methods called at slice setup */
void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight);
void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight);
/* buf*() and motionEstimate() methods all use cached fenc pixels and thus
* require setSourcePU() to be called prior. */
// SAD of the cached luma block (stride FENC_STRIDE) against fref
inline int bufSAD(const pixel* fref, intptr_t stride) { return sad(fencPUYuv.m_buf[0], FENC_STRIDE, fref, stride); }
// SATD of the cached luma block (stride FENC_STRIDE) against fref
inline int bufSATD(const pixel* fref, intptr_t stride) { return satd(fencPUYuv.m_buf[0], FENC_STRIDE, fref, stride); }
// Sum of the Cb and Cr SATD between refYuv at puPartIdx and the cached chroma blocks
inline int bufChromaSATD(const Yuv& refYuv, int puPartIdx)
{
return chromaSatd(refYuv.getCbAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[1], fencPUYuv.m_csize) +
chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[2], fencPUYuv.m_csize);
}
// Searches ref within [mvmin, mvmax]; the chosen vector is written to outQMv.
// NOTE(review): the int return is presumably the cost of that vector -- confirm
int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
protected:
// Search helper; bmv, bcost, bPointNr and bDistance are in/out references
inline void StarPatternSearch(ReferencePlanes *ref,
const MV & mvmin,
const MV & mvmax,
MV & bmv,
int & bcost,
int & bPointNr,
int & bDistance,
int earlyExitIters,
int merange);
};
}
#endif // ifndef X265_MOTIONESTIMATE_H

232
x265/source/encoder/nal.cpp Normal file
View file

@ -0,0 +1,232 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "bitstream.h"
#include "nal.h"
using namespace X265_NS;
/* Create an empty list: no NAL units recorded, neither buffer allocated yet,
 * and Annex B (start code) framing selected. */
NALList::NALList()
{
    m_numNal = 0;

    /* main access unit buffer */
    m_buffer = NULL;
    m_occupancy = 0;
    m_allocSize = 0;

    /* staging buffer filled by serializeSubstreams() */
    m_extraBuffer = NULL;
    m_extraOccupancy = 0;
    m_extraAllocSize = 0;

    m_annexB = true;
}
/* Move the serialized access unit out of 'other' and into this list.
 *
 * This list's previous buffer is released and replaced by other's buffer, so
 * the payload pointers copied from other.m_nal[] stay valid.  'other' is left
 * empty with a freshly allocated buffer of the same capacity; if that
 * allocation fails, other's capacity is reset to zero so that its next
 * serialize() call allocates storage instead of writing through a NULL
 * buffer. */
void NALList::takeContents(NALList& other)
{
    /* take other NAL buffer, discard our old one */
    X265_FREE(m_buffer);
    m_buffer = other.m_buffer;
    m_allocSize = other.m_allocSize;
    m_occupancy = other.m_occupancy;

    /* copy packet data */
    m_numNal = other.m_numNal;
    memcpy(m_nal, other.m_nal, sizeof(x265_nal) * m_numNal);

    /* reset other list, re-allocate their buffer with same size */
    other.m_numNal = 0;
    other.m_occupancy = 0;
    other.m_buffer = X265_MALLOC(uint8_t, m_allocSize);
    if (!other.m_buffer)
    {
        /* keep the recorded capacity consistent with the (missing) buffer;
         * serialize() retries the allocation and reports a failure itself */
        other.m_allocSize = 0;
    }
}
/* Append one NAL unit to the access unit buffer.
 *
 * The payload of 'bs' is written behind either a start code (Annex B mode) or
 * a 4-byte big-endian length field, followed by the two byte NAL header, with
 * emulation prevention bytes inserted into the payload.  Bytes staged earlier
 * by serializeSubstreams() are appended unmodified and then consumed.  The
 * new unit is recorded in m_nal[] with its type, size and start address.
 *
 * Nothing is emitted when the bitstream has no FIFO, when m_nal[] is already
 * full, or when the buffer cannot be grown. */
void NALList::serialize(NalUnitType nalUnitType, const Bitstream& bs)
{
    static const char startCodePrefix[] = { 0, 0, 0, 1 };

    uint32_t payloadSize = bs.getNumberOfWrittenBytes();
    const uint8_t* bpayload = bs.getFIFO();

    if (!bpayload)
        return;

    /* m_nal[] has exactly MAX_NAL_UNITS slots; refuse the unit before touching
     * any buffer rather than relying on a diagnostic assertion to catch an
     * out-of-bounds write */
    if (m_numNal >= (uint32_t)MAX_NAL_UNITS)
    {
        x265_log(NULL, X265_LOG_ERROR, "NAL count overflow, NAL unit dropped\n");
        return;
    }

    /* worst case: prefix (4) + header (2) + payload grown by 50% through
     * escaping + staged substream bytes */
    uint32_t nextSize = m_occupancy + sizeof(startCodePrefix) + 2 + payloadSize + (payloadSize >> 1) + m_extraOccupancy;
    if (nextSize > m_allocSize)
    {
        uint8_t *temp = X265_MALLOC(uint8_t, nextSize);
        if (temp)
        {
            memcpy(temp, m_buffer, m_occupancy);

            /* fixup existing payload pointers */
            for (uint32_t i = 0; i < m_numNal; i++)
                m_nal[i].payload = temp + (m_nal[i].payload - m_buffer);

            X265_FREE(m_buffer);
            m_buffer = temp;
            m_allocSize = nextSize;
        }
        else
        {
            x265_log(NULL, X265_LOG_ERROR, "Unable to realloc access unit buffer\n");
            return;
        }
    }

    uint8_t *out = m_buffer + m_occupancy;
    uint32_t bytes = 0;

    if (!m_annexB)
    {
        /* Will write size later */
        bytes += 4;
    }
    else if (!m_numNal || nalUnitType == NAL_UNIT_VPS || nalUnitType == NAL_UNIT_SPS || nalUnitType == NAL_UNIT_PPS)
    {
        /* first NAL of the list and parameter sets get the 4-byte start code */
        memcpy(out, startCodePrefix, 4);
        bytes += 4;
    }
    else
    {
        /* all other NALs get the 3-byte start code (prefix without its first zero) */
        memcpy(out, startCodePrefix + 1, 3);
        bytes += 3;
    }

    /* 16 bit NAL header:
     * forbidden_zero_bit       1-bit
     * nal_unit_type            6-bits
     * nuh_reserved_zero_6bits  6-bits
     * nuh_temporal_id_plus1    3-bits */
    out[bytes++] = (uint8_t)nalUnitType << 1;
    out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);

    /* 7.4.1 ...
     * Within the NAL unit, the following three-byte sequences shall not occur at
     * any byte-aligned position:
     *  - 0x000000
     *  - 0x000001
     *  - 0x000002 */
    for (uint32_t i = 0; i < payloadSize; i++)
    {
        /* the test looks at the three bytes already written, so the escape is
         * inserted in front of the most recent one */
        if (i > 2 && !out[bytes - 2] && !out[bytes - 3] && out[bytes - 1] <= 0x03)
        {
            /* inject 0x03 to prevent emulating a start code */
            out[bytes] = out[bytes - 1];
            out[bytes - 1] = 0x03;
            bytes++;
        }

        out[bytes++] = bpayload[i];
    }

    X265_CHECK(bytes <= 4 + 2 + payloadSize + (payloadSize >> 1), "NAL buffer overflow\n");

    if (m_extraOccupancy)
    {
        /* these bytes were escaped by serializeSubstreams */
        memcpy(out + bytes, m_extraBuffer, m_extraOccupancy);
        bytes += m_extraOccupancy;
        m_extraOccupancy = 0;
    }

    /* 7.4.1.1
     * ... when the last byte of the RBSP data is equal to 0x00 (which can
     * only occur when the RBSP ends in a cabac_zero_word), a final byte equal
     * to 0x03 is appended to the end of the data. */
    if (!out[bytes - 1])
        out[bytes++] = 0x03;

    if (!m_annexB)
    {
        /* fill in the big-endian length of everything after the length field */
        uint32_t dataSize = bytes - 4;
        out[0] = (uint8_t)(dataSize >> 24);
        out[1] = (uint8_t)(dataSize >> 16);
        out[2] = (uint8_t)(dataSize >> 8);
        out[3] = (uint8_t)dataSize;
    }

    m_occupancy += bytes;

    /* capacity was verified on entry, so this slot is within m_nal[] */
    x265_nal& nal = m_nal[m_numNal++];
    nal.type = nalUnitType;
    nal.sizeBytes = bytes;
    nal.payload = out;
}
/* concatenate and escape WPP sub-streams, return escaped row lengths.
* These streams will be appended to the next serialized NAL */
uint32_t NALList::serializeSubstreams(uint32_t* streamSizeBytes, uint32_t streamCount, const Bitstream* streams)
{
uint32_t maxStreamSize = 0;
uint32_t estSize = 0;
for (uint32_t s = 0; s < streamCount; s++)
estSize += streams[s].getNumberOfWrittenBytes();
estSize += estSize >> 1;
if (estSize > m_extraAllocSize)
{
uint8_t *temp = X265_MALLOC(uint8_t, estSize);
if (temp)
{
X265_FREE(m_extraBuffer);
m_extraBuffer = temp;
m_extraAllocSize = estSize;
}
else
{
x265_log(NULL, X265_LOG_ERROR, "Unable to realloc WPP substream concatenation buffer\n");
return 0;
}
}
uint32_t bytes = 0;
uint8_t *out = m_extraBuffer;
for (uint32_t s = 0; s < streamCount; s++)
{
const Bitstream& stream = streams[s];
uint32_t inSize = stream.getNumberOfWrittenBytes();
const uint8_t *inBytes = stream.getFIFO();
uint32_t prevBufSize = bytes;
if (inBytes)
{
for (uint32_t i = 0; i < inSize; i++)
{
if (bytes >= 2 && !out[bytes - 2] && !out[bytes - 1] && inBytes[i] <= 0x03)
{
/* inject 0x03 to prevent emulating a start code */
out[bytes++] = 3;
}
out[bytes++] = inBytes[i];
}
}
if (s < streamCount - 1)
{
streamSizeBytes[s] = bytes - prevBufSize;
if (streamSizeBytes[s] > maxStreamSize)
maxStreamSize = streamSizeBytes[s];
}
}
m_extraOccupancy = bytes;
return maxStreamSize;
}

65
x265/source/encoder/nal.h Normal file
View file

@ -0,0 +1,65 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_NAL_H
#define X265_NAL_H
#include "common.h"
#include "x265.h"
namespace X265_NS {
// private namespace
class Bitstream;
/* Collects the serialized NAL units of one output packet list in a single
 * heap buffer.  Each m_nal[] entry records the type, size and start address
 * of one unit inside m_buffer.
 *
 * NOTE(review): the destructor frees both buffers but no copy constructor or
 * assignment operator is declared, so copying a NALList would lead to a
 * double free -- confirm that no caller copies instances. */
class NALList
{
/* capacity of m_nal[] (private: declared before the first access specifier) */
static const int MAX_NAL_UNITS = 16;
public:
x265_nal m_nal[MAX_NAL_UNITS];
uint32_t m_numNal; /* number of valid entries in m_nal[] */
uint8_t* m_buffer; /* storage that the m_nal[].payload pointers refer into */
uint32_t m_occupancy; /* bytes of m_buffer currently in use */
uint32_t m_allocSize; /* allocated size of m_buffer in bytes */
uint8_t* m_extraBuffer; /* staging area written by serializeSubstreams() */
uint32_t m_extraOccupancy; /* staged bytes waiting to be appended by serialize() */
uint32_t m_extraAllocSize; /* allocated size of m_extraBuffer in bytes */
bool m_annexB; /* true: start code prefixes; false: 4-byte length prefixes */
NALList();
~NALList() { X265_FREE(m_buffer); X265_FREE(m_extraBuffer); }
/* move the serialized contents of 'other' into this list and reset 'other' */
void takeContents(NALList& other);
/* append the payload of 'bs' as one NAL unit of the given type */
void serialize(NalUnitType nalUnitType, const Bitstream& bs);
/* escape and stage 'streamCount' sub-streams for the next serialize() call;
 * returns the largest escaped size among all but the last stream */
uint32_t serializeSubstreams(uint32_t* streamSizeBytes, uint32_t streamCount, const Bitstream* streams);
};
}
#endif // ifndef X265_NAL_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,267 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Sumalatha Polureddy <sumalatha@multicorewareinc.com>
* Aarthi Priya Thirumalai <aarthi@multicorewareinc.com>
* Xun Xu, PPLive Corporation <xunxu@pptv.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_RATECONTROL_H
#define X265_RATECONTROL_H
#include "common.h"
#include "sei.h"
namespace X265_NS {
// encoder namespace
class Encoder;
class Frame;
class SEIBufferingPeriod;
struct SPS;
/* NOTE(review): frame durations are presumably in seconds (RateControl
 * documents m_frameDuration as seconds), which would make the base duration
 * correspond to 25 frames per second -- confirm in ratecontrol.cpp */
#define BASE_FRAME_DURATION 0.04
/* Arbitrary limitations as a sanity check. */
#define MAX_FRAME_DURATION 1.00
#define MIN_FRAME_DURATION 0.01
/* NOTE(review): presumably lower bounds for the amortization settings held in
 * RateControl::m_amortizeFrames / m_amortizeFraction -- confirm usage */
#define MIN_AMORTIZE_FRAME 10
#define MIN_AMORTIZE_FRACTION 0.2
/* clamp a frame duration 'f' into [MIN_FRAME_DURATION, MAX_FRAME_DURATION] */
#define CLIP_DURATION(f) x265_clip3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, f)
/* State of one size predictor.  RateControl keeps one per slice type in
 * m_pred[] and passes them to predictSize() / updatePredictor().
 * NOTE(review): the formula combining these fields lives in ratecontrol.cpp
 * -- confirm there before relying on the meaning of an individual field. */
struct Predictor
{
double coeff;
double count;
double decay;
double offset;
};
/* Per-frame timing values for HRD bookkeeping; referenced through
 * RateControlEntry::hrdTiming.
 * NOTE(review): the field names follow the hypothetical reference decoder
 * terminology (CPB arrival/removal times, DPB output time); units are
 * presumably seconds -- confirm in ratecontrol.cpp. */
struct HRDTiming
{
double cpbInitialAT;
double cpbFinalAT;
double dpbOutputTime;
double cpbRemovalTime;
};
/* Rate control state attached to a single frame.  A pointer to one of these
 * is passed to rateControlStart(), rateControlUpdateStats() and
 * rateControlEnd() of RateControl; in 2-pass mode RateControl also keeps an
 * array of them in m_rce2Pass. */
struct RateControlEntry
{
/* NOTE(review): presumably row-level predictor storage and the pointers
 * selecting the active predictors -- confirm in ratecontrol.cpp */
Predictor rowPreds[3][2];
Predictor* rowPred[2];
int64_t lastSatd; /* Contains the picture cost of the previous frame, required for resetAbr and VBV */
int64_t leadingNoBSatd;
int64_t rowTotalBits; /* update cplxrsum and totalbits at the end of 2 rows */
double blurredComplexity;
double qpaRc;
double qpAq;
double qRceq;
double frameSizePlanned; /* frame Size decided by RateControl before encoding the frame */
double bufferRate;
double movingAvgSum;
double rowCplxrSum;
double qpNoVbv;
double bufferFill;
double frameDuration;
double clippedDuration;
double frameSizeEstimated; /* hold frameSize, updated from cu level vbv rc */
double frameSizeMaximum; /* max frame Size according to minCR restrictions and level of the video */
int sliceType;
int bframes;
int poc;
int encodeOrder;
bool bLastMiniGopBFrame;
bool isActive;
double amortizeFrames;
double amortizeFraction;
/* Required in 2-pass rate control */
uint64_t expectedBits; /* total expected bits up to the current frame (current one excluded) */
double iCuCount;
double pCuCount;
double skipCuCount;
double expectedVbv;
double qScale;
double newQScale;
double newQp;
int mvBits;
int miscBits;
int coeffBits;
bool keptAsRef;
/* raw pointers; ownership is not established by this header */
SEIPictureTiming *picTimingSEI;
HRDTiming *hrdTiming;
};
/* Encoder rate control.  Holds the running ABR/VBV/HRD/2-pass state and
 * exposes the per-frame entry points rateControlStart(),
 * rateControlUpdateStats() and rateControlEnd(), each of which receives the
 * frame's RateControlEntry. */
class RateControl
{
public:
x265_param* m_param;
Slice* m_curSlice; /* all info about the current frame */
SliceType m_sliceType; /* Current frame type */
int m_ncu; /* number of CUs in a frame */
int m_qp; /* updated qp for current frame */
bool m_isAbr;
bool m_isVbv;
bool m_isCbr;
bool m_singleFrameVbv;
bool m_isAbrReset;
int m_lastAbrResetPoc;
double m_rateTolerance;
double m_frameDuration; /* current frame duration in seconds */
double m_bitrate;
double m_rateFactorConstant;
double m_bufferSize;
double m_bufferFillFinal; /* real buffer as of the last finished frame */
double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */
double m_bufferRate; /* # of bits added to buffer_fill after each frame */
double m_vbvMaxRate; /* in kbps */
double m_rateFactorMaxIncrement; /* Don't allow RF above (CRF + this value). */
double m_rateFactorMaxDecrement; /* don't allow RF below (this value). */
Predictor m_pred[4]; /* Slice predictors to predict bits for each Slice type - I,P,Bref and B */
int64_t m_leadingNoBSatd;
int m_predType; /* Type of slice predictors to be used - depends on the slice type */
double m_ipOffset;
double m_pbOffset;
int64_t m_bframeBits;
int64_t m_currentSatd;
int m_qpConstant[3];
int m_lastNonBPictType;
int m_framesDone; /* # of frames passed through RateControl already */
double m_cplxrSum; /* sum of bits*qscale/rceq */
double m_wantedBitsWindow; /* target bitrate * window */
double m_accumPQp; /* for determining I-frame quant */
double m_accumPNorm;
double m_lastQScaleFor[3]; /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
double m_lstep;
double m_shortTermCplxSum;
double m_shortTermCplxCount;
double m_lastRceq;
double m_qCompress;
int64_t m_totalBits; /* total bits used for already encoded frames (after amortization) */
int64_t m_encodedBits; /* bits used for encoded frames (without amortization) */
double m_fps;
/* fixed 50-entry windows; NOTE(review): m_sliderPos is presumably the
 * current write position within them -- confirm in ratecontrol.cpp */
int64_t m_satdCostWindow[50];
int64_t m_encodedBitsWindow[50];
int m_sliderPos;
/* To detect a pattern of low detailed static frames in single pass ABR using satdcosts */
int64_t m_lastBsliceSatdCost;
int m_numBframesInPattern;
bool m_isPatternPresent;
bool m_isSceneTransition;
/* a common variable on which rateControlStart, rateControlEnd and rateControlUpdateStats waits to
 * sync the calls to these functions. For example
 * -F2:
 * rceStart 10
 * rceUpdate 10
 * rceEnd 9
 * rceStart 11
 * rceUpdate 11
 * rceEnd 10
 * rceStart 12
 * rceUpdate 12
 * rceEnd 11 */
ThreadSafeInteger m_startEndOrder;
int m_finalFrameCount; /* set when encoder begins flushing */
bool m_bTerminated; /* set true when encoder is closing */
/* hrd stuff */
SEIBufferingPeriod m_bufPeriodSEI;
double m_nominalRemovalTime;
double m_prevCpbFinalAT;
/* 2 pass */
bool m_2pass;
int m_numEntries;
FILE* m_statFileOut;
FILE* m_cutreeStatFileOut;
FILE* m_cutreeStatFileIn;
double m_lastAccumPNorm;
double m_expectedBitsSum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
int64_t m_predictedBits;
RateControlEntry* m_rce2Pass;
struct
{
uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */
int qpBufPos; /* In order to handle pyramid reordering, QP buffer acts as a stack.
 * This value is the current position (0 or 1). */
} m_cuTreeStats;
RateControl(x265_param& p);
bool init(const SPS& sps);
void initHRD(SPS& sps);
void setFinalFrameCount(int count);
void terminate(); /* un-block all waiting functions so encoder may close */
void destroy();
// to be called for each curFrame to process RateControl and set QP
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
void rateControlUpdateStats(RateControlEntry* rce);
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);
/* qpVbv is passed by non-const reference and may be updated by the call */
int rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
int rateControlSliceType(int frameNum);
bool cuTreeReadFor2Pass(Frame* curFrame);
void hrdFullness(SEIBufferingPeriod* sei);
int writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce);
protected:
/* static constants, defined outside this header */
static const int s_slidingWindowFrames;
static const char* s_defaultStatFileName;
double m_amortizeFraction;
int m_amortizeFrames;
int m_residualFrames;
int m_partialResidualFrames;
int m_residualCost;
int m_partialResidualCost;
x265_zone* getZone();
double getQScale(RateControlEntry *rce, double rateFactor);
double rateEstimateQscale(Frame* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR
double tuneAbrQScaleFromFeedback(double qScale);
void accumPQpUpdate();
int getPredictorType(int lowresSliceType, int sliceType);
void updateVbv(int64_t bits, RateControlEntry* rce);
void updatePredictor(Predictor *p, double q, double var, double bits);
double clipQscale(Frame* pic, RateControlEntry* rce, double q);
void updateVbvPlan(Encoder* enc);
double predictSize(Predictor *p, double q, double var);
void checkAndResetABR(RateControlEntry* rce, bool isFrameDone);
/* encodedBits is passed by non-const reference and may be updated by the call */
double predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpm, int32_t& encodedBits);
/* 2-pass helpers */
bool initPass2();
double getDiffLimitedQScale(RateControlEntry *rce, double q);
double countExpectedBits();
bool vbv2Pass(uint64_t allAvailableBits);
bool findUnderflow(double *fills, int *t0, int *t1, int over);
bool fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax);
};
}
#endif // ifndef X265_RATECONTROL_H

View file

@ -0,0 +1,147 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_RDCOST_H
#define X265_RDCOST_H
#include "common.h"
#include "slice.h"
namespace X265_NS {
// private namespace
/* Rate-distortion cost helper.  Holds the lambda values and psycho-visual /
 * chroma weights in fixed point and combines distortion, bit counts and
 * psy energy into a single cost. */
class RDCost
{
public:
/* all weights and factors stored as FIX8 */
uint64_t m_lambda2;
uint64_t m_lambda;
uint32_t m_chromaDistWeight[2]; /* index 0 is derived from the Cb QP, index 1 from the Cr QP (see setQP) */
uint32_t m_psyRdBase; /* set by setPsyRdScale(), input to the m_psyRd computation in setQP() */
uint32_t m_psyRd; /* m_psyRdBase after slice type and QP scaling; 0 disables the chroma lambda offsets */
int m_qp; /* QP used to configure lambda, may be higher than QP_MAX_SPEC but <= QP_MAX_MAX */
/* store floor(65536 * scale * 0.33) as the base psy-rd factor */
void setPsyRdScale(double scale) { m_psyRdBase = (uint32_t)floor(65536.0 * scale * 0.33); }
/* Configure lambdas, the psy-rd factor and both chroma distortion weights
 * for the given slice and QP.  'qp' directly indexes the lambda tables. */
void setQP(const Slice& slice, int qp)
{
x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
m_qp = qp;
setLambda(x265_lambda2_tab[qp], x265_lambda_tab[qp]);
/* Scale PSY RD factor by a slice type factor */
static const uint32_t psyScaleFix8[3] = { 300, 256, 96 }; /* B, P, I */
m_psyRd = (m_psyRdBase * psyScaleFix8[slice.m_sliceType]) >> 8;
/* Scale PSY RD factor by QP, at high QP psy-rd can cause artifacts */
if (qp >= 40)
{
/* factor falls linearly with qp and is zero from QP_MAX_SPEC upwards */
int scale = qp >= QP_MAX_SPEC ? 0 : (QP_MAX_SPEC - qp) * 23;
m_psyRd = (m_psyRd * scale) >> 8;
}
/* chroma QPs: 4:2:0 goes through the g_chromaScale table (index clipped to
 * QP_MAX_MAX); other formats use the offset QP clipped to QP_MAX_SPEC */
int qpCb, qpCr;
if (slice.m_sps->chromaFormatIdc == X265_CSP_I420)
{
qpCb = (int)g_chromaScale[x265_clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[0])];
qpCr = (int)g_chromaScale[x265_clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[1])];
}
else
{
qpCb = x265_clip3(QP_MIN, QP_MAX_SPEC, qp + slice.m_pps->chromaQpOffset[0]);
qpCr = x265_clip3(QP_MIN, QP_MAX_SPEC, qp + slice.m_pps->chromaQpOffset[1]);
}
/* table index is the luma/chroma QP difference plus 12, capped at
 * MAX_CHROMA_LAMBDA_OFFSET; without psy-rd the weight is 256 */
int chroma_offset_idx = X265_MIN(qp - qpCb + 12, MAX_CHROMA_LAMBDA_OFFSET);
uint16_t lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
m_chromaDistWeight[0] = lambdaOffset;
chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET);
lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
m_chromaDistWeight[1] = lambdaOffset;
}
/* store both lambdas scaled by 256 and rounded down */
void setLambda(double lambda2, double lambda)
{
m_lambda2 = (uint64_t)floor(256.0 * lambda2);
m_lambda = (uint64_t)floor(256.0 * lambda);
}
/* distortion + bits * lambda2, with the product rounded to nearest before
 * the >> 8 that removes the fixed-point scale */
inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
{
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
#if X265_DEPTH <= 10
"calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
#else
"calcRdCost wrap detected dist: " X265_LL ", bits %u, lambda: " X265_LL "\n",
#endif
distortion, bits, m_lambda2);
return distortion + ((bits * m_lambda2 + 128) >> 8);
}
/* return the difference in energy between the source block and the recon block */
inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const
{
return primitives.cu[size].psy_cost_pp(source, sstride, recon, rstride);
}
/* return the difference in energy between the source block and the recon block */
inline int psyCost(int size, const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride) const
{
return primitives.cu[size].psy_cost_ss(source, sstride, recon, rstride);
}
/* return the RD cost of this prediction, including the effect of psy-rd */
inline uint64_t calcPsyRdCost(sse_ret_t distortion, uint32_t bits, uint32_t psycost) const
{
/* unlike calcRdCost(), neither shifted term adds a rounding constant */
return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
}
/* sadCost + bits * lambda (not lambda2), product rounded to nearest */
inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
{
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
"calcRdSADCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n", sadCost, bits, m_lambda);
return sadCost + ((bits * m_lambda + 128) >> 8);
}
/* scale a chroma distortion by the weight of its plane; 'plane' indexes
 * m_chromaDistWeight[plane - 1], so it must be 1 or 2 */
inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
{
X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
#if X265_DEPTH <= 10
"scaleChromaDist wrap detected dist: %u, lambda: %u\n",
#else
"scaleChromaDist wrap detected dist: " X265_LL " lambda: %u\n",
#endif
dist, m_chromaDistWeight[plane - 1]);
return (sse_ret_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
}
/* bits * lambda, rounded to nearest; the result is truncated to 32 bits */
inline uint32_t getCost(uint32_t bits) const
{
return (uint32_t)((bits * m_lambda + 128) >> 8);
}
};
}
#endif // ifndef X265_RDCOST_H

View file

@ -0,0 +1,174 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
* Deepthi Devaki <deepthidevaki@multicorewareinc.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "primitives.h"
#include "slice.h"
#include "picyuv.h"
#include "reference.h"
using namespace X265_NS;
/* Start without any weighted-plane buffers; init() allocates them on demand. */
MotionReference::MotionReference()
{
    for (int plane = 0; plane < 3; plane++)
        weightBuffer[plane] = NULL;
}
/* Release the weighted-plane buffers of all three planes. */
MotionReference::~MotionReference()
{
    for (int plane = 0; plane < 3; plane++)
        X265_FREE(weightBuffer[plane]);
}
/* Bind this reference to a reconstructed picture.
 *
 * Without weights (wp == NULL) the integer-pel planes point straight at the
 * picture.  With weights, every interpolated plane whose bPresentFlag is set
 * is redirected to a private buffer (allocated on first use) and its weight
 * parameters are stored in w[]; the pixels themselves are produced later by
 * applyWeight().
 *
 * Returns 0 on success, -1 if a weight buffer cannot be allocated (in which
 * case isWeighted stays false). */
int MotionReference::init(PicYuv* recPic, WeightParam *wp, const x265_param& p)
{
    reconPic = recPic;
    numWeightedRows = 0;
    lumaStride = recPic->m_stride;
    chromaStride = recPic->m_strideC;
    numInterpPlanes = p.subpelRefine > 2 ? 3 : 1; /* is chroma satd possible? */

    /* directly reference the extended integer pel planes */
    for (int plane = 0; plane < 3; plane++)
        fpelPlane[plane] = recPic->m_picOrg[plane];

    isWeighted = false;
    if (!wp)
        return 0;

    const uint32_t ctuRows = (reconPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;

    /* geometry starts out as luma and switches to chroma once plane 1 is reached */
    int padX = reconPic->m_lumaMarginX;
    int padY = reconPic->m_lumaMarginY;
    intptr_t pitch = reconPic->m_stride;
    int rowHeight = g_maxCUSize;

    for (int plane = 0; plane < numInterpPlanes; plane++)
    {
        if (plane == 1)
        {
            padX = reconPic->m_chromaMarginX;
            padY = reconPic->m_chromaMarginY;
            pitch = reconPic->m_strideC;
            rowHeight >>= reconPic->m_vChromaShift;
        }

        if (!wp[plane].bPresentFlag)
            continue;

        if (!weightBuffer[plane])
        {
            /* all CTU rows plus the margin above and below */
            size_t paddedRows = (ctuRows * rowHeight) + padY * 2;
            weightBuffer[plane] = X265_MALLOC(pixel, pitch * paddedRows);
            if (!weightBuffer[plane])
                return -1;
        }

        /* use our buffer which will have weighted pixels written to it */
        fpelPlane[plane] = weightBuffer[plane] + padY * pitch + padX;
        X265_CHECK(recPic->m_picOrg[plane] - recPic->m_picBuf[plane] == padY * pitch + padX, "PicYuv pad calculation mismatch\n");

        w[plane].weight = wp[plane].inputWeight;
        w[plane].offset = wp[plane].inputOffset * (1 << (X265_DEPTH - 8));
        w[plane].shift = wp[plane].log2WeightDenom;
        w[plane].round = w[plane].shift ? 1 << (w[plane].shift - 1) : 0;
    }

    isWeighted = true;
    return 0;
}
/* Generate weighted reference pixels for the CTU rows that have finished
 * since the previous call.
 *
 * finishedRows is clamped to maxNumRows; rows [numWeightedRows, finishedRows)
 * are processed for every interpolated plane that init() redirected to a
 * weight buffer, then numWeightedRows is advanced.  Left/right borders are
 * extended for the new rows; the top margin is filled on the first call and
 * the bottom margin once the last row has been reached. */
void MotionReference::applyWeight(int finishedRows, int maxNumRows)
{
finishedRows = X265_MIN(finishedRows, maxNumRows);
/* nothing new since the last call */
if (numWeightedRows >= finishedRows)
return;
/* geometry starts out as luma; it is switched to chroma when c reaches 1
 * and is then kept for c == 2 */
int marginX = reconPic->m_lumaMarginX;
int marginY = reconPic->m_lumaMarginY;
intptr_t stride = reconPic->m_stride;
int width = reconPic->m_picWidth;
int height = (finishedRows - numWeightedRows) * g_maxCUSize;
if (finishedRows == maxNumRows && (reconPic->m_picHeight % g_maxCUSize))
{
/* the last row may be partial height */
height -= g_maxCUSize;
height += reconPic->m_picHeight % g_maxCUSize;
}
int cuHeight = g_maxCUSize;
for (int c = 0; c < numInterpPlanes; c++)
{
/* performed before the skip test below so the chroma geometry is set up
 * even when the earlier plane is unweighted */
if (c == 1)
{
marginX = reconPic->m_chromaMarginX;
marginY = reconPic->m_chromaMarginY;
stride = reconPic->m_strideC;
width >>= reconPic->m_hChromaShift;
height >>= reconPic->m_vChromaShift;
cuHeight >>= reconPic->m_vChromaShift;
}
/* Do not generate weighted predictions if using original picture */
if (fpelPlane[c] == reconPic->m_picOrg[c])
continue;
/* first not-yet-weighted row in the source picture and in our buffer */
const pixel* src = reconPic->m_picOrg[c] + numWeightedRows * cuHeight * stride;
pixel* dst = fpelPlane[c] + numWeightedRows * cuHeight * stride;
// Computing weighted CU rows
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
int padwidth = (width + 15) & ~15; // weightp assembly needs even 16 byte widths
primitives.weight_pp(src, dst, stride, padwidth, height, w[c].weight, w[c].round << correction, w[c].shift + correction, w[c].offset);
// Extending Left & Right
primitives.extendRowBorder(dst, stride, width, height, marginX);
// Extending Above
if (numWeightedRows == 0)
{
/* replicate the first picture row (including its side margins) into
 * each of the marginY rows above it */
pixel *pixY = fpelPlane[c] - marginX;
for (int y = 0; y < marginY; y++)
memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
}
// Extending Bottom
if (finishedRows == maxNumRows)
{
/* replicate the last picture row into each of the marginY rows below it */
int picHeight = reconPic->m_picHeight;
if (c) picHeight >>= reconPic->m_vChromaShift;
pixel *pixY = fpelPlane[c] - marginX + (picHeight - 1) * stride;
for (int y = 0; y < marginY; y++)
memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
}
}
numWeightedRows = finishedRows;
}

View file

@ -0,0 +1,56 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_REFERENCE_H
#define X265_REFERENCE_H
#include "primitives.h"
#include "picyuv.h"
#include "lowres.h"
#include "mv.h"
namespace X265_NS {
// private x265 namespace
struct WeightParam;
/* A ReferencePlanes that refers to a reconstructed picture and can
 * optionally hold weighted copies of its planes for weighted prediction. */
class MotionReference : public ReferencePlanes
{
public:
MotionReference();
~MotionReference();
/* bind to a reconstructed picture; wp may be NULL for unweighted use.
 * Returns 0 on success, -1 on allocation failure. */
int init(PicYuv*, WeightParam* wp, const x265_param& p);
/* weight the CTU rows finished so far ('rows'), out of 'numRows' in total */
void applyWeight(int rows, int numRows);
pixel* weightBuffer[3]; /* per-plane weighted pixel storage, NULL until allocated by init() */
int numInterpPlanes; /* 3 when init() sees subpelRefine > 2, otherwise 1 */
int numWeightedRows; /* CTU rows already processed by applyWeight() */
protected:
/* declared in the protected section with no definition in this header, so
 * outside code cannot assign one MotionReference to another */
MotionReference& operator =(const MotionReference&);
};
}
#endif // ifndef X265_REFERENCE_H

1709
x265/source/encoder/sao.cpp Normal file

File diff suppressed because it is too large Load diff

154
x265/source/encoder/sao.h Normal file
View file

@ -0,0 +1,154 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
* Min Chen <chenm003@163.com>
* Praveen Kumar Tiwari <praveen@multicorewareinc.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_SAO_H
#define X265_SAO_H
#include "common.h"
#include "frame.h"
#include "entropy.h"
namespace X265_NS {
// private namespace
/* Table lengths used by SAO.  SAO_NUM_BO_CLASSES sizes SAO::m_offsetBo;
 * NOTE(review): EO/BO presumably stand for edge offset / band offset. */
enum SAOTypeLen
{
    SAO_NUM_BO_CLASSES = 32,
    SAO_BO_LEN = 4,
    SAO_EO_LEN = 4
};
/* SAO type indices: four EO variants followed by BO.  MAX_NUM_SAO_TYPE is the
 * number of types and is used as an array dimension by the SAO class. */
enum SAOType
{
    SAO_EO_0 = 0,
    SAO_EO_1 = 1,
    SAO_EO_2 = 2,
    SAO_EO_3 = 3,
    SAO_BO = 4,
    MAX_NUM_SAO_TYPE = 5
};
/* Encoder-side SAO (sample adaptive offset) state: statistics buffers, offset
 * tables, clip tables and temporary pixel buffers, together with the per-CTU
 * and per-row entry points declared below. Only declarations live in this
 * header; the method bodies are defined elsewhere. */
class SAO
{
public:
enum { SAO_MAX_DEPTH = 4 };
/* 1 << SAO_BO_BITS == 32 == SAO_NUM_BO_CLASSES */
enum { SAO_BO_BITS = 5 };
/* innermost dimension of PerClass / PerPlane */
enum { MAX_NUM_SAO_CLASS = 33 };
enum { SAO_BIT_INC = 0 }; /* HM12.0 computes this as X265_MAX(X265_DEPTH - 10, 0) */
/* 1 << min(X265_DEPTH - 5, 5): 8 for 8-bit builds, 32 for 10-bit and above */
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
/* length of s_eoTable and m_offsetEo */
enum { NUM_EDGETYPE = 5 };
/* first dimension of PerPlane */
enum { NUM_PLANE = 3 };
enum { NUM_MERGE_MODE = 3 };
static const uint32_t s_eoTable[NUM_EDGETYPE];
/* int32_t tables indexed [SAO type][class], and [plane][SAO type][class] */
typedef int32_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
typedef int32_t (PerPlane[NUM_PLANE][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
protected:
/* allocated per part */
PerClass* m_count;
PerClass* m_offset;
PerClass* m_offsetOrg;
/* allocated per CTU */
PerPlane* m_countPreDblk;
PerPlane* m_offsetOrgPreDblk;
/* NOTE(review): the meaning of the [2][4] indices is not visible here;
 * presumably [luma/chroma][depth] - confirm in sao.cpp */
double m_depthSaoRate[2][4];
/* offset lookup tables, one entry per band class / per edge type */
int8_t m_offsetBo[SAO_NUM_BO_CLASSES];
int8_t m_offsetEo[NUM_EDGETYPE];
int m_numCuInWidth;
int m_numCuInHeight;
int m_numPlanes;
int m_hChromaShift;
int m_vChromaShift;
/* NOTE(review): presumably m_clipTable points into the m_clipTableBase
 * allocation - confirm in create()/destroy() */
pixel* m_clipTable;
pixel* m_clipTableBase;
/* temporary pixel buffers; the [3] arrays match NUM_PLANE */
pixel* m_tmpU1[3];
pixel* m_tmpU2[3];
pixel* m_tmpL1;
pixel* m_tmpL2;
public:
/* three entropy-coder states kept side by side for SAO RD decisions */
struct SAOContexts
{
Entropy cur;
Entropy next;
Entropy temp;
};
Frame* m_frame;
Entropy m_entropyCoder;
SAOContexts m_rdContexts;
x265_param* m_param;
int m_refDepth;
int m_numNoSao[2];
double m_lumaLambda;
double m_chromaLambda;
/* TODO: No doubles for distortion */
SAO();
/* create() reports success through its bool result; destroy() is its counterpart */
bool create(x265_param* param);
void destroy();
void allocSaoParam(SAOParam* saoParam) const;
void startSlice(Frame* pic, Entropy& initState, int qp);
void resetStats();
void resetSaoUnit(SaoCtuParam* saoUnit);
// CTU-based SAO process without slice granularity
void processSaoCu(int addr, int typeIdx, int plane);
void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc);
/* statistics gathering for one CTU (addr) or one CTU position (idxX, idxY) */
void calcSaoStatsCu(int addr, int plane);
void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
/* distortion of the SAO parameters for the CTU at addr; addrUp / addrLeft
 * name the neighbouring CTUs and mergeDist receives the merge distortions */
void saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[2], double* mergeDist);
void sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist);
inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
/* row-level RDO entry points */
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
};
}
#endif // ifndef X265_SAO_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,468 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_SEARCH_H
#define X265_SEARCH_H
#include "common.h"
#include "predict.h"
#include "quant.h"
#include "bitcost.h"
#include "framedata.h"
#include "yuv.h"
#include "threadpool.h"
#include "rdcost.h"
#include "entropy.h"
#include "motion.h"
/* CU profiling helpers. With DETAILED_CU_STATS enabled they update the
 * m_stats entry selected by cu.m_encData->m_frameEncoderID:
 *   ProfileCUScopeNamed - increments the `count` field and declares a
 *                         ScopedElapsedTime called `name` bound to the `acc` field
 *   ProfileCUScope      - same, using the fixed variable name `timedScope`
 *   ProfileCounter      - increments the `count` field only
 * ProfileCUScopeNamed expands to two statements (an increment plus a
 * declaration), so it must not be used as the unbraced body of an if/for/while.
 * Without DETAILED_CU_STATS all three expand to nothing. */
#if DETAILED_CU_STATS
#define ProfileCUScopeNamed(name, cu, acc, count) \
m_stats[cu.m_encData->m_frameEncoderID].count++; \
ScopedElapsedTime name(m_stats[cu.m_encData->m_frameEncoderID].acc)
#define ProfileCUScope(cu, acc, count) ProfileCUScopeNamed(timedScope, cu, acc, count)
#define ProfileCounter(cu, count) m_stats[cu.m_encData->m_frameEncoderID].count++;
#else
#define ProfileCUScopeNamed(name, cu, acc, count)
#define ProfileCUScope(cu, acc, count)
#define ProfileCounter(cu, count)
#endif
namespace X265_NS {
// private namespace
class Entropy;
struct ThreadLocalData;
/* All the CABAC contexts that Analysis needs to keep track of at each depth
 * and temp buffers for residual, coeff, and recon for use during residual
 * quad-tree depth recursion. Search holds one RQTData per depth in
 * m_rqt[NUM_FULL_DEPTH]. */
struct RQTData
{
Entropy cur; /* starting context for current CU */
/* the RQT buffers below are indexed by qtLayer (log2size - 2) so nominally
 * 0=4x4, 1=8x8, 2=16x16, 3=32x32. The coeffRQT and reconQtYuv are allocated
 * to the max CU size at every depth. The parts which are reconstructed at
 * each depth are valid. At the end, the transform depth table is walked and
 * the coeff and recon at the final split depths are collected */
Entropy rqtRoot; /* residual quad-tree start context */
Entropy rqtTemp; /* residual quad-tree temp context */
Entropy rqtTest; /* residual quad-tree test context */
coeff_t* coeffRQT[3]; /* coeff storage for entire CTU for each RQT layer */
Yuv reconQtYuv; /* recon storage for entire CTU for each RQT layer (intra) */
ShortYuv resiQtYuv; /* residual storage for entire CTU for each RQT layer (inter) */
/* per-depth temp buffers for inter prediction */
ShortYuv tmpResiYuv;
Yuv tmpPredYuv;
Yuv bidirPredYuv[2];
};
/* One motion search result record. Mode stores these in
 * bestME[MAX_INTER_PARTS][2].
 * NOTE(review): the field meanings below are inferred from their names and
 * types only - confirm against predInterSearch() before relying on them. */
struct MotionData
{
MV mv; /* presumably the selected motion vector */
MV mvp; /* presumably the motion vector predictor used for mv */
int mvpIdx; /* presumably the index of mvp within the AMVP candidate list */
int ref; /* presumably the reference picture index */
uint32_t cost;
int bits;
};
/* Working state for one candidate coding mode of a CU: the CU data itself,
 * prediction / reconstruction buffers, entropy contexts, motion search
 * results and the cost metrics accumulated while the mode is evaluated. */
struct Mode
{
    CUData     cu;
    const Yuv* fencYuv;
    Yuv        predYuv;
    Yuv        reconYuv;
    Entropy    contexts;

    enum { MAX_INTER_PARTS = 2 };

    MotionData bestME[MAX_INTER_PARTS][2];
    MV         amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];

    // Neighbour MVs of the current partition. 5 spatial candidates and the
    // temporal candidate.
    InterNeighbourMV interNeighbours[6];

    uint64_t   rdCost;           // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
    uint64_t   sa8dCost;         // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
    uint32_t   sa8dBits;         // signal bits used in sa8dCost calculation
    uint32_t   psyEnergy;        // sum of partition psycho-visual energy difference
    sse_ret_t  resEnergy;        // sum of partition residual energy after motion prediction
    sse_ret_t  lumaDistortion;
    sse_ret_t  chromaDistortion;
    sse_ret_t  distortion;       // sum of partition SSE distortion
    uint32_t   totalBits;        // sum of partition bits (mv + coeff)
    uint32_t   mvBits;           // Mv bits + Ref + block type (or intra mode)
    uint32_t   coeffBits;        // Texture bits (DCT Coeffs)

    /* Zero every cost metric so that partition costs can be summed into it. */
    void initCosts()
    {
        /* 64-bit costs */
        rdCost = 0;
        sa8dCost = 0;

        /* distortion / energy sums */
        resEnergy = 0;
        lumaDistortion = 0;
        chromaDistortion = 0;
        distortion = 0;
        psyEnergy = 0;

        /* bit counters */
        sa8dBits = 0;
        totalBits = 0;
        mvBits = 0;
        coeffBits = 0;
    }

    /* Poison every cost metric with a "half of max" sentinel so that ok()
     * can detect re-use of a mode whose costs were never computed. */
    void invalidate()
    {
        rdCost = sa8dCost = UINT64_MAX / 2;
        sa8dBits = psyEnergy = MAX_UINT / 2;
        totalBits = mvBits = coeffBits = MAX_UINT / 2;

        /* the sse_ret_t sentinel depends on the build's bit depth */
#if X265_DEPTH <= 10
        resEnergy = lumaDistortion = chromaDistortion = distortion = MAX_UINT / 2;
#else
        resEnergy = lumaDistortion = chromaDistortion = distortion = UINT64_MAX / 2;
#endif
    }

    /* True when no metric has reached its invalidate() sentinel. */
    bool ok() const
    {
        const bool costsValid = rdCost < UINT64_MAX / 2 &&
                                sa8dCost < UINT64_MAX / 2;

        const bool bitsValid = sa8dBits < MAX_UINT / 2 &&
                               psyEnergy < MAX_UINT / 2 &&
                               totalBits < MAX_UINT / 2 &&
                               mvBits < MAX_UINT / 2 &&
                               coeffBits < MAX_UINT / 2;

#if X265_DEPTH <= 10
        const bool distValid = resEnergy < MAX_UINT / 2 &&
                               lumaDistortion < MAX_UINT / 2 &&
                               chromaDistortion < MAX_UINT / 2 &&
                               distortion < MAX_UINT / 2;
#else
        const bool distValid = resEnergy < UINT64_MAX / 2 &&
                               lumaDistortion < UINT64_MAX / 2 &&
                               chromaDistortion < UINT64_MAX / 2 &&
                               distortion < UINT64_MAX / 2;
#endif

        return costsValid && bitsValid && distValid;
    }

    /* Add every cost metric of subMode into this mode. subMode must hold
     * valid (non-sentinel) costs; that is checked in checked builds. */
    void addSubCosts(const Mode& subMode)
    {
        X265_CHECK(subMode.ok(), "sub-mode not initialized");

        /* 64-bit costs */
        rdCost   += subMode.rdCost;
        sa8dCost += subMode.sa8dCost;

        /* distortion / energy sums */
        resEnergy        += subMode.resEnergy;
        lumaDistortion   += subMode.lumaDistortion;
        chromaDistortion += subMode.chromaDistortion;
        distortion       += subMode.distortion;
        psyEnergy        += subMode.psyEnergy;

        /* bit counters */
        sa8dBits  += subMode.sa8dBits;
        totalBits += subMode.totalBits;
        mvBits    += subMode.mvBits;
        coeffBits += subMode.coeffBits;
    }
};
#if DETAILED_CU_STATS
/* This structure is intended for performance debugging and we make no attempt
* to handle dynamic range overflows. Care should be taken to avoid long encodes
* if you care about the accuracy of these elapsed times and counters. This
* profiling is orthogonal to PPA/VTune and can be enabled independently from
* either of them */
/* Profiling counters and elapsed-time accumulators; Search keeps one instance
 * per frame encoder. All members are plain integers, which is what allows
 * clear() to reset the object with a single memset. */
struct CUStats
{
    /* elapsed-time accumulators */
    int64_t  intraRDOElapsedTime[NUM_CU_DEPTH]; // elapsed worker time in intra RDO per CU depth
    int64_t  interRDOElapsedTime[NUM_CU_DEPTH]; // elapsed worker time in inter RDO per CU depth
    int64_t  intraAnalysisElapsedTime;          // elapsed worker time in intra sa8d analysis
    int64_t  motionEstimationElapsedTime;       // elapsed worker time in predInterSearch()
    int64_t  loopFilterElapsedTime;             // elapsed worker time in deblock and SAO and PSNR/SSIM
    int64_t  pmeTime;                           // elapsed worker time processing ME slave jobs
    int64_t  pmeBlockTime;                      // elapsed worker time blocked for pme batch completion
    int64_t  pmodeTime;                         // elapsed worker time processing pmode slave jobs
    int64_t  pmodeBlockTime;                    // elapsed worker time blocked for pmode batch completion
    int64_t  weightAnalyzeTime;                 // elapsed worker time analyzing reference weights
    int64_t  totalCTUTime;                      // elapsed worker time in compressCTU (includes pmode master)

    /* per-depth skip / total counters */
    uint32_t skippedMotionReferences[NUM_CU_DEPTH];
    uint32_t totalMotionReferences[NUM_CU_DEPTH];
    uint32_t skippedIntraCU[NUM_CU_DEPTH];
    uint32_t totalIntraCU[NUM_CU_DEPTH];

    /* event counters */
    uint64_t countIntraRDO[NUM_CU_DEPTH];
    uint64_t countInterRDO[NUM_CU_DEPTH];
    uint64_t countIntraAnalysis;
    uint64_t countMotionEstimate;
    uint64_t countLoopFilter;
    uint64_t countPMETasks;
    uint64_t countPMEMasters;
    uint64_t countPModeTasks;
    uint64_t countPModeMasters;
    uint64_t countWeightAnalyze;
    uint64_t totalCTUs;

    CUStats() { clear(); }

    /* Reset every counter and timer to zero. */
    void clear()
    {
        memset(this, 0, sizeof(*this));
    }

    /* Add all of other's statistics into this object, then reset other so
     * the same samples are never counted twice. */
    void accumulate(CUStats& other)
    {
        /* scalar timers */
        totalCTUTime                += other.totalCTUTime;
        weightAnalyzeTime           += other.weightAnalyzeTime;
        pmodeBlockTime              += other.pmodeBlockTime;
        pmodeTime                   += other.pmodeTime;
        pmeBlockTime                += other.pmeBlockTime;
        pmeTime                     += other.pmeTime;
        loopFilterElapsedTime       += other.loopFilterElapsedTime;
        motionEstimationElapsedTime += other.motionEstimationElapsedTime;
        intraAnalysisElapsedTime    += other.intraAnalysisElapsedTime;

        /* scalar counters */
        totalCTUs           += other.totalCTUs;
        countWeightAnalyze  += other.countWeightAnalyze;
        countPModeMasters   += other.countPModeMasters;
        countPModeTasks     += other.countPModeTasks;
        countPMEMasters     += other.countPMEMasters;
        countPMETasks       += other.countPMETasks;
        countLoopFilter     += other.countLoopFilter;
        countMotionEstimate += other.countMotionEstimate;
        countIntraAnalysis  += other.countIntraAnalysis;

        /* per-depth arrays, depths 0..g_maxCUDepth inclusive */
        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
        {
            intraRDOElapsedTime[depth]     += other.intraRDOElapsedTime[depth];
            interRDOElapsedTime[depth]     += other.interRDOElapsedTime[depth];
            countIntraRDO[depth]           += other.countIntraRDO[depth];
            countInterRDO[depth]           += other.countInterRDO[depth];
            skippedMotionReferences[depth] += other.skippedMotionReferences[depth];
            totalMotionReferences[depth]   += other.totalMotionReferences[depth];
            skippedIntraCU[depth]          += other.skippedIntraCU[depth];
            totalIntraCU[depth]            += other.totalIntraCU[depth];
        }

        other.clear();
    }
};
#endif
/* Bit cost of idx under a code that spends one extra bit on every index
 * except the last one: returns idx + 1 while idx < numIdx - 1, and idx
 * otherwise. */
inline int getTUBits(int idx, int numIdx)
{
    const bool needsExtraBit = idx < numIdx - 1;
    return needsExtraBit ? idx + 1 : idx;
}
/* Mode search engine built on top of Predict. It owns the motion estimator,
 * quantizer, RD cost calculator, entropy coder and the per-depth RQT work
 * buffers, and declares the intra / inter search and residual coding entry
 * points. Only declarations live in this header, apart from the inline
 * updateModeCost() at the end. */
class Search : public Predict
{
public:
static const int16_t zeroShort[MAX_CU_SIZE];
MotionEstimate m_me;
Quant m_quant;
RDCost m_rdCost;
const x265_param* m_param;
Frame* m_frame;
const Slice* m_slice;
Entropy m_entropyCoder;
/* one set of RQT contexts and buffers per depth */
RQTData m_rqt[NUM_FULL_DEPTH];
uint8_t* m_qtTempCbf[3];
uint8_t* m_qtTempTransformSkipFlag[3];
pixel* m_fencScaled; /* 32x32 buffer for down-scaled version of 64x64 CU fenc */
pixel* m_fencTransposed; /* 32x32 buffer for transposed copy of fenc */
pixel* m_intraPred; /* 32x32 buffer for individual intra predictions */
pixel* m_intraPredAngs; /* allocation for 33 consecutive (all angular) 32x32 intra predictions */
coeff_t* m_tsCoeff; /* transform skip coeff 32x32 */
int16_t* m_tsResidual; /* transform skip residual 32x32 */
pixel* m_tsRecon; /* transform skip reconstructed pixels 32x32 */
bool m_bFrameParallel;
bool m_bEnableRDOQ;
uint32_t m_numLayers;
uint32_t m_refLagPixels;
#if DETAILED_CU_STATS
/* Accumulate CU statistics separately for each frame encoder */
CUStats m_stats[X265_MAX_FRAME_THREADS];
#endif
Search();
~Search();
/* one-time setup; success is reported through the bool result */
bool initSearch(const x265_param& param, ScalingList& scalingList);
int setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant QP in valid spec range */
// mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
void invalidateContexts(int fromDepth);
// full RD search of intra modes. if sharedModes is not NULL, it directly uses them
void checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes);
// select best intra mode using only sa8d costs, cannot measure NxN intra
void checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
// encode luma mode selected by checkIntraInInter, then pick and encode a chroma mode
void encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
// motion estimation for inter prediction (non-skip)
void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
// encode residual and compute rd-cost for inter mode
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
void encodeResAndCalcRdSkipCU(Mode& interMode);
// encode residual without rd-cost
void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth);
// pick best chroma mode from available using just sa8d costs
void getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);
/* update CBF flags and QP values to be internally consistent */
void checkDQP(Mode& mode, const CUGeom& cuGeom);
void checkDQPForSplitPred(Mode& mode, const CUGeom& cuGeom);
MV getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref);
/* Task group describing a batch of motion searches for one prediction unit.
 * It binds the master Search, the Mode being evaluated, the CU geometry and
 * the PU by reference; m_jobs lists the reference indices to search for each
 * of the two lists (ref[list][n], with refCnt[list] entries in use). */
class PME : public BondedTaskGroup
{
public:
Search& master;
Mode& mode;
const CUGeom& cuGeom;
const PredictionUnit& pu;
int puIdx;
struct {
int ref[2][MAX_NUM_REF];
int refCnt[2];
} m_jobs;
PME(Search& s, Mode& m, const CUGeom& g, const PredictionUnit& u, int p) : master(s), mode(m), cuGeom(g), pu(u), puIdx(p) {}
void processTasks(int workerThreadId);
protected:
/* Declared to block assignment: the reference members cannot be re-seated.
 * NOTE(review): returns PME by value rather than the conventional PME&;
 * harmless as long as it is never defined or called - confirm. */
PME operator=(const PME&);
};
void processPME(PME& pme, Search& slave);
void singleMotionEstimation(Search& master, Mode& interMode, const PredictionUnit& pu, int part, int list, int ref);
protected:
/* motion estimation distribution */
ThreadLocalData* m_tld;
uint32_t m_listSelBits[3];
Lock m_meLock;
void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
// RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
// RDO select best chroma mode from luma; result is fully encoded chroma. chroma distortion is returned
uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes);
void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
void codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);
/* cost accumulator passed through the RQT recursion; starts out all zero */
struct Cost
{
uint64_t rdcost;
uint32_t bits;
sse_ret_t distortion;
uint32_t energy;
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
};
/* NOTE(review): dist is uint32_t here while Cost::distortion is sse_ret_t -
 * confirm the two cannot diverge in builds where sse_ret_t is wider */
uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2]);
// generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
void extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx);
// generate chroma prediction, generate residual and recon
uint32_t codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy);
uint32_t codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy);
void extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth);
// reshuffle CBF flags after coding a pair of 4:2:2 chroma blocks
void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx);
/* output of mergeEstimation, best merge candidate */
struct MergeData
{
MVField mvField[2];
uint32_t dir;
uint32_t index;
uint32_t bits;
};
/* inter/ME helper functions */
int selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref);
const MV& checkBestMVP(const MV amvpCand[2], const MV& mv, int& mvpIdx, uint32_t& outBits, uint32_t& outCost) const;
void setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const;
uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m);
static void getBlkBits(PartSize cuMode, bool bPSlice, int puIdx, uint32_t lastMode, uint32_t blockBit[3]);
/* intra helper functions */
enum { MAX_RD_INTRA_MODES = 16 };
static void updateCandList(uint32_t mode, uint64_t cost, int maxCandCount, uint32_t* candModeList, uint64_t* candCostList);
// get most probable luma modes for CU part, and bit cost of all non mpm modes
uint32_t getIntraRemModeBits(CUData & cu, uint32_t absPartIdx, uint32_t mpmModes[3], uint64_t& mpms) const;
/* recompute m.rdCost from m.distortion and m.totalBits; the psy variant, which
 * also takes m.psyEnergy, is used when m_rdCost.m_psyRd is non-zero */
void updateModeCost(Mode& m) const { m.rdCost = m_rdCost.m_psyRd ? m_rdCost.calcPsyRdCost(m.distortion, m.totalBits, m.psyEnergy) : m_rdCost.calcRdCost(m.distortion, m.totalBits); }
};
}
#endif // ifndef X265_SEARCH_H

View file

@ -0,0 +1,74 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "bitstream.h"
#include "slice.h"
#include "sei.h"
using namespace X265_NS;
/* x265's identifying GUID: the 16 bytes that SEIuserDataUnregistered::write()
 * emits as uuid_iso_iec_11578 ahead of the user data bytes. Changing any byte
 * changes the identifier written into every stream. */
const uint8_t SEIuserDataUnregistered::m_uuid_iso_iec_11578[16] = {
0x2C, 0xA2, 0xDE, 0x09, 0xB5, 0x17, 0x47, 0xDB,
0xBB, 0x55, 0xA4, 0xFE, 0x7F, 0xC2, 0xFC, 0x4E
};
/* Serialize this SEI message into bitstream bs. The payload is produced
 * twice by the virtual writeSEI(): first into a bit counter so its size is
 * known, then - after the payload_type / payload_size header - into bs. */
void SEI::write(Bitstream& bs, const SPS& sps)
{
    /* pass 1: measure the payload with a bit counter */
    BitCounter sizer;
    m_bitIf = &sizer;
    writeSEI(sps);

    /* pass 2: everything from here on goes to the real bitstream */
    m_bitIf = &bs;

    /* payload_type: one 0xff byte per full 255, then the remainder */
    uint32_t typeLeft = payloadType();
    while (typeLeft >= 0xff)
    {
        WRITE_CODE(0xff, 8, "payload_type");
        typeLeft -= 0xff;
    }
    WRITE_CODE(typeLeft, 8, "payload_type");

    /* payload_size in bytes, coded the same way as payload_type */
    X265_CHECK(0 == (sizer.getNumberOfWrittenBits() & 7), "payload unaligned\n");
    uint32_t sizeLeft = sizer.getNumberOfWrittenBits() >> 3;
    while (sizeLeft >= 0xff)
    {
        WRITE_CODE(0xff, 8, "payload_size");
        sizeLeft -= 0xff;
    }
    WRITE_CODE(sizeLeft, 8, "payload_size");

    /* the payload itself */
    writeSEI(sps);
}
/* Pad the current output to a byte boundary: nothing is written when it is
 * already aligned, otherwise a single 1 bit followed by as many 0 bits as
 * are needed to reach the next multiple of 8. */
void SEI::writeByteAlign()
{
    // TODO: expose bs.writeByteAlignment() as virtual function
    if (m_bitIf->getNumberOfWrittenBits() % 8 == 0)
        return;

    WRITE_FLAG(1, "bit_equal_to_one");
    while (m_bitIf->getNumberOfWrittenBits() % 8 != 0)
        WRITE_FLAG(0, "bit_equal_to_zero");
}

344
x265/source/encoder/sei.h Normal file
View file

@ -0,0 +1,344 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_SEI_H
#define X265_SEI_H
#include "common.h"
#include "bitstream.h"
#include "slice.h"
namespace X265_NS {
// private namespace
/* Base class of all SEI messages. A subclass reports its payload type through
 * payloadType() and either overrides writeSEI() (payload only; header and size
 * are handled by the default write()) or overrides write() and emits the
 * complete message, header included, itself. */
class SEI : public SyntaxElementWriter
{
public:
/* SEI users call write() to marshal an SEI to a bitstream. SEI
 * subclasses may implement write() or accept the default write()
 * method which calls writeSEI() with a bitcounter to determine
 * the size, then it encodes the header and calls writeSEI a
 * second time for the real encode. */
virtual void write(Bitstream& bs, const SPS& sps);
virtual ~SEI() {}
protected:
/* Numeric payload_type values as written into the bitstream. The numbers
 * are part of the stream format and must not be renumbered. */
enum PayloadType
{
BUFFERING_PERIOD = 0,
PICTURE_TIMING = 1,
PAN_SCAN_RECT = 2,
FILLER_PAYLOAD = 3,
USER_DATA_REGISTERED_ITU_T_T35 = 4,
USER_DATA_UNREGISTERED = 5,
RECOVERY_POINT = 6,
SCENE_INFO = 9,
FULL_FRAME_SNAPSHOT = 15,
PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
PROGRESSIVE_REFINEMENT_SEGMENT_END = 17,
FILM_GRAIN_CHARACTERISTICS = 19,
POST_FILTER_HINT = 22,
TONE_MAPPING_INFO = 23,
FRAME_PACKING = 45,
DISPLAY_ORIENTATION = 47,
SOP_DESCRIPTION = 128,
ACTIVE_PARAMETER_SETS = 129,
DECODING_UNIT_INFO = 130,
TEMPORAL_LEVEL0_INDEX = 131,
DECODED_PICTURE_HASH = 132,
SCALABLE_NESTING = 133,
REGION_REFRESH_INFO = 134,
MASTERING_DISPLAY_INFO = 137,
CONTENT_LIGHT_LEVEL_INFO = 144,
};
/* payload type of the concrete message; every subclass must provide it */
virtual PayloadType payloadType() const = 0;
/* payload writer used by the default write(); the base version only trips a
 * check, so subclasses relying on the default write() must override it */
virtual void writeSEI(const SPS&) { X265_CHECK(0, "empty writeSEI method called\n"); }
/* pads the output to a byte boundary with a 1 bit followed by 0 bits */
void writeByteAlign();
};
/* user_data_unregistered SEI: x265's 16-byte UUID followed by an opaque block
 * of caller-supplied bytes. The object does not own m_userData; the caller
 * sets m_userData / m_userDataLength before calling write(). */
class SEIuserDataUnregistered : public SEI
{
public:
    PayloadType payloadType() const { return USER_DATA_UNREGISTERED; }

    /* Start out as an empty payload. The length is zeroed together with the
     * pointer so that write() on a default-constructed object emits just the
     * UUID instead of looping over a NULL pointer with an indeterminate count. */
    SEIuserDataUnregistered() : m_userDataLength(0), m_userData(NULL) {}

    static const uint8_t m_uuid_iso_iec_11578[16];
    uint32_t m_userDataLength;  /* number of bytes at m_userData */
    uint8_t *m_userData;        /* caller-owned payload bytes, may be NULL when the length is 0 */

    /* Writes the complete message (header included) straight to bs, bypassing
     * the two-pass default SEI::write(). The SPS argument is unused. */
    void write(Bitstream& bs, const SPS&)
    {
        m_bitIf = &bs;

        WRITE_CODE(USER_DATA_UNREGISTERED, 8, "payload_type");

        /* payload_size: one 0xff byte per full 255 bytes, then the remainder */
        uint32_t payloadSize = 16 + m_userDataLength;
        for (; payloadSize >= 0xff; payloadSize -= 0xff)
            WRITE_CODE(0xff, 8, "payload_size");
        WRITE_CODE(payloadSize, 8, "payload_size");

        for (uint32_t i = 0; i < 16; i++)
            WRITE_CODE(m_uuid_iso_iec_11578[i], 8, "sei.uuid_iso_iec_11578[i]");

        for (uint32_t i = 0; i < m_userDataLength; i++)
            WRITE_CODE(m_userData[i], 8, "user_data");
    }
};
/* Mastering display colour volume SEI: three pairs of display primaries, the
 * white point and the max / min mastering luminance. The values are filled
 * in by parse() and written verbatim by write(). */
class SEIMasteringDisplayColorVolume : public SEI
{
public:
    uint16_t displayPrimaryX[3];
    uint16_t displayPrimaryY[3];
    uint16_t whitePointX, whitePointY;
    uint32_t maxDisplayMasteringLuminance;
    uint32_t minDisplayMasteringLuminance;

    PayloadType payloadType() const { return MASTERING_DISPLAY_INFO; }

    /* Fill all ten fields from a string of the form
     * "G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)". Returns true only when every
     * field was converted. */
    bool parse(const char* value)
    {
        const int fieldsExpected = 10;
        const int fieldsRead =
            sscanf(value, "G(%hu,%hu)B(%hu,%hu)R(%hu,%hu)WP(%hu,%hu)L(%u,%u)",
                   &displayPrimaryX[0], &displayPrimaryY[0],
                   &displayPrimaryX[1], &displayPrimaryY[1],
                   &displayPrimaryX[2], &displayPrimaryY[2],
                   &whitePointX, &whitePointY,
                   &maxDisplayMasteringLuminance, &minDisplayMasteringLuminance);

        return fieldsRead == fieldsExpected;
    }

    /* Writes the complete message (header included) straight to bs, bypassing
     * the two-pass default SEI::write(). The SPS argument is unused. */
    void write(Bitstream& bs, const SPS&)
    {
        m_bitIf = &bs;

        WRITE_CODE(MASTERING_DISPLAY_INFO, 8, "payload_type");
        /* eight 16-bit fields plus two 32-bit fields = 24 bytes */
        WRITE_CODE(8 * 2 + 2 * 4, 8, "payload_size");

        for (uint32_t c = 0; c < 3; c++)
        {
            WRITE_CODE(displayPrimaryX[c], 16, "display_primaries_x[ c ]");
            WRITE_CODE(displayPrimaryY[c], 16, "display_primaries_y[ c ]");
        }

        WRITE_CODE(whitePointX, 16, "white_point_x");
        WRITE_CODE(whitePointY, 16, "white_point_y");

        WRITE_CODE(maxDisplayMasteringLuminance, 32, "max_display_mastering_luminance");
        WRITE_CODE(minDisplayMasteringLuminance, 32, "min_display_mastering_luminance");
    }
};
/* Content light level SEI: two 16-bit values written verbatim. Neither
 * member is initialized by this class; the caller sets both before write(). */
class SEIContentLightLevel : public SEI
{
public:
uint16_t max_content_light_level;
uint16_t max_pic_average_light_level;
PayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; }
/* Writes the complete message (header included) straight to bs, bypassing
 * the two-pass default SEI::write(). The SPS argument is unused. */
void write(Bitstream& bs, const SPS&)
{
m_bitIf = &bs;
WRITE_CODE(CONTENT_LIGHT_LEVEL_INFO, 8, "payload_type");
/* payload is two 16-bit fields = 4 bytes */
WRITE_CODE(4, 8, "payload_size");
WRITE_CODE(max_content_light_level, 16, "max_content_light_level");
WRITE_CODE(max_pic_average_light_level, 16, "max_pic_average_light_level");
}
};
/* Decoded picture hash SEI: one digest per plane (three planes), using the
 * method selected by m_method. m_digest[plane] holds the digest bytes with
 * the most significant byte first: 16 bytes for MD5, 2 for CRC and 4 for
 * CHECKSUM. */
class SEIDecodedPictureHash : public SEI
{
public:
    PayloadType payloadType() const { return DECODED_PICTURE_HASH; }

    /* The enumerator values are written to the stream as hash_type. */
    enum Method
    {
        MD5,
        CRC,
        CHECKSUM,
    } m_method;

    uint8_t m_digest[3][16];

    /* Writes the complete message (header included) straight to bs, bypassing
     * the two-pass default SEI::write(). The SPS argument is unused. */
    void write(Bitstream& bs, const SPS&)
    {
        m_bitIf = &bs;

        WRITE_CODE(DECODED_PICTURE_HASH, 8, "payload_type");

        /* payload_size = 1 byte of hash_type + 3 planes * digest size */
        switch (m_method)
        {
        case MD5:
            WRITE_CODE(1 + 16 * 3, 8, "payload_size");
            WRITE_CODE(MD5, 8, "hash_type");
            break;
        case CRC:
            WRITE_CODE(1 + 2 * 3, 8, "payload_size");
            WRITE_CODE(CRC, 8, "hash_type");
            break;
        case CHECKSUM:
            WRITE_CODE(1 + 4 * 3, 8, "payload_size");
            WRITE_CODE(CHECKSUM, 8, "hash_type");
            break;
        }

        for (int yuvIdx = 0; yuvIdx < 3; yuvIdx++)
        {
            const uint8_t* digest = m_digest[yuvIdx];

            if (m_method == MD5)
            {
                for (uint32_t i = 0; i < 16; i++)
                    WRITE_CODE(digest[i], 8, "picture_md5");
            }
            else if (m_method == CRC)
            {
                uint32_t val = ((uint32_t)digest[0] << 8) | (uint32_t)digest[1];
                WRITE_CODE(val, 16, "picture_crc");
            }
            else if (m_method == CHECKSUM)
            {
                /* Widen each byte to uint32_t before shifting: a uint8_t
                 * promotes to signed int, and shifting a value >= 0x80 left
                 * by 24 would overflow it, which is undefined behaviour. */
                uint32_t val = ((uint32_t)digest[0] << 24) |
                               ((uint32_t)digest[1] << 16) |
                               ((uint32_t)digest[2] << 8) |
                                (uint32_t)digest[3];
                WRITE_CODE(val, 32, "picture_checksum");
            }
        }
    }
};
/* Active parameter sets SEI. Only the payload is produced here; the header
 * and size come from the default SEI::write(), which calls writeSEI() twice
 * (once into a bit counter, once into the bitstream). Neither flag is
 * initialized by this class; the caller sets both before write(). */
class SEIActiveParameterSets : public SEI
{
public:
PayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; }
bool m_selfContainedCvsFlag;
bool m_noParamSetUpdateFlag;
/* The VPS id, the SPS count and the SPS id are all written as the constant
 * 0; only the two flags vary. The payload ends byte aligned. */
void writeSEI(const SPS&)
{
WRITE_CODE(0, 4, "active_vps_id");
WRITE_FLAG(m_selfContainedCvsFlag, "self_contained_cvs_flag");
WRITE_FLAG(m_noParamSetUpdateFlag, "no_param_set_update_flag");
WRITE_UVLC(0, "num_sps_ids_minus1");
WRITE_UVLC(0, "active_seq_param_set_id");
writeByteAlign();
}
};
/* Buffering period SEI. Only the payload is produced here; the header and
 * size come from the default SEI::write(), which calls writeSEI() twice. */
class SEIBufferingPeriod : public SEI
{
public:
PayloadType payloadType() const { return BUFFERING_PERIOD; }
/* NOTE(review): the constructor leaves m_initialCpbRemovalDelay and
 * m_initialCpbRemovalDelayOffset uninitialized, yet writeSEI() emits both;
 * callers must assign them before write() - confirm every call site does. */
SEIBufferingPeriod()
: m_cpbDelayOffset(0)
, m_dpbDelayOffset(0)
, m_auCpbRemovalDelayDelta(1)
{
}
/* not referenced by writeSEI() below */
bool m_cpbDelayOffset;
bool m_dpbDelayOffset;
uint32_t m_initialCpbRemovalDelay;
uint32_t m_initialCpbRemovalDelayOffset;
/* written as value - 1; the default of 1 therefore encodes as 0 */
uint32_t m_auCpbRemovalDelayDelta;
/* Field widths are taken from the HRD parameters in the SPS VUI. The
 * parameter set id and the two leading flags are written as constant 0.
 * The payload ends byte aligned. */
void writeSEI(const SPS& sps)
{
const HRDInfo& hrd = sps.vuiParameters.hrdParameters;
WRITE_UVLC(0, "bp_seq_parameter_set_id");
WRITE_FLAG(0, "rap_cpb_params_present_flag");
WRITE_FLAG(0, "concatenation_flag");
WRITE_CODE(m_auCpbRemovalDelayDelta - 1, hrd.cpbRemovalDelayLength, "au_cpb_removal_delay_delta_minus1");
WRITE_CODE(m_initialCpbRemovalDelay, hrd.initialCpbRemovalDelayLength, "initial_cpb_removal_delay");
WRITE_CODE(m_initialCpbRemovalDelayOffset, hrd.initialCpbRemovalDelayLength, "initial_cpb_removal_delay_offset");
writeByteAlign();
}
};
/* Picture timing SEI. Only the payload is produced here; the header and size
 * come from the default SEI::write(), which calls writeSEI() twice. No member
 * is initialized by this class; the caller sets them before write(). */
class SEIPictureTiming : public SEI
{
public:
PayloadType payloadType() const { return PICTURE_TIMING; }
uint32_t m_picStruct;
uint32_t m_sourceScanType;
bool m_duplicateFlag;
/* written as value - 1.
 * NOTE(review): a value of 0 would wrap in the unsigned subtraction -
 * confirm callers always pass at least 1 */
uint32_t m_auCpbRemovalDelay;
uint32_t m_picDpbOutputDelay;
/* Which fields are written depends on two VUI flags: the frame/field group
 * on frameFieldInfoPresentFlag, the delay group on hrdParametersPresentFlag
 * (with widths from the HRD parameters). The payload ends byte aligned. */
void writeSEI(const SPS& sps)
{
const VUI *vui = &sps.vuiParameters;
const HRDInfo *hrd = &vui->hrdParameters;
if (vui->frameFieldInfoPresentFlag)
{
WRITE_CODE(m_picStruct, 4, "pic_struct");
WRITE_CODE(m_sourceScanType, 2, "source_scan_type");
WRITE_FLAG(m_duplicateFlag, "duplicate_flag");
}
if (vui->hrdParametersPresentFlag)
{
WRITE_CODE(m_auCpbRemovalDelay - 1, hrd->cpbRemovalDelayLength, "au_cpb_removal_delay_minus1");
WRITE_CODE(m_picDpbOutputDelay, hrd->dpbOutputDelayLength, "pic_dpb_output_delay");
/* Removed sub-pic signaling June 2014 */
}
writeByteAlign();
}
};
/* Recovery point SEI. Only the payload is produced here; the header and size
 * come from the default SEI::write(), which calls writeSEI() twice. No member
 * is initialized by this class; the caller sets them before write(). */
class SEIRecoveryPoint : public SEI
{
public:
PayloadType payloadType() const { return RECOVERY_POINT; }
int m_recoveryPocCnt; /* signed; written with WRITE_SVLC */
bool m_exactMatchingFlag;
bool m_brokenLinkFlag;
/* Writes the POC count and the two flags, then byte-aligns the payload. */
void writeSEI(const SPS&)
{
WRITE_SVLC(m_recoveryPocCnt, "recovery_poc_cnt");
WRITE_FLAG(m_exactMatchingFlag, "exact_matching_flag");
WRITE_FLAG(m_brokenLinkFlag, "broken_link_flag");
writeByteAlign();
}
};
}
#endif // ifndef X265_SEI_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,243 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve@borho.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#ifndef X265_SLICETYPE_H
#define X265_SLICETYPE_H
#include "common.h"
#include "slice.h"
#include "motion.h"
#include "piclist.h"
#include "threadpool.h"
namespace X265_NS {
// private namespace
struct Lowres;
class Frame;
class Lookahead;
/* LOWRES_COST_MASK selects the low LOWRES_COST_SHIFT (14) bits of a value;
 * it is derived from the shift so the two constants cannot drift apart.
 * NOTE(review): presumably lowres costs are packed into those low bits with
 * other data stored above them - confirm at the use sites. */
#define LOWRES_COST_SHIFT 14
#define LOWRES_COST_MASK ((1 << LOWRES_COST_SHIFT) - 1)
/* Thread local data for lookahead tasks: a private motion estimator, weighted
 * reference planes with their backing buffers, and the lowres frame
 * dimensions expressed in CUs. */
struct LookaheadTLD
{
    MotionEstimate  me;
    ReferencePlanes weightedRef;
    pixel*          wbuffer[4];
    int             widthInCU;
    int             heightInCU;
    int             ncu;
    int             paddedLines;

#if DETAILED_CU_STATS
    int64_t         batchElapsedTime;
    int64_t         coopSliceElapsedTime;
    uint64_t        countBatches;
    uint64_t        countCoopSlices;
#endif

    /* Configure the motion estimator for lookahead use and start with no
     * weight buffers and zeroed dimensions / statistics. */
    LookaheadTLD()
    {
        me.setQP(X265_LOOKAHEAD_QP);
        me.init(X265_HEX_SEARCH, 1, X265_CSP_I400);

        wbuffer[0] = wbuffer[1] = wbuffer[2] = wbuffer[3] = NULL;

        widthInCU = 0;
        heightInCU = 0;
        ncu = 0;
        paddedLines = 0;

#if DETAILED_CU_STATS
        batchElapsedTime = 0;
        coopSliceElapsedTime = 0;
        countBatches = 0;
        countCoopSlices = 0;
#endif
    }

    /* Record the frame dimensions in CUs (w x h) and the CU count n. */
    void init(int w, int h, int n)
    {
        widthInCU = w;
        heightInCU = h;
        ncu = n;
    }

    /* Only wbuffer[0] is released.
     * NOTE(review): presumably wbuffer[1..3] point into the same allocation -
     * confirm in allocWeightedRef(). */
    ~LookaheadTLD() { X265_FREE(wbuffer[0]); }

    void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
    void lowresIntraEstimate(Lowres& fenc);
    void weightsAnalyse(Lowres& fenc, Lowres& ref);

protected:

    uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp);
    uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
    bool     allocWeightedRef(Lowres& fenc);
};
/* Lookahead is a JobProvider that holds the input and output picture queues
 * and declares the slice type decision, scenecut, VBV and cuTree routines.
 * Only declarations are visible in this header; the method bodies are defined
 * elsewhere (presumably slicetype.cpp -- TODO confirm). */
class Lookahead : public JobProvider
{
public:
PicList m_inputQueue; // input pictures in order received
PicList m_outputQueue; // pictures to be encoded, in encode order
// NOTE(review): presumably m_inputLock guards m_inputQueue and m_outputLock
// guards m_outputQueue -- confirm against the implementation
Lock m_inputLock;
Lock m_outputLock;
/* pre-lookahead */
int m_fullQueueSize;
bool m_isActive;
bool m_sliceTypeBusy;
bool m_bAdaptiveQuant;
bool m_outputSignalRequired;
bool m_bBatchMotionSearch;
bool m_bBatchFrameCosts;
Event m_outputSignal;
// NOTE(review): raw pointers; ownership and lifetime are not visible in this
// header -- check create()/destroy()
LookaheadTLD* m_tld;
x265_param* m_param;
Lowres* m_lastNonB;
int* m_scratch; // temp buffer for cutree propagate
int m_histogram[X265_BFRAME_MAX + 1]; // one slot per value in 0..X265_BFRAME_MAX
int m_lastKeyframe;
// NOTE(review): names suggest frame geometry in 8x8 blocks -- confirm where
// these are assigned
int m_8x8Width;
int m_8x8Height;
int m_8x8Blocks;
int m_numCoopSlices;
int m_numRowsPerSlice;
bool m_filled;
bool m_isSceneTransition;
Lookahead(x265_param *param, ThreadPool *pool);
#if DETAILED_CU_STATS
// profiling counters and accessor, compiled only when DETAILED_CU_STATS is set
int64_t m_slicetypeDecideElapsedTime;
int64_t m_preLookaheadElapsedTime;
uint64_t m_countSlicetypeDecide;
uint64_t m_countPreLookahead;
void getWorkerStats(int64_t& batchElapsedTime, uint64_t& batchCount, int64_t& coopSliceElapsedTime, uint64_t& coopSliceCount);
#endif
// public entry points used by the rest of the encoder
bool create();
void destroy();
void stopJobs();
void addPicture(Frame&, int sliceType);
void flush();
Frame* getDecidedPicture();
void getEstimatedPictureCost(Frame *pic);
protected:
// NOTE(review): presumably the JobProvider hook invoked by pool worker
// threads -- confirm against threadpool.h
void findJob(int workerThreadID);
void slicetypeDecide();
void slicetypeAnalyse(Lowres **frames, bool bKeyframe);
/* called by slicetypeAnalyse() to make slice decisions */
bool scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
bool scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
void slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
int64_t vbvFrameCost(Lowres **frames, int p0, int p1, int b);
void vbvLookahead(Lowres **frames, int numFrames, int keyframes);
/* called by slicetypeAnalyse() to effect cuTree adjustments to adaptive
* quant offsets */
void cuTree(Lowres **frames, int numframes, bool bintra);
void estimateCUPropagate(Lowres **frames, double average_duration, int p0, int p1, int b, int referenced);
void cuTreeFinish(Lowres *frame, double averageDuration, int ref0Distance);
/* called by getEstimatedPictureCost() to finalize cuTree costs */
int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b);
};
/* Bonded task group that carries up to X265_LOOKAHEAD_MAX frame pointers plus
 * a reference to the Lookahead it works for. The per-worker work is declared
 * in processTasks(); its body is not visible in this header. */
class PreLookaheadGroup : public BondedTaskGroup
{
public:
// the constructor below leaves these entries uninitialized; callers must fill
// them before they are read
Frame* m_preframes[X265_LOOKAHEAD_MAX];
Lookahead& m_lookahead;
PreLookaheadGroup(Lookahead& l) : m_lookahead(l) {}
void processTasks(int workerThreadID);
protected:
// declared without a visible definition: instances are not meant to be
// assigned (the reference member cannot be reseated anyway)
PreLookaheadGroup& operator=(const PreLookaheadGroup&);
};
/* Bonded task group for lowres frame cost estimation. It declares two ways of
 * working: a cooperative mode that splits a single estimate into slices
 * (m_coop / m_slice) and a batch mode that queues whole estimates
 * (m_estimates). Method bodies are not visible in this header. */
class CostEstimateGroup : public BondedTaskGroup
{
public:
Lookahead& m_lookahead;
Lowres** m_frames;
bool m_batchMode; // set to false by the constructor
CostEstimateGroup(Lookahead& l, Lowres** f) : m_lookahead(l), m_frames(f), m_batchMode(false) {}
/* Cooperative cost estimate using multiple slices of downscaled frame */
// frame indices and per-direction search flags of the estimate in progress;
// not initialized by the constructor
struct Coop
{
int p0, b, p1;
bool bDoSearch[2];
} m_coop;
enum { MAX_COOP_SLICES = 32 }; // capacity of m_slice
// per-slice accumulators
// NOTE(review): presumably summed once all slices finish -- confirm in
// the implementation
struct Slice
{
int costEst;
int costEstAq;
int intraMbs;
} m_slice[MAX_COOP_SLICES];
int64_t singleCost(int p0, int p1, int b, bool intraPenalty = false);
/* Batch cost estimates, using one worker thread per estimateFrameCost() call */
enum { MAX_BATCH_SIZE = 512 }; // capacity of m_estimates
struct Estimate
{
int p0, b, p1;
} m_estimates[MAX_BATCH_SIZE];
void add(int p0, int p1, int b);
void finishBatch();
protected:
// NOTE(review): name suggests the motion search range used for lowres
// estimation -- confirm at the use site
static const int s_merange = 16;
void processTasks(int workerThreadID);
int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);
void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice);
// declared without a visible definition: instances are not meant to be
// assigned (the reference member cannot be reseated anyway)
CostEstimateGroup& operator=(const CostEstimateGroup&);
};
}
#endif // ifndef X265_SLICETYPE_H

View file

@ -0,0 +1,536 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
* Author: Shazeb Nawaz Khan <shazeb@multicorewareinc.com>
* Steve Borho <steve@borho.org>
* Kavitha Sampas <kavitha@multicorewareinc.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
#include "common.h"
#include "frame.h"
#include "picyuv.h"
#include "lowres.h"
#include "slice.h"
#include "mv.h"
#include "bitstream.h"
using namespace X265_NS;
namespace {
/* Per-call analysis parameters shared by the helpers in this file.
 * weightAnalyse() zero-fills one instance and then assigns every field. */
struct Cache
{
const int * intraCost; // lowres intra costs; weightCost() caps each luma 8x8 SATD with intraCost[cu]
int numPredDir; // reference lists to analyse: 1 for P slices, otherwise 2
int csp; // color space of the source picture (PicYuv::m_picCsp)
int hshift; // CHROMA_H_SHIFT(csp)
int vshift; // CHROMA_V_SHIFT(csp)
int lowresWidthInCU; // lowres width >> 3
int lowresHeightInCU; // lowres line count >> 3
};
/* Estimate the lambda-weighted cost of signalling the weight parameters w[0].
 *
 * w       - weight parameters; only element 0 is read
 * lambda  - cost multiplier
 * bChroma - non-zero when the parameters describe a chroma plane
 *
 * The result is lambda times the sum of a fixed overhead of 10, the
 * exp-golomb size of the denominator (scaled by 2 - bChroma) and twice the
 * signed exp-golomb sizes of the weight and the offset. */
int sliceHeaderCost(WeightParam *w, int lambda, int bChroma)
{
    const WeightParam& first = w[0];

    /* 4 times higher, because chroma is analyzed at full resolution. */
    const int effectiveLambda = bChroma ? lambda * 4 : lambda;

    const int denomBits = bs_size_ue(first.log2WeightDenom) * (2 - bChroma);
    const int paramBits = bs_size_se(first.inputWeight) + bs_size_se(first.inputOffset);

    return effectiveLambda * (10 + denomBits + 2 * paramBits);
}
/* Write a motion compensated copy of the lowres reference luma plane into
 * mcout, one 8x8 block at a time, using the same stride as the reference.
 *
 * mcout - destination plane; its borders are not extended
 * ref   - lowres reference providing width, lines, lumaStride and lowresMC()
 * mvs   - one motion vector per 8x8 block, consumed in raster order
 *
 * Every vector is clipped so the block stays within 8 pixels of the plane. */
void mcLuma(pixel* mcout, Lowres& ref, const MV * mvs)
{
    const intptr_t planeStride = ref.lumaStride;
    const int qpelScale = 1 << 2;
    const int blockSize = 8;

    MV lowerBound, upperBound;
    int blockIdx = 0;

    for (int row = 0; row < ref.lines; row += blockSize)
    {
        lowerBound.y = (int16_t)((-row - 8) * qpelScale);
        upperBound.y = (int16_t)((ref.lines - row - 1 + 8) * qpelScale);

        intptr_t blockOffset = row * planeStride;

        for (int col = 0; col < ref.width; col += blockSize)
        {
            ALIGN_VAR_16(pixel, scratch[8 * 8]);

            /* reset for every block; handed to lowresMC() as an lvalue */
            intptr_t predStride = 8;

            lowerBound.x = (int16_t)((-col - 8) * qpelScale);
            upperBound.x = (int16_t)((ref.width - col - 1 + 8) * qpelScale);

            /* clip MV to available pixels */
            MV blockMv = mvs[blockIdx];
            blockMv = blockMv.clipped(lowerBound, upperBound);

            pixel *pred = ref.lowresMC(blockOffset, blockMv, scratch, predStride);
            primitives.cu[BLOCK_8x8].copy_pp(mcout + blockOffset, planeStride, pred, predStride);

            blockOffset += blockSize;
            blockIdx++;
        }
    }
}
/* use lowres MVs from lookahead to generate a motion compensated chroma plane.
* if a block had cheaper lowres cost as intra, we treat it as MV 0
*
* mcout  - destination chroma plane, written with the same stride as src
* src    - full resolution reference chroma plane
* stride - stride shared by src and mcout
* mvs    - lowres luma motion vectors from the lookahead
* cache  - supplies csp, the chroma shifts and the lowres CU counts
* height - number of chroma rows to process
* width  - number of chroma columns to process
*
* Blocks that fail the bounds test below are copied from src unchanged. */
void mcChroma(pixel * mcout,
pixel * src,
intptr_t stride,
const MV * mvs,
const Cache& cache,
int height,
int width)
{
/* the motion vectors correspond to 8x8 lowres luma blocks, or 16x16 fullres
* luma blocks. We have to adapt block size to chroma csp */
int csp = cache.csp;
int bw = 16 >> cache.hshift;
int bh = 16 >> cache.vshift;
const int mvshift = 1 << 2;
MV mvmin, mvmax;
for (int y = 0; y < height; y += bh)
{
/* note: lowres block count per row might be different from chroma block
* count per row because of rounding issues, so be very careful with indexing
* into the lowres structures */
/* NOTE(review): y is a chroma pixel row that advances by bh, not a block
* row, so this index grows bh times faster than a block-row index would;
* confirm whether (y / bh) * <mv row pitch> was intended */
int cu = y * cache.lowresWidthInCU;
intptr_t pixoff = y * stride;
mvmin.y = (int16_t)((-y - 8) * mvshift);
mvmax.y = (int16_t)((height - y - 1 + 8) * mvshift);
for (int x = 0; x < width; x += bw, cu++, pixoff += bw)
{
/* NOTE(review): x and y are pixel coordinates but are compared against CU
* counts; this keeps the mvs[] index small but limits motion compensation
* to the top-left corner of the plane -- confirm intent */
if (x < cache.lowresWidthInCU && y < cache.lowresHeightInCU)
{
MV mv = mvs[cu]; // lowres MV
mv <<= 1; // fullres MV
mv.x >>= cache.hshift;
mv.y >>= cache.vshift;
/* clip MV to available pixels */
mvmin.x = (int16_t)((-x - 8) * mvshift);
mvmax.x = (int16_t)((width - x - 1 + 8) * mvshift);
mv = mv.clipped(mvmin, mvmax);
/* integer part of the vector selects the source block */
intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
pixel *temp = src + pixoff + fpeloffset;
/* NOTE(review): the integer part uses >> 2 while the fraction keeps three
* bits (& 0x7), so bit 2 contributes to both -- confirm the coefficient
* index the chroma filters expect */
int xFrac = mv.x & 0x7;
int yFrac = mv.y & 0x7;
/* pick the cheapest primitive for the fractional position; every call
* uses the pu[LUMA_16x16] entry of the chroma table for this csp */
if ((yFrac | xFrac) == 0)
{
primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, temp, stride);
}
else if (yFrac == 0)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, mcout + pixoff, stride, xFrac);
}
else if (xFrac == 0)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, mcout + pixoff, stride, yFrac);
}
else
{
/* two pass filter: horizontal into a 16-bit intermediate with row pitch
* bw, then vertical starting (NTAPS_CHROMA >> 1) - 1 rows into it.
* NOTE(review): the trailing 1 passed to filter_hps presumably requests
* the extra rows the vertical pass needs -- confirm */
ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, imm, bw, xFrac, 1);
primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
}
}
else
{
/* no usable lowres MV for this block: plain copy (MV 0) */
primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, src + pixoff, stride);
}
}
}
}
/* Measure the sum of SATD costs between a source plane and a reference plane
 * (potentially weighted, potentially motion compensated). We always use
 * source images for this analysis since reference recon pixels have
 * unreliable availability.
 *
 * fenc       - source plane
 * ref        - reference plane
 * weightTemp - scratch plane that receives the weighted reference when w is set
 * stride     - stride shared by fenc, ref and weightTemp
 * cache      - supplies the per-block intra costs (luma) and csp (chroma)
 * width      - plane width in pixels
 * height     - plane height in pixels
 * w          - weight to apply to the reference, or NULL for an unweighted cost
 * bLuma      - true for the lowres luma plane, false for a chroma plane
 *
 * Luma is measured in 8x8 blocks and each block cost is capped by the
 * matching intra cost; chroma is measured in 16x16 blocks for 4:4:4 and in
 * 8x8 blocks otherwise. Returns the accumulated cost. */
uint32_t weightCost(pixel *         fenc,
                    pixel *         ref,
                    pixel *         weightTemp,
                    intptr_t        stride,
                    const Cache &   cache,
                    int             width,
                    int             height,
                    WeightParam *   w,
                    bool            bLuma)
{
    if (w)
    {
        /* make a weighted copy of the reference plane */

        /* inputOffset may be negative: scale it with a multiplication, since
         * left-shifting a negative signed value is undefined before C++20 */
        int offset = w->inputOffset * (1 << (X265_DEPTH - 8));
        int weight = w->inputWeight;
        int denom = w->log2WeightDenom;
        int round = denom ? 1 << (denom - 1) : 0;
        int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
        int pwidth = ((width + 15) >> 4) << 4;          /* width rounded up to a multiple of 16 */

        primitives.weight_pp(ref, weightTemp, stride, pwidth, height,
                             weight, round << correction, denom + correction, offset);
        ref = weightTemp;
    }

    uint32_t cost = 0;
    pixel *f = fenc, *r = ref;

    if (bLuma)
    {
        /* cu counts blocks in raster order to index the intra cost table */
        int cu = 0;
        for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride)
        {
            for (int x = 0; x < width; x += 8, cu++)
            {
                int cmp = primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride);
                cost += X265_MIN(cmp, cache.intraCost[cu]);
            }
        }
    }
    else if (cache.csp == X265_CSP_I444)
    {
        for (int y = 0; y < height; y += 16, r += 16 * stride, f += 16 * stride)
        {
            for (int x = 0; x < width; x += 16)
                cost += primitives.pu[LUMA_16x16].satd(r + x, stride, f + x, stride);
        }
    }
    else
    {
        for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride)
        {
            for (int x = 0; x < width; x += 8)
                cost += primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride);
        }
    }

    return cost;
}
}
namespace X265_NS {
/* Search explicit weighted prediction parameters for reference 0 of every
 * active reference list and store the result in slice.m_weightPredTable.
 *
 * slice - slice being encoded; receives the weight table, or has its weights
 *         disabled when the scratch buffer cannot be allocated
 * frame - source frame; its lowres statistics (wp_ssd, wp_sum, intraCost,
 *         lowresMvs) and its full resolution chroma planes are read
 * param - encoder parameters; bframes bounds the reuse of lookahead motion
 *         vectors and logLevel gates the trace output at the end
 *
 * For each plane an initial scale is guessed from the plane statistics, then
 * a small window of scales and offsets around it is scored with weightCost()
 * plus sliceHeaderCost(). Chroma is only analysed when luma found a weight.
 * References other than index 0 always get the default (unweighted) state. */
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param)
{
    WeightParam wp[2][MAX_NUM_REF][3];
    PicYuv *fencPic = frame.m_fencPic;
    Lowres& fenc = frame.m_lowres;

    Cache cache;

    memset(&cache, 0, sizeof(cache));
    cache.intraCost = fenc.intraCost;
    cache.numPredDir = slice.isInterP() ? 1 : 2;
    cache.lowresWidthInCU = fenc.width >> 3;
    cache.lowresHeightInCU = fenc.lines >> 3;
    cache.csp = fencPic->m_picCsp;
    cache.hshift = CHROMA_H_SHIFT(cache.csp);
    cache.vshift = CHROMA_V_SHIFT(cache.csp);

    /* Use single allocation for motion compensated ref and weight buffers */
    pixel *mcbuf = X265_MALLOC(pixel, 2 * fencPic->m_stride * fencPic->m_picHeight);
    if (!mcbuf)
    {
        slice.disableWeights();
        return;
    }
    /* second half of the allocation holds the weighted reference */
    pixel *weightTemp = mcbuf + fencPic->m_stride * fencPic->m_picHeight;

    int lambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
    int curPoc = slice.m_poc;
    const float epsilon = 1.f / 128.f;

    int chromaDenom, lumaDenom, denom;
    chromaDenom = lumaDenom = 7;
    int numpixels[3];
    int w16 = ((fencPic->m_picWidth + 15) >> 4) << 4;
    int h16 = ((fencPic->m_picHeight + 15) >> 4) << 4;
    numpixels[0] = w16 * h16;
    numpixels[1] = numpixels[2] = numpixels[0] >> (cache.hshift + cache.vshift);

    for (int list = 0; list < cache.numPredDir; list++)
    {
        WeightParam *weights = wp[list][0];
        Frame *refFrame = slice.m_refFrameList[list][0];
        Lowres& refLowres = refFrame->m_lowres;
        int diffPoc = abs(curPoc - refFrame->m_poc);

        /* prepare estimates */
        float guessScale[3], fencMean[3], refMean[3];
        for (int plane = 0; plane < 3; plane++)
        {
            SET_WEIGHT(weights[plane], false, 1, 0, 0);
            /* both variances get +1 when the reference variance is zero, which
             * keeps the ratio below well defined */
            uint64_t fencVar = fenc.wp_ssd[plane] + !refLowres.wp_ssd[plane];
            uint64_t refVar = refLowres.wp_ssd[plane] + !refLowres.wp_ssd[plane];
            guessScale[plane] = sqrt((float)fencVar / refVar);
            fencMean[plane] = (float)fenc.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8));
            refMean[plane] = (float)refLowres.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8));
        }

        /* make sure both our scale factors fit */
        while (!list && chromaDenom > 0)
        {
            float thresh = 127.f / (1 << chromaDenom);
            if (guessScale[1] < thresh && guessScale[2] < thresh)
                break;
            chromaDenom--;
        }

        SET_WEIGHT(weights[1], false, 1 << chromaDenom, chromaDenom, 0);
        SET_WEIGHT(weights[2], false, 1 << chromaDenom, chromaDenom, 0);

        /* set while analysing luma and reused by both chroma planes */
        MV *mvs = NULL;

        for (int plane = 0; plane < 3; plane++)
        {
            denom = plane ? chromaDenom : lumaDenom;
            /* chroma weights are only considered when luma is weighted */
            if (plane && !weights[0].bPresentFlag)
                break;

            /* Early termination */
            x265_emms();
            if (fabsf(refMean[plane] - fencMean[plane]) < 0.5f && fabsf(1.f - guessScale[plane]) < epsilon)
            {
                SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0);
                continue;
            }

            if (plane)
            {
                int scale = x265_clip3(0, 255, (int)(guessScale[plane] * (1 << denom) + 0.5f));
                if (scale > 127)
                    continue;
                weights[plane].inputWeight = scale;
            }
            else
            {
                weights[plane].setFromWeightAndOffset((int)(guessScale[plane] * (1 << denom) + 0.5f), 0, denom, !list);
            }

            int mindenom = weights[plane].log2WeightDenom;
            int minscale = weights[plane].inputWeight;
            int minoff = 0;

            if (!plane && diffPoc <= param.bframes + 1)
            {
                mvs = fenc.lowresMvs[list][diffPoc - 1];

                /* test whether this motion search was performed by lookahead */
                if (mvs[0].x != 0x7FFF)
                {
                    /* reference chroma planes must be extended prior to being
                     * used as motion compensation sources */
                    if (!refFrame->m_bChromaExtended)
                    {
                        refFrame->m_bChromaExtended = true;
                        PicYuv *refPic = refFrame->m_fencPic;
                        int width = refPic->m_picWidth >> cache.hshift;
                        int height = refPic->m_picHeight >> cache.vshift;
                        extendPicBorder(refPic->m_picOrg[1], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY);
                        extendPicBorder(refPic->m_picOrg[2], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY);
                    }
                }
                else
                    mvs = 0;
            }

            /* prepare inputs to weight analysis */
            pixel *orig;
            pixel *fref;
            intptr_t stride;
            int width, height;
            switch (plane)
            {
            case 0:
                orig = fenc.lowresPlane[0];
                stride = fenc.lumaStride;
                width = fenc.width;
                height = fenc.lines;
                fref = refLowres.lowresPlane[0];
                if (mvs)
                {
                    mcLuma(mcbuf, refLowres, mvs);
                    fref = mcbuf;
                }
                break;

            case 1:
                orig = fencPic->m_picOrg[1];
                stride = fencPic->m_strideC;
                fref = refFrame->m_fencPic->m_picOrg[1];

                /* Clamp the chroma dimensions to the nearest multiple of
                 * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
                 * blocks and weightCost measures 8x8 blocks. This
                 * potentially ignores some edge pixels, but simplifies the
                 * logic and prevents reading uninitialized pixels. Lowres
                 * planes are border extended and require no clamping. */
                width = ((fencPic->m_picWidth >> 4) << 4) >> cache.hshift;
                height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift;
                if (mvs)
                {
                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
                    fref = mcbuf;
                }
                break;

            case 2:
                orig = fencPic->m_picOrg[2];
                stride = fencPic->m_strideC;
                fref = refFrame->m_fencPic->m_picOrg[2];
                width = ((fencPic->m_picWidth >> 4) << 4) >> cache.hshift;
                height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift;
                if (mvs)
                {
                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
                    fref = mcbuf;
                }
                break;

            default:
                slice.disableWeights();
                X265_FREE(mcbuf);
                return;
            }

            /* unweighted cost is the baseline a weight has to beat */
            uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !plane);
            if (!origscore)
            {
                SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0);
                continue;
            }

            uint32_t minscore = origscore;
            bool bFound = false;

            /* x264 uses a table lookup here, selecting search range based on preset */
            static const int scaleDist = 4;
            static const int offsetDist = 2;

            int startScale = x265_clip3(0, 127, minscale - scaleDist);
            int endScale = x265_clip3(0, 127, minscale + scaleDist);
            for (int scale = startScale; scale <= endScale; scale++)
            {
                int deltaWeight = scale - (1 << mindenom);
                if (deltaWeight > 127 || deltaWeight <= -128)
                    continue;

                x265_emms();
                int curScale = scale;
                int curOffset = (int)(fencMean[plane] - refMean[plane] * curScale / (1 << mindenom) + 0.5f);
                if (curOffset < -128 || curOffset > 127)
                {
                    /* Rescale considering the constraints on curOffset. We do it in this order
                     * because scale has a much wider range than offset (because of denom), so
                     * it should almost never need to be clamped. */
                    curOffset = x265_clip3(-128, 127, curOffset);

                    /* Clamp in floating point before converting: with an all
                     * black or nearly black reference the quotient is infinite
                     * or exceeds INT_MAX, and converting such a value to int
                     * is undefined. A zero reference mean saturates to the
                     * upper bound. */
                    float rescaled = 127.f;
                    if (refMean[plane] > 0.f)
                        rescaled = (1 << mindenom) * (fencMean[plane] - curOffset) / refMean[plane] + 0.5f;

                    if (rescaled >= 127.f)
                        curScale = 127;
                    else if (rescaled > 0.f)
                        curScale = (int)rescaled;
                    else
                        curScale = 0;
                }

                int startOffset = x265_clip3(-128, 127, curOffset - offsetDist);
                int endOffset = x265_clip3(-128, 127, curOffset + offsetDist);
                for (int off = startOffset; off <= endOffset; off++)
                {
                    WeightParam wsp;
                    SET_WEIGHT(wsp, true, curScale, mindenom, off);
                    uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &wsp, !plane) +
                                 sliceHeaderCost(&wsp, lambda, !!plane);
                    COPY4_IF_LT(minscore, s, minscale, curScale, minoff, off, bFound, true);

                    /* Don't check any more offsets if the previous one had a lower cost than the current one */
                    if (minoff == startOffset && off != startOffset)
                        break;
                }
            }

            /* Use a smaller luma denominator if possible */
            if (!(plane || list))
            {
                while (mindenom > 0 && !(minscale & 1))
                {
                    mindenom--;
                    minscale >>= 1;
                }
            }

            /* keep the weight only when it differs from identity and saves
             * more than 0.2% of the unweighted cost */
            if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
            {
                SET_WEIGHT(weights[plane], false, 1 << denom, denom, 0);
            }
            else
            {
                SET_WEIGHT(weights[plane], true, minscale, mindenom, minoff);
            }
        }

        if (weights[0].bPresentFlag)
        {
            // Make sure both chroma channels match
            if (weights[1].bPresentFlag != weights[2].bPresentFlag)
            {
                if (weights[1].bPresentFlag)
                    weights[2] = weights[1];
                else
                    weights[1] = weights[2];
            }
        }

        /* the denominators chosen here carry over to the next list */
        lumaDenom = weights[0].log2WeightDenom;
        chromaDenom = weights[1].log2WeightDenom;

        /* reset weight states */
        for (int ref = 1; ref < slice.m_numRefIdx[list]; ref++)
        {
            SET_WEIGHT(wp[list][ref][0], false, 1 << lumaDenom, lumaDenom, 0);
            SET_WEIGHT(wp[list][ref][1], false, 1 << chromaDenom, chromaDenom, 0);
            SET_WEIGHT(wp[list][ref][2], false, 1 << chromaDenom, chromaDenom, 0);
        }
    }

    X265_FREE(mcbuf);

    memcpy(slice.m_weightPredTable, wp, sizeof(WeightParam) * 2 * MAX_NUM_REF * 3);

    if (param.logLevel >= X265_LOG_FULL)
    {
        /* trace the chosen weights of reference 0 for each list */
        char buf[1024];
        int p = 0;
        bool bWeighted = false;

        p = sprintf(buf, "poc: %d weights:", slice.m_poc);
        int numPredDir = slice.isInterP() ? 1 : 2;
        for (int list = 0; list < numPredDir; list++)
        {
            WeightParam* w = &wp[list][0][0];
            if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
            {
                bWeighted = true;
                p += sprintf(buf + p, " [L%d:R0 ", list);
                if (w[0].bPresentFlag)
                    p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
                if (w[1].bPresentFlag)
                    p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
                if (w[2].bPresentFlag)
                    p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
                p += sprintf(buf + p, "]");
            }
        }

        if (bWeighted)
        {
            if (p < 80) // pad with spaces to ensure progress line overwritten
                sprintf(buf + p, "%*s", 80 - p, " ");
            x265_log(&param, X265_LOG_FULL, "%s\n", buf);
        }
    }
}
}