forked from mirror/libbpg
libbpg-0.9.6
This commit is contained in:
parent
3035b41edf
commit
35a8402710
248 changed files with 232891 additions and 100 deletions
48
x265/source/encoder/CMakeLists.txt
Normal file
48
x265/source/encoder/CMakeLists.txt
Normal file
|
@ -0,0 +1,48 @@
|
|||
# vim: syntax=cmake
|
||||
|
||||
# Compiler-specific warning suppressions for the sources in this directory
if(GCC)
    add_definitions(-Wno-uninitialized)
    if(CC_HAS_NO_STRICT_OVERFLOW)
        # GCC 4.9.2 gives warnings we know we can ignore in this file
        set_source_files_properties(slicetype.cpp PROPERTIES COMPILE_FLAGS -Wno-strict-overflow)
    endif()
endif()

if(MSVC)
    add_definitions(/wd4701) # potentially uninitialized local variable 'foo' used
endif()

# Forward every enabled multilib / shared-library option to api.cpp as
# -D<OPTION>=1; the list is flattened to a space separated string because
# COMPILE_FLAGS takes a single string
foreach(API_OPTION LINKED_8BIT LINKED_10BIT LINKED_12BIT ENABLE_SHARED)
    if(${API_OPTION})
        list(APPEND APIFLAGS "-D${API_OPTION}=1")
    endif()
endforeach()

string(REPLACE ";" " " APIFLAGSTR "${APIFLAGS}")
set_source_files_properties(api.cpp PROPERTIES COMPILE_FLAGS "${APIFLAGSTR}")

# Object library holding all encoder sources (headers listed alongside
# their implementation files)
add_library(encoder OBJECT
    ../x265.h
    analysis.cpp         analysis.h
    search.cpp           search.h
    bitcost.cpp          bitcost.h rdcost.h
    motion.cpp           motion.h
    slicetype.cpp        slicetype.h
    frameencoder.cpp     frameencoder.h
    framefilter.cpp      framefilter.h
    level.cpp            level.h
    nal.cpp              nal.h
    sei.cpp              sei.h
    sao.cpp              sao.h
    entropy.cpp          entropy.h
    dpb.cpp              dpb.h
    ratecontrol.cpp      ratecontrol.h
    reference.cpp        reference.h
    encoder.cpp          encoder.h
    api.cpp
    weightPrediction.cpp)
|
2168
x265/source/encoder/analysis.cpp
Normal file
2168
x265/source/encoder/analysis.cpp
Normal file
File diff suppressed because it is too large
Load diff
171
x265/source/encoder/analysis.h
Normal file
171
x265/source/encoder/analysis.h
Normal file
|
@ -0,0 +1,171 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
|
||||
* Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_ANALYSIS_H
|
||||
#define X265_ANALYSIS_H
|
||||
|
||||
#include "common.h"
|
||||
#include "predict.h"
|
||||
#include "quant.h"
|
||||
#include "yuv.h"
|
||||
#include "shortyuv.h"
|
||||
#include "cudata.h"
|
||||
|
||||
#include "entropy.h"
|
||||
#include "search.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
class Entropy;
|
||||
|
||||
class Analysis : public Search
|
||||
{
|
||||
public:
|
||||
|
||||
enum {
|
||||
PRED_MERGE,
|
||||
PRED_SKIP,
|
||||
PRED_INTRA,
|
||||
PRED_2Nx2N,
|
||||
PRED_BIDIR,
|
||||
PRED_Nx2N,
|
||||
PRED_2NxN,
|
||||
PRED_SPLIT,
|
||||
PRED_2NxnU,
|
||||
PRED_2NxnD,
|
||||
PRED_nLx2N,
|
||||
PRED_nRx2N,
|
||||
PRED_INTRA_NxN, /* 4x4 intra PU blocks for 8x8 CU */
|
||||
PRED_LOSSLESS, /* lossless encode of best mode */
|
||||
MAX_PRED_TYPES
|
||||
};
|
||||
|
||||
struct ModeDepth
|
||||
{
|
||||
Mode pred[MAX_PRED_TYPES];
|
||||
Mode* bestMode;
|
||||
Yuv fencYuv;
|
||||
CUDataMemPool cuMemPool;
|
||||
};
|
||||
|
||||
class PMODE : public BondedTaskGroup
|
||||
{
|
||||
public:
|
||||
|
||||
Analysis& master;
|
||||
const CUGeom& cuGeom;
|
||||
int modes[MAX_PRED_TYPES];
|
||||
|
||||
PMODE(Analysis& m, const CUGeom& g) : master(m), cuGeom(g) {}
|
||||
|
||||
void processTasks(int workerThreadId);
|
||||
|
||||
protected:
|
||||
|
||||
PMODE operator=(const PMODE&);
|
||||
};
|
||||
|
||||
void processPmode(PMODE& pmode, Analysis& slave);
|
||||
|
||||
ModeDepth m_modeDepth[NUM_CU_DEPTH];
|
||||
bool m_bTryLossless;
|
||||
bool m_bChromaSa8d;
|
||||
|
||||
Analysis();
|
||||
|
||||
bool create(ThreadLocalData* tld);
|
||||
void destroy();
|
||||
|
||||
Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
|
||||
|
||||
protected:
|
||||
|
||||
/* Analysis data for load/save modes, keeps getting incremented as CTU analysis proceeds and data is consumed or read */
|
||||
analysis_intra_data* m_reuseIntraDataCTU;
|
||||
analysis_inter_data* m_reuseInterDataCTU;
|
||||
int32_t* m_reuseRef;
|
||||
uint32_t* m_reuseBestMergeCand;
|
||||
|
||||
uint32_t m_splitRefIdx[4];
|
||||
|
||||
/* full analysis for an I-slice CU */
|
||||
void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
|
||||
|
||||
/* full analysis for a P or B slice CU */
|
||||
uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
|
||||
uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
|
||||
uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
|
||||
|
||||
/* measure merge and skip */
|
||||
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
|
||||
void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isShareMergeCand);
|
||||
|
||||
/* measure inter options */
|
||||
void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
|
||||
void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
|
||||
|
||||
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
|
||||
|
||||
/* encode current bestMode losslessly, pick best RD cost */
|
||||
void tryLossless(const CUGeom& cuGeom);
|
||||
|
||||
/* add the RD cost of coding a split flag (0 or 1) to the given mode */
|
||||
void addSplitFlagCost(Mode& mode, uint32_t depth);
|
||||
|
||||
/* work-avoidance heuristics for RD levels < 5 */
|
||||
uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom);
|
||||
bool recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode);
|
||||
|
||||
/* generate residual and recon pixels for an entire CTU recursively (RD0) */
|
||||
void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
|
||||
|
||||
int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom);
|
||||
|
||||
/* check whether current mode is the new best */
|
||||
inline void checkBestMode(Mode& mode, uint32_t depth)
|
||||
{
|
||||
X265_CHECK(mode.ok(), "mode costs are uninitialized\n");
|
||||
|
||||
ModeDepth& md = m_modeDepth[depth];
|
||||
if (md.bestMode)
|
||||
{
|
||||
if (mode.rdCost < md.bestMode->rdCost)
|
||||
md.bestMode = &mode;
|
||||
}
|
||||
else
|
||||
md.bestMode = &mode;
|
||||
}
|
||||
};
|
||||
|
||||
struct ThreadLocalData
|
||||
{
|
||||
Analysis analysis;
|
||||
|
||||
void destroy() { analysis.destroy(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ifndef X265_ANALYSIS_H
|
523
x265/source/encoder/api.cpp
Normal file
523
x265/source/encoder/api.cpp
Normal file
|
@ -0,0 +1,523 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "bitstream.h"
|
||||
#include "param.h"
|
||||
|
||||
#include "encoder.h"
|
||||
#include "entropy.h"
|
||||
#include "level.h"
|
||||
#include "nal.h"
|
||||
#include "bitcost.h"
|
||||
|
||||
/* multilib namespace reflectors */
|
||||
#if LINKED_8BIT
|
||||
namespace x265_8bit {
|
||||
const x265_api* x265_api_get(int bitDepth);
|
||||
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if LINKED_10BIT
|
||||
namespace x265_10bit {
|
||||
const x265_api* x265_api_get(int bitDepth);
|
||||
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if LINKED_12BIT
|
||||
namespace x265_12bit {
|
||||
const x265_api* x265_api_get(int bitDepth);
|
||||
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if EXPORT_C_API
|
||||
/* these functions are exported as C functions (default) */
|
||||
using namespace X265_NS;
|
||||
extern "C" {
|
||||
#else
|
||||
/* these functions exist within private namespace (multilib) */
|
||||
namespace X265_NS {
|
||||
#endif
|
||||
|
||||
/* Create an encoder from the caller's parameter set.
 *
 * 'p' is copied; the encoder works on its own x265_param plus a second copy
 * (m_latestParam) used by x265_encoder_reconfig(). Returns the new encoder,
 * or NULL when 'p' is NULL, the build's bit depth is inconsistent, an
 * allocation fails, parameter validation fails, level enforcement fails, the
 * stream would be non-conformant while that is not allowed, or the encoder
 * reports m_aborted after create().
 *
 * NOTE(review): x265_encoder_close() decrements g_ctuSizeConfigured. If
 * x265_set_globals() is what increments it, the failure paths taken after
 * that call leave the counter un-decremented -- confirm against
 * x265_set_globals(). */
x265_encoder *x265_encoder_open(x265_param *p)
{
    if (!p)
        return NULL;

#if _MSC_VER
#pragma warning(disable: 4127) // conditional expression is constant, yes I know
#endif

    /* sanity check of the compile-time bit depth against the build flavour */
#if HIGH_BIT_DEPTH
    if (X265_DEPTH != 10 && X265_DEPTH != 12)
#else
    if (X265_DEPTH != 8)
#endif
    {
        x265_log(p, X265_LOG_ERROR, "Build error, internal bit depth mismatch\n");
        return NULL;
    }

    /* everything released at 'fail' is declared here, ahead of the first
     * goto, so no jump bypasses an initialization */
    Encoder* encoder = NULL;
    x265_param* param = PARAM_NS::x265_param_alloc();
    x265_param* latestParam = PARAM_NS::x265_param_alloc();
    if (!param || !latestParam)
        goto fail;

    memcpy(param, p, sizeof(x265_param));
    x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", PFX(version_str));
    x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str));

    x265_setup_primitives(param);

    /* validate first, then set the process-wide globals; either failing
     * aborts the open */
    if (x265_check_params(param) || x265_set_globals(param))
        goto fail;

    encoder = new Encoder;
    if (!param->rc.bEnableSlowFirstPass)
        PARAM_NS::x265_param_apply_fastfirstpass(param);

    // may change params for auto-detect, etc
    encoder->configure(param);

    // may change rate control and CPB params
    if (!enforceLevel(*param, encoder->m_vps))
        goto fail;

    // will detect and set profile/tier/level in VPS
    determineLevel(*param, encoder->m_vps);

    if (!param->bAllowNonConformance && encoder->m_vps.ptl.profileIdc == Profile::NONE)
    {
        x265_log(param, X265_LOG_INFO, "non-conformant bitstreams not allowed (--allow-non-conformance)\n");
        goto fail;
    }

    encoder->create();

    /* snapshot the final parameters for later reconfiguration; done before
     * the abort check, exactly as the encoder is handed the pointer */
    encoder->m_latestParam = latestParam;
    memcpy(latestParam, param, sizeof(x265_param));
    if (encoder->m_aborted)
        goto fail;

    x265_print_params(param);
    return encoder;

fail:
    delete encoder;
    PARAM_NS::x265_param_free(param);
    PARAM_NS::x265_param_free(latestParam);
    return NULL;
}
|
||||
|
||||
/* Generate the stream headers into the encoder's NAL list.
 *
 * On success *pp_nal points at the first NAL of that list, *pi_nal (when
 * non-NULL) receives the NAL count, and the list's m_occupancy is returned.
 * Returns -1 when 'enc' or 'pp_nal' is NULL. */
int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal)
{
    if (!pp_nal || !enc)
        return -1;

    Encoder *encoder = static_cast<Encoder*>(enc);

    /* scratch coder and bitstream used only while writing the headers */
    Entropy sbacCoder;
    Bitstream bs;
    encoder->getStreamHeaders(encoder->m_nalList, sbacCoder, bs);

    *pp_nal = &encoder->m_nalList.m_nal[0];
    if (pi_nal)
        *pi_nal = encoder->m_nalList.m_numNal;

    return encoder->m_nalList.m_occupancy;
}
|
||||
|
||||
/* Copy the encoder's current parameter set (m_param) into 'out'.
 * Does nothing when either pointer is NULL. */
void x265_encoder_parameters(x265_encoder *enc, x265_param *out)
{
    if (!enc || !out)
        return;

    Encoder *encoder = static_cast<Encoder*>(enc);
    memcpy(out, encoder->m_param, sizeof(x265_param));
}
|
||||
|
||||
/* Apply 'param_in' to the encoder's m_latestParam via reconfigureParam().
 *
 * Returns -1 when either argument is NULL, otherwise the result of
 * reconfigureParam(): zero marks the encoder as reconfigured and prints the
 * changed parameters; non-zero rolls m_latestParam back to its prior value. */
int x265_encoder_reconfig(x265_encoder* enc, x265_param* param_in)
{
    if (!enc || !param_in)
        return -1;

    Encoder* encoder = static_cast<Encoder*>(enc);

    /* keep a copy so a rejected reconfiguration can be undone */
    x265_param save;
    memcpy(&save, encoder->m_latestParam, sizeof(x265_param));

    int ret = encoder->reconfigureParam(encoder->m_latestParam, param_in);
    if (!ret)
    {
        encoder->m_reconfigured = true;
        x265_print_reconfigured_params(&save, encoder->m_latestParam);
    }
    else
    {
        /* reconfigure failed, recover saved param set */
        memcpy(encoder->m_latestParam, &save, sizeof(x265_param));
    }

    return ret;
}
|
||||
|
||||
/* Submit 'pic_in' (or flush when it is NULL) and fetch encoded output.
 *
 * Returns -1 when 'enc' is NULL, otherwise the value of Encoder::encode().
 * When that value is positive and 'pp_nal' is non-NULL, *pp_nal / *pi_nal
 * describe the encoder's NAL list; in every other case *pi_nal (if given) is
 * set to 0. The analysisData intra/inter pointers of 'pic_in' are cleared on
 * return. */
int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
{
    if (!enc)
        return -1;

    Encoder *encoder = static_cast<Encoder*>(enc);
    int numEncoded;

    // While flushing, we cannot return 0 until the entire stream is flushed
    for (;;)
    {
        numEncoded = encoder->encode(pic_in, pic_out);

        /* retry only while flushing (no input), nothing came out, and the
         * encoder still holds delayed pictures */
        if (numEncoded != 0 || pic_in || !encoder->m_numDelayedPic)
            break;
    }

    // do not allow reuse of these buffers for more than one picture. The
    // encoder now owns these analysisData buffers.
    if (pic_in)
    {
        pic_in->analysisData.intraData = NULL;
        pic_in->analysisData.interData = NULL;
    }

    const bool bHaveOutput = pp_nal && numEncoded > 0;
    if (bHaveOutput)
    {
        *pp_nal = &encoder->m_nalList.m_nal[0];
        if (pi_nal)
            *pi_nal = encoder->m_nalList.m_numNal;
    }
    else if (pi_nal)
    {
        *pi_nal = 0;
    }

    return numEncoded;
}
|
||||
|
||||
/* Fill 'outputStats' through Encoder::fetchStats(), passing along the size
 * of the caller's buffer. Does nothing when either pointer is NULL. */
void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats, uint32_t statsSizeBytes)
{
    if (!enc || !outputStats)
        return;

    Encoder *encoder = static_cast<Encoder*>(enc);
    encoder->fetchStats(outputStats, statsSizeBytes);
}
|
||||
|
||||
/* Deprecated entry point: the two trailing arguments are ignored and the
 * only effect is a deprecation warning logged against the encoder's
 * parameters. Does nothing when 'enc' is NULL. */
void x265_encoder_log(x265_encoder* enc, int, char **)
{
    if (!enc)
        return;

    Encoder *encoder = static_cast<Encoder*>(enc);
    x265_log(encoder->m_param, X265_LOG_WARNING, "x265_encoder_log is now deprecated\n");
}
|
||||
|
||||
/* Shut down and free an encoder: stop its jobs, print the summary, destroy
 * and delete it, then decrement g_ctuSizeConfigured. Does nothing when 'enc'
 * is NULL. */
void x265_encoder_close(x265_encoder *enc)
{
    if (!enc)
        return;

    Encoder *encoder = static_cast<Encoder*>(enc);

    encoder->stopJobs();
    encoder->printSummary();
    encoder->destroy();
    delete encoder;

    /* one fewer encoder counted against the configured CTU size */
    ATOMIC_DEC(&g_ctuSizeConfigured);
}
|
||||
|
||||
void x265_cleanup(void)
|
||||
{
|
||||
if (!g_ctuSizeConfigured)
|
||||
{
|
||||
BitCost::destroy();
|
||||
CUData::s_partSet[0] = NULL; /* allow CUData to adjust to new CTU size */
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate storage for one x265_picture with x265_malloc(). The memory is
 * not initialized here; the result is whatever x265_malloc() returned. */
x265_picture *x265_picture_alloc()
{
    void *mem = x265_malloc(sizeof(x265_picture));
    return (x265_picture*)mem;
}
|
||||
|
||||
/* Reset 'pic' to zeros and seed it from 'param': bit depth and colour space
 * come from the internal settings, forceqp becomes X265_QP_AUTO, and -- only
 * when param->analysisMode is set -- the analysis data is given the CTU
 * count of the frame and NUM_4x4_PARTITIONS. Neither pointer is checked for
 * NULL. */
void x265_picture_init(x265_param *param, x265_picture *pic)
{
    memset(pic, 0, sizeof(x265_picture));

    pic->bitDepth = param->internalBitDepth;
    pic->colorSpace = param->internalCsp;
    pic->forceqp = X265_QP_AUTO;
    pic->quantOffsets = NULL;

    if (!param->analysisMode)
        return;

    /* frame dimensions in CTUs, rounded up */
    uint32_t widthInCU = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
    uint32_t heightInCU = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
    uint32_t numCUsInFrame = widthInCU * heightInCU;

    pic->analysisData.numCUsInFrame = numCUsInFrame;
    pic->analysisData.numPartitions = NUM_4x4_PARTITIONS;
}
|
||||
|
||||
/* Hand a picture obtained from x265_picture_alloc() back to x265_free() */
void x265_picture_free(x265_picture *p)
{
    x265_free(p);
}
|
||||
|
||||
/* API table returned by x265_api_get() / x265_api_query() for this build.
 * The initializer is positional, so the entries must stay in the member
 * order of x265_api (declared outside this file). */
static const x265_api libapi =
{
    /* version identification */
    X265_MAJOR_VERSION,
    X265_BUILD,

    /* sizes of the public structures as compiled into this library */
    sizeof(x265_param),
    sizeof(x265_picture),
    sizeof(x265_analysis_data),
    sizeof(x265_zone),
    sizeof(x265_stats),

    /* build properties */
    PFX(max_bit_depth),
    PFX(version_str),
    PFX(build_info_str),

    /* parameter-set functions */
    &PARAM_NS::x265_param_alloc,
    &PARAM_NS::x265_param_free,
    &PARAM_NS::x265_param_default,
    &PARAM_NS::x265_param_parse,
    &PARAM_NS::x265_param_apply_profile,
    &PARAM_NS::x265_param_default_preset,

    /* picture functions */
    &x265_picture_alloc,
    &x265_picture_free,
    &x265_picture_init,

    /* encoder functions */
    &x265_encoder_open,
    &x265_encoder_parameters,
    &x265_encoder_reconfig,
    &x265_encoder_headers,
    &x265_encoder_encode,
    &x265_encoder_get_stats,
    &x265_encoder_log,
    &x265_encoder_close,
    &x265_cleanup,

    sizeof(x265_frame_stats),
};
|
||||
|
||||
typedef const x265_api* (*api_get_func)(int bitDepth);
|
||||
typedef const x265_api* (*api_query_func)(int bitDepth, int apiVersion, int* err);
|
||||
|
||||
#define xstr(s) str(s)
|
||||
#define str(s) #s
|
||||
|
||||
#if _WIN32
|
||||
#define ext ".dll"
|
||||
#elif MACOS
|
||||
#include <dlfcn.h>
|
||||
#define ext ".dylib"
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#define ext ".so"
|
||||
#endif
|
||||
|
||||
#if ENABLE_SHARED
|
||||
static int g_recursion /* = 0 */;
|
||||
#endif
|
||||
|
||||
/* Return the API table for the requested bit depth.
 *
 * A depth of 0, or one equal to X265_DEPTH, yields this build's own table.
 * Any other depth is looked for, in order, in a statically linked sibling
 * build (LINKED_*BIT) and then -- when ENABLE_SHARED is set -- in a shared
 * library loaded at run time: first the depth-specific library, then the
 * generic "libx265" which is asked for the depth explicitly. Returns NULL
 * when nothing suitable is found or the loaded library reports a different
 * bit depth. The library handle is not released here. */
const x265_api* x265_api_get(int bitDepth)
{
    if (!bitDepth || bitDepth == X265_DEPTH)
        return &libapi;

#if LINKED_8BIT
    if (bitDepth == 8) return x265_8bit::x265_api_get(0);
#endif
#if LINKED_10BIT
    if (bitDepth == 10) return x265_10bit::x265_api_get(0);
#endif
#if LINKED_12BIT
    if (bitDepth == 12) return x265_12bit::x265_api_get(0);
#endif

#if ENABLE_SHARED
    const char* libname = NULL;
    const char* method = "x265_api_get_" xstr(X265_BUILD);
    const char* multilibname = "libx265" ext;

    switch (bitDepth)
    {
    case 12:
        libname = "libx265_main12" ext;
        break;
    case 10:
        libname = "libx265_main10" ext;
        break;
    case 8:
        libname = "libx265_main" ext;
        break;
    default:
        return NULL;
    }

    /* a loaded library may call back into this function; stop once more
     * than one such nested lookup is already in flight */
    if (g_recursion > 1)
        return NULL;
    g_recursion++;

    const x265_api* api = NULL;
    int reqDepth = 0; /* 0 = library's native depth; set when falling back to the multilib */

#if _WIN32
    HMODULE lib = LoadLibraryA(libname);
    if (!lib)
    {
        lib = LoadLibraryA(multilibname);
        reqDepth = bitDepth;
    }
    if (lib)
    {
        api_get_func getApi = (api_get_func)GetProcAddress(lib, method);
        if (getApi)
            api = getApi(reqDepth);
    }
#else
    void* lib = dlopen(libname, RTLD_LAZY | RTLD_LOCAL);
    if (!lib)
    {
        lib = dlopen(multilibname, RTLD_LAZY | RTLD_LOCAL);
        reqDepth = bitDepth;
    }
    if (lib)
    {
        api_get_func getApi = (api_get_func)dlsym(lib, method);
        if (getApi)
            api = getApi(reqDepth);
    }
#endif

    g_recursion--;

    if (api && bitDepth != api->bit_depth)
    {
        x265_log(NULL, X265_LOG_WARNING, "%s does not support requested bitDepth %d\n", libname, bitDepth);
        return NULL;
    }

    return api;
#else
    return NULL;
#endif
}
|
||||
|
||||
/* Return the API table for the requested bit depth, with error reporting.
 *
 * 'apiVersion' below 51 is refused (X265_API_QUERY_ERR_VER_REFUSED).
 * A 'bitDepth' of 0, or one equal to X265_DEPTH, yields this build's own
 * table. Any other depth is looked for in a statically linked sibling build
 * (LINKED_*BIT) and then, when ENABLE_SHARED is set, in a shared library
 * loaded at run time (depth-specific library first, generic "libx265"
 * second). The library handle is not released here.
 *
 * 'err' may be NULL. When given it receives:
 *   X265_API_QUERY_ERR_NONE            on success
 *   X265_API_QUERY_ERR_LIB_NOT_FOUND   no library could be loaded, the depth
 *                                      is not 8/10/12, or recursion limit hit
 *   X265_API_QUERY_ERR_FUNC_NOT_FOUND  library loaded but lacks x265_api_query
 *   X265_API_QUERY_ERR_WRONG_BITDEPTH  library reports a different depth, or
 *                                      no alternative builds are available
 *   (code from the loaded library)     the library's own x265_api_query
 *                                      rejected the request */
const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err)
{
    if (apiVersion < 51)
    {
        /* builds before 1.6 had re-ordered public structs */
        if (err) *err = X265_API_QUERY_ERR_VER_REFUSED;
        return NULL;
    }

    if (err) *err = X265_API_QUERY_ERR_NONE;

    if (bitDepth && bitDepth != X265_DEPTH)
    {
#if LINKED_8BIT
        if (bitDepth == 8) return x265_8bit::x265_api_query(0, apiVersion, err);
#endif
#if LINKED_10BIT
        if (bitDepth == 10) return x265_10bit::x265_api_query(0, apiVersion, err);
#endif
#if LINKED_12BIT
        if (bitDepth == 12) return x265_12bit::x265_api_query(0, apiVersion, err);
#endif

#if ENABLE_SHARED
        const char* libname = NULL;
        const char* method = "x265_api_query";
        const char* multilibname = "libx265" ext;

        if (bitDepth == 12)
            libname = "libx265_main12" ext;
        else if (bitDepth == 10)
            libname = "libx265_main10" ext;
        else if (bitDepth == 8)
            libname = "libx265_main" ext;
        else
        {
            if (err) *err = X265_API_QUERY_ERR_LIB_NOT_FOUND;
            return NULL;
        }

        const x265_api* api = NULL;
        int reqDepth = 0; /* 0 = library's native depth; set when falling back to the multilib */
        int e = X265_API_QUERY_ERR_LIB_NOT_FOUND;
        bool bQueried = false; /* true once the loaded library's query function has run */

        /* a loaded library may call back into this function; stop once more
         * than one such nested lookup is already in flight */
        if (g_recursion > 1)
        {
            if (err) *err = X265_API_QUERY_ERR_LIB_NOT_FOUND;
            return NULL;
        }
        else
            g_recursion++;

#if _WIN32
        HMODULE h = LoadLibraryA(libname);
        if (!h)
        {
            h = LoadLibraryA(multilibname);
            reqDepth = bitDepth;
        }
        if (h)
        {
            e = X265_API_QUERY_ERR_FUNC_NOT_FOUND;
            api_query_func query = (api_query_func)GetProcAddress(h, method);
            if (query)
            {
                bQueried = true;
                api = query(reqDepth, apiVersion, err);
            }
        }
#else
        void* h = dlopen(libname, RTLD_LAZY | RTLD_LOCAL);
        if (!h)
        {
            h = dlopen(multilibname, RTLD_LAZY | RTLD_LOCAL);
            reqDepth = bitDepth;
        }
        if (h)
        {
            e = X265_API_QUERY_ERR_FUNC_NOT_FOUND;
            api_query_func query = (api_query_func)dlsym(h, method);
            if (query)
            {
                bQueried = true;
                api = query(reqDepth, apiVersion, err);
            }
        }
#endif

        g_recursion--;

        if (api && bitDepth != api->bit_depth)
        {
            x265_log(NULL, X265_LOG_WARNING, "%s does not support requested bitDepth %d\n", libname, bitDepth);
            if (err) *err = X265_API_QUERY_ERR_WRONG_BITDEPTH;
            return NULL;
        }

        if (err)
        {
            if (api)
                *err = X265_API_QUERY_ERR_NONE;
            else if (!bQueried || *err == X265_API_QUERY_ERR_NONE)
                *err = e;
            /* else: the library's query function ran and reported its own
             * reason for refusing; keep that code instead of replacing it
             * with a misleading FUNC_NOT_FOUND */
        }
        return api;
#else
        if (err) *err = X265_API_QUERY_ERR_WRONG_BITDEPTH;
        return NULL;
#endif
    }

    return &libapi;
}
|
||||
|
||||
} /* end namespace or extern "C" */
|
91
x265/source/encoder/bitcost.cpp
Normal file
91
x265/source/encoder/bitcost.cpp
Normal file
|
@ -0,0 +1,91 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "primitives.h"
|
||||
#include "bitcost.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
void BitCost::setQP(unsigned int qp)
|
||||
{
|
||||
if (!s_costs[qp])
|
||||
{
|
||||
ScopedLock s(s_costCalcLock);
|
||||
|
||||
// Now that we have acquired the lock, check again if another thread calculated
|
||||
// this row while we were blocked
|
||||
if (!s_costs[qp])
|
||||
{
|
||||
x265_emms(); // just to be safe
|
||||
|
||||
CalculateLogs();
|
||||
s_costs[qp] = new uint16_t[4 * BC_MAX_MV + 1] + 2 * BC_MAX_MV;
|
||||
double lambda = x265_lambda_tab[qp];
|
||||
|
||||
// estimate same cost for negative and positive MVD
|
||||
for (int i = 0; i <= 2 * BC_MAX_MV; i++)
|
||||
s_costs[qp][i] = s_costs[qp][-i] = (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 15) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
m_cost = s_costs[qp];
|
||||
}
|
||||
|
||||
/***
|
||||
* Class static data and methods
|
||||
*/
|
||||
|
||||
uint16_t *BitCost::s_costs[BC_MAX_QP];
|
||||
|
||||
float *BitCost::s_bitsizes;
|
||||
|
||||
Lock BitCost::s_costCalcLock;
|
||||
|
||||
void BitCost::CalculateLogs()
|
||||
{
|
||||
if (!s_bitsizes)
|
||||
{
|
||||
s_bitsizes = new float[2 * BC_MAX_MV + 1];
|
||||
s_bitsizes[0] = 0.718f;
|
||||
float log2_2 = 2.0f / log(2.0f); // 2 x 1/log(2)
|
||||
for (int i = 1; i <= 2 * BC_MAX_MV; i++)
|
||||
s_bitsizes[i] = log((float)(i + 1)) * log2_2 + 1.718f;
|
||||
}
|
||||
}
|
||||
|
||||
void BitCost::destroy()
|
||||
{
|
||||
for (int i = 0; i < BC_MAX_QP; i++)
|
||||
{
|
||||
if (s_costs[i])
|
||||
{
|
||||
delete [] (s_costs[i] - 2 * BC_MAX_MV);
|
||||
|
||||
s_costs[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
delete [] s_bitsizes;
|
||||
s_bitsizes = 0;
|
||||
}
|
93
x265/source/encoder/bitcost.h
Normal file
93
x265/source/encoder/bitcost.h
Normal file
|
@ -0,0 +1,93 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_BITCOST_H
|
||||
#define X265_BITCOST_H
|
||||
|
||||
#include "common.h"
|
||||
#include "threading.h"
|
||||
#include "mv.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private x265 namespace
|
||||
|
||||
class BitCost
|
||||
{
|
||||
public:
|
||||
|
||||
BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
|
||||
|
||||
void setQP(unsigned int qp);
|
||||
|
||||
void setMVP(const MV& mvp) { m_mvp = mvp; m_cost_mvx = m_cost - mvp.x; m_cost_mvy = m_cost - mvp.y; }
|
||||
|
||||
// return bit cost of motion vector difference, multiplied by lambda
|
||||
inline uint16_t mvcost(const MV& mv) const { return m_cost_mvx[mv.x] + m_cost_mvy[mv.y]; }
|
||||
|
||||
// return bit cost of motion vector difference, without lambda
|
||||
inline uint32_t bitcost(const MV& mv) const
|
||||
{
|
||||
return (uint32_t)(s_bitsizes[abs(mv.x - m_mvp.x)] +
|
||||
s_bitsizes[abs(mv.y - m_mvp.y)] + 0.5f);
|
||||
}
|
||||
|
||||
static inline uint32_t bitcost(const MV& mv, const MV& mvp)
|
||||
{
|
||||
return (uint32_t)(s_bitsizes[abs(mv.x - mvp.x)] +
|
||||
s_bitsizes[abs(mv.y - mvp.y)] + 0.5f);
|
||||
}
|
||||
|
||||
static void destroy();
|
||||
|
||||
protected:
|
||||
|
||||
uint16_t *m_cost_mvx;
|
||||
|
||||
uint16_t *m_cost_mvy;
|
||||
|
||||
uint16_t *m_cost;
|
||||
|
||||
MV m_mvp;
|
||||
|
||||
BitCost& operator =(const BitCost&);
|
||||
|
||||
private:
|
||||
|
||||
/* default log2_max_mv_length_horizontal and log2_max_mv_length_horizontal
|
||||
* are 15, specified in quarter-pel luma sample units. making the maximum
|
||||
* signaled ful-pel motion distance 4096, max qpel is 32768 */
|
||||
enum { BC_MAX_MV = (1 << 15) };
|
||||
|
||||
enum { BC_MAX_QP = 82 };
|
||||
|
||||
static float *s_bitsizes;
|
||||
|
||||
static uint16_t *s_costs[BC_MAX_QP];
|
||||
|
||||
static Lock s_costCalcLock;
|
||||
|
||||
static void CalculateLogs();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_BITCOST_H
|
303
x265/source/encoder/dpb.cpp
Normal file
303
x265/source/encoder/dpb.cpp
Normal file
|
@ -0,0 +1,303 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "frame.h"
|
||||
#include "framedata.h"
|
||||
#include "picyuv.h"
|
||||
#include "slice.h"
|
||||
|
||||
#include "dpb.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
/* Destroy and delete every Frame still held in the free list and the
 * picture list, then every FrameData on the recycled-data list together
 * with the reconstructed picture each one carries. */
DPB::~DPB()
{
    for (; !m_freeList.empty(); )
    {
        Frame* frame = m_freeList.popFront();
        frame->destroy();
        delete frame;
    }

    for (; !m_picList.empty(); )
    {
        Frame* frame = m_picList.popFront();
        frame->destroy();
        delete frame;
    }

    while (m_frameDataFreeList)
    {
        /* grab the successor first; the node is gone after the delete */
        FrameData* following = m_frameDataFreeList->m_freeListNext;

        m_frameDataFreeList->destroy();

        m_frameDataFreeList->m_reconPic->destroy();
        delete m_frameDataFreeList->m_reconPic;

        delete m_frameDataFreeList;
        m_frameDataFreeList = following;
    }
}
|
||||
|
||||
// move unreferenced pictures from picList to freeList for recycle
|
||||
void DPB::recycleUnreferenced()
|
||||
{
|
||||
Frame *iterFrame = m_picList.first();
|
||||
|
||||
while (iterFrame)
|
||||
{
|
||||
Frame *curFrame = iterFrame;
|
||||
iterFrame = iterFrame->m_next;
|
||||
if (!curFrame->m_encData->m_bHasReferences && !curFrame->m_countRefEncoders)
|
||||
{
|
||||
curFrame->m_reconRowCount.set(0);
|
||||
curFrame->m_bChromaExtended = false;
|
||||
|
||||
// iterator is invalidated by remove, restart scan
|
||||
m_picList.remove(*curFrame);
|
||||
iterFrame = m_picList.first();
|
||||
|
||||
m_freeList.pushBack(*curFrame);
|
||||
curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
|
||||
m_frameDataFreeList = curFrame->m_encData;
|
||||
curFrame->m_encData = NULL;
|
||||
curFrame->m_reconPic = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up the slice of newFrame for encoding and insert the frame into the DPB.
 *
 * In order: chooses the NAL unit type and slice type, initialises the
 * m_bHasReferences flag, pushes the frame on the front of m_picList, applies
 * decoding refresh marking, builds the reference picture set, drops pictures
 * that fell out of the RPS, builds the reference lists, sets the collocated
 * picture fields, and finally takes a reference on every frame the slice
 * predicts from. */
void DPB::prepareEncode(Frame *newFrame)
{
    FrameData* encData = newFrame->m_encData;
    Slice* slice = encData->m_slice;

    const int pocCurr = newFrame->m_poc;
    const int type = newFrame->m_lowres.sliceType;
    const bool bIsKeyFrame = newFrame->m_lowres.bKeyframe;

    slice->m_poc = pocCurr;

    slice->m_nalUnitType = getNalUnitType(pocCurr, bIsKeyFrame);
    if (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
        m_lastIDR = pocCurr;
    slice->m_lastIDR = m_lastIDR;

    if (IS_X265_TYPE_B(type))
        slice->m_sliceType = B_SLICE;
    else if (type == X265_TYPE_P)
        slice->m_sliceType = P_SLICE;
    else
        slice->m_sliceType = I_SLICE;

    /* m_bHasReferences starts out as true for every picture except a plain
     * X265_TYPE_B, and is set to false once no more pictures reference it */
    const bool bUnreferencedB = (type == X265_TYPE_B);
    encData->m_bHasReferences = !bUnreferencedB;

    if (bUnreferencedB)
    {
        // Unreferenced B frames use the _N ("non-referenced") flavour of the
        // NAL unit type instead of the _R ("referenced") one
        const NalUnitType referencedType = slice->m_nalUnitType;

        if (referencedType == NAL_UNIT_CODED_SLICE_TRAIL_R)
            slice->m_nalUnitType = m_bTemporalSublayer ? NAL_UNIT_CODED_SLICE_TSA_N : NAL_UNIT_CODED_SLICE_TRAIL_N;
        else if (referencedType == NAL_UNIT_CODED_SLICE_RADL_R)
            slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RADL_N;
        else if (referencedType == NAL_UNIT_CODED_SLICE_RASL_R)
            slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RASL_N;
    }

    m_picList.pushFront(*newFrame);

    // Do decoding refresh marking if any
    decodingRefreshMarking(pocCurr, slice->m_nalUnitType);

    computeRPS(pocCurr, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering);

    // Mark pictures in m_picList as unreferenced if they are not included in the RPS
    applyReferencePictureSet(&slice->m_rps, pocCurr);

    // L0 is limited to the negative-delta pictures, L1 to the positive-delta ones
    slice->m_numRefIdx[0] = X265_MIN(m_maxRefL0, slice->m_rps.numberOfNegativePictures);
    slice->m_numRefIdx[1] = X265_MIN(m_maxRefL1, slice->m_rps.numberOfPositivePictures);
    slice->setRefPicList(m_picList);

    X265_CHECK(slice->m_sliceType != B_SLICE || slice->m_numRefIdx[1], "B slice without L1 references (non-fatal)\n");

    /* TODO: the lookahead should be able to tell which reference picture
     * had the least motion residual. We should be able to use that here to
     * select a colocation reference list and index. Until then B slices take
     * the collocated picture from L1 index 0, other slices from L0 index 0 */
    const bool bIsBSlice = (slice->m_sliceType == B_SLICE);
    slice->m_colFromL0Flag = !bIsBSlice;
    slice->m_colRefIdx = 0;
    slice->m_bCheckLDC = !bIsBSlice;

    slice->m_sLFaseFlag = (SLFASE_CONSTANT & (1 << (pocCurr % 31))) > 0;

    /* Increment reference count of all motion-referenced frames to prevent them
     * from being recycled. These counts are decremented at the end of
     * compressFrame() */
    int numPredDir = 0;
    if (slice->isInterP())
        numPredDir = 1;
    else if (slice->isInterB())
        numPredDir = 2;

    for (int list = 0; list < numPredDir; list++)
    {
        for (int idx = 0; idx < slice->m_numRefIdx[list]; idx++)
        {
            Frame* refFrame = slice->m_refFrameList[list][idx];
            ATOMIC_INC(&refFrame->m_countRefEncoders);
        }
    }
}
|
||||
|
||||
void DPB::computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer)
|
||||
{
|
||||
unsigned int poci = 0, numNeg = 0, numPos = 0;
|
||||
|
||||
Frame* iterPic = m_picList.first();
|
||||
|
||||
while (iterPic && (poci < maxDecPicBuffer - 1))
|
||||
{
|
||||
if ((iterPic->m_poc != curPoc) && iterPic->m_encData->m_bHasReferences)
|
||||
{
|
||||
rps->poc[poci] = iterPic->m_poc;
|
||||
rps->deltaPOC[poci] = rps->poc[poci] - curPoc;
|
||||
(rps->deltaPOC[poci] < 0) ? numNeg++ : numPos++;
|
||||
rps->bUsed[poci] = !isRAP;
|
||||
poci++;
|
||||
}
|
||||
iterPic = iterPic->m_next;
|
||||
}
|
||||
|
||||
rps->numberOfPictures = poci;
|
||||
rps->numberOfPositivePictures = numPos;
|
||||
rps->numberOfNegativePictures = numNeg;
|
||||
|
||||
rps->sortDeltaPOC();
|
||||
}
|
||||
|
||||
/* Marking reference pictures when an IDR/CRA is encountered. */
|
||||
void DPB::decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType)
|
||||
{
|
||||
if (nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
|
||||
{
|
||||
/* If the nal_unit_type is IDR, all pictures in the reference picture
|
||||
* list are marked as "unused for reference" */
|
||||
Frame* iterFrame = m_picList.first();
|
||||
while (iterFrame)
|
||||
{
|
||||
if (iterFrame->m_poc != pocCurr)
|
||||
iterFrame->m_encData->m_bHasReferences = false;
|
||||
iterFrame = iterFrame->m_next;
|
||||
}
|
||||
}
|
||||
else // CRA or No DR
|
||||
{
|
||||
if (m_bRefreshPending && pocCurr > m_pocCRA)
|
||||
{
|
||||
/* If the bRefreshPending flag is true (a deferred decoding refresh
|
||||
* is pending) and the current temporal reference is greater than
|
||||
* the temporal reference of the latest CRA picture (pocCRA), mark
|
||||
* all reference pictures except the latest CRA picture as "unused
|
||||
* for reference" and set the bRefreshPending flag to false */
|
||||
Frame* iterFrame = m_picList.first();
|
||||
while (iterFrame)
|
||||
{
|
||||
if (iterFrame->m_poc != pocCurr && iterFrame->m_poc != m_pocCRA)
|
||||
iterFrame->m_encData->m_bHasReferences = false;
|
||||
iterFrame = iterFrame->m_next;
|
||||
}
|
||||
|
||||
m_bRefreshPending = false;
|
||||
}
|
||||
if (nalUnitType == NAL_UNIT_CODED_SLICE_CRA)
|
||||
{
|
||||
/* If the nal_unit_type is CRA, set the bRefreshPending flag to true
|
||||
* and pocCRA to the temporal reference of the current picture */
|
||||
m_bRefreshPending = true;
|
||||
m_pocCRA = pocCurr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note that the current picture is already placed in the reference list and
|
||||
* its marking is not changed. If the current picture has a nal_ref_idc
|
||||
* that is not 0, it will remain marked as "used for reference" */
|
||||
}
|
||||
|
||||
/** Function for applying picture marking based on the Reference Picture Set */
|
||||
void DPB::applyReferencePictureSet(RPS *rps, int curPoc)
|
||||
{
|
||||
// loop through all pictures in the reference picture buffer
|
||||
Frame* iterFrame = m_picList.first();
|
||||
while (iterFrame)
|
||||
{
|
||||
if (iterFrame->m_poc != curPoc && iterFrame->m_encData->m_bHasReferences)
|
||||
{
|
||||
// loop through all pictures in the Reference Picture Set
|
||||
// to see if the picture should be kept as reference picture
|
||||
bool referenced = false;
|
||||
for (int i = 0; i < rps->numberOfPositivePictures + rps->numberOfNegativePictures; i++)
|
||||
{
|
||||
if (iterFrame->m_poc == curPoc + rps->deltaPOC[i])
|
||||
{
|
||||
referenced = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!referenced)
|
||||
iterFrame->m_encData->m_bHasReferences = false;
|
||||
}
|
||||
iterFrame = iterFrame->m_next;
|
||||
}
|
||||
}
|
||||
|
||||
/* deciding the nal_unit_type */
|
||||
NalUnitType DPB::getNalUnitType(int curPOC, bool bIsKeyFrame)
|
||||
{
|
||||
if (!curPOC)
|
||||
return NAL_UNIT_CODED_SLICE_IDR_W_RADL;
|
||||
|
||||
if (bIsKeyFrame)
|
||||
return m_bOpenGOP ? NAL_UNIT_CODED_SLICE_CRA : NAL_UNIT_CODED_SLICE_IDR_W_RADL;
|
||||
|
||||
if (m_pocCRA && curPOC < m_pocCRA)
|
||||
// All leading pictures are being marked as TFD pictures here since
|
||||
// current encoder uses all reference pictures while encoding leading
|
||||
// pictures. An encoder can ensure that a leading picture can be still
|
||||
// decodable when random accessing to a CRA/CRANT/BLA/BLANT picture by
|
||||
// controlling the reference pictures used for encoding that leading
|
||||
// picture. Such a leading picture need not be marked as a TFD picture.
|
||||
return NAL_UNIT_CODED_SLICE_RASL_R;
|
||||
|
||||
if (m_lastIDR && curPOC < m_lastIDR)
|
||||
return NAL_UNIT_CODED_SLICE_RADL_R;
|
||||
|
||||
return NAL_UNIT_CODED_SLICE_TRAIL_R;
|
||||
}
|
80
x265/source/encoder/dpb.h
Normal file
80
x265/source/encoder/dpb.h
Normal file
|
@ -0,0 +1,80 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_DPB_H
|
||||
#define X265_DPB_H
|
||||
|
||||
#include "piclist.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace for x265
|
||||
|
||||
class Frame;
|
||||
class FrameData;
|
||||
class Slice;
|
||||
|
||||
class DPB
|
||||
{
|
||||
public:
|
||||
|
||||
int m_lastIDR;
|
||||
int m_pocCRA;
|
||||
int m_maxRefL0;
|
||||
int m_maxRefL1;
|
||||
int m_bOpenGOP;
|
||||
bool m_bRefreshPending;
|
||||
bool m_bTemporalSublayer;
|
||||
PicList m_picList;
|
||||
PicList m_freeList;
|
||||
FrameData* m_frameDataFreeList;
|
||||
|
||||
DPB(x265_param *param)
|
||||
{
|
||||
m_lastIDR = 0;
|
||||
m_pocCRA = 0;
|
||||
m_bRefreshPending = false;
|
||||
m_frameDataFreeList = NULL;
|
||||
m_maxRefL0 = param->maxNumReferences;
|
||||
m_maxRefL1 = param->bBPyramid ? 2 : 1;
|
||||
m_bOpenGOP = param->bOpenGOP;
|
||||
m_bTemporalSublayer = !!param->bEnableTemporalSubLayers;
|
||||
}
|
||||
|
||||
~DPB();
|
||||
|
||||
void prepareEncode(Frame*);
|
||||
|
||||
void recycleUnreferenced();
|
||||
|
||||
protected:
|
||||
|
||||
void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);
|
||||
|
||||
void applyReferencePictureSet(RPS *rps, int curPoc);
|
||||
void decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType);
|
||||
|
||||
NalUnitType getNalUnitType(int curPoc, bool bIsKeyFrame);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // X265_DPB_H
|
1921
x265/source/encoder/encoder.cpp
Normal file
1921
x265/source/encoder/encoder.cpp
Normal file
File diff suppressed because it is too large
Load diff
179
x265/source/encoder/encoder.h
Normal file
179
x265/source/encoder/encoder.h
Normal file
|
@ -0,0 +1,179 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_ENCODER_H
|
||||
#define X265_ENCODER_H
|
||||
|
||||
#include "common.h"
|
||||
#include "slice.h"
|
||||
#include "scalinglist.h"
|
||||
#include "x265.h"
|
||||
#include "nal.h"
|
||||
|
||||
struct x265_encoder {};
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
extern const char g_sliceTypeToChar[3];
|
||||
|
||||
class Entropy;
|
||||
|
||||
/* Running totals of encode statistics. All counters start at zero; the add*
 * methods are defined outside this header. */
struct EncStats
{
    double        m_psnrSumY;
    double        m_psnrSumU;
    double        m_psnrSumV;
    double        m_globalSsim;
    double        m_totalQp;
    uint64_t      m_accBits;
    uint32_t      m_numPics;
    uint16_t      m_maxCLL;
    double        m_maxFALL;

    EncStats()
        : m_psnrSumY(0)
        , m_psnrSumU(0)
        , m_psnrSumV(0)
        , m_globalSsim(0)
        , m_totalQp(0)
        , m_accBits(0)
        , m_numPics(0)
        , m_maxCLL(0)
        , m_maxFALL(0)
    {
    }

    void addQP(double aveQp);

    void addPsnr(double psnrY, double psnrU, double psnrV);

    void addBits(uint64_t bits);

    void addSsim(double ssim);
};
|
||||
|
||||
class FrameEncoder;
|
||||
class DPB;
|
||||
class Lookahead;
|
||||
class RateControl;
|
||||
class ThreadPool;
|
||||
|
||||
class Encoder : public x265_encoder
|
||||
{
|
||||
public:
|
||||
|
||||
int m_pocLast; // time index (POC)
|
||||
int m_encodedFrameNum;
|
||||
int m_outputCount;
|
||||
|
||||
int m_bframeDelay;
|
||||
int64_t m_firstPts;
|
||||
int64_t m_bframeDelayTime;
|
||||
int64_t m_prevReorderedPts[2];
|
||||
|
||||
ThreadPool* m_threadPool;
|
||||
FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS];
|
||||
DPB* m_dpb;
|
||||
|
||||
Frame* m_exportedPic;
|
||||
|
||||
int m_numPools;
|
||||
int m_curEncoder;
|
||||
|
||||
/* Collect statistics globally */
|
||||
EncStats m_analyzeAll;
|
||||
EncStats m_analyzeI;
|
||||
EncStats m_analyzeP;
|
||||
EncStats m_analyzeB;
|
||||
int64_t m_encodeStartTime;
|
||||
|
||||
// weighted prediction
|
||||
int m_numLumaWPFrames; // number of P frames with weighted luma reference
|
||||
int m_numChromaWPFrames; // number of P frames with weighted chroma reference
|
||||
int m_numLumaWPBiFrames; // number of B frames with weighted luma reference
|
||||
int m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
|
||||
FILE* m_analysisFile;
|
||||
int m_conformanceMode;
|
||||
VPS m_vps;
|
||||
SPS m_sps;
|
||||
PPS m_pps;
|
||||
NALList m_nalList;
|
||||
ScalingList m_scalingList; // quantization matrix information
|
||||
|
||||
bool m_emitCLLSEI;
|
||||
int m_lastBPSEI;
|
||||
uint32_t m_numDelayedPic;
|
||||
|
||||
x265_param* m_param;
|
||||
x265_param* m_latestParam;
|
||||
RateControl* m_rateControl;
|
||||
Lookahead* m_lookahead;
|
||||
Window m_conformanceWindow;
|
||||
|
||||
bool m_bZeroLatency; // x265_encoder_encode() returns NALs for the input picture, zero lag
|
||||
bool m_aborted; // fatal error detected
|
||||
bool m_reconfigured; // reconfigure of encoder detected
|
||||
|
||||
uint32_t m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
|
||||
uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
|
||||
uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
|
||||
|
||||
Encoder();
|
||||
~Encoder() {}
|
||||
|
||||
void create();
|
||||
void stopJobs();
|
||||
void destroy();
|
||||
|
||||
int encode(const x265_picture* pic, x265_picture *pic_out);
|
||||
|
||||
int reconfigureParam(x265_param* encParam, x265_param* param);
|
||||
|
||||
void getStreamHeaders(NALList& list, Entropy& sbacCoder, Bitstream& bs);
|
||||
|
||||
void fetchStats(x265_stats* stats, size_t statsSizeBytes);
|
||||
|
||||
void printSummary();
|
||||
|
||||
char* statsString(EncStats&, char*);
|
||||
|
||||
void configure(x265_param *param);
|
||||
|
||||
void updateVbvPlan(RateControl* rc);
|
||||
|
||||
void allocAnalysis(x265_analysis_data* analysis);
|
||||
|
||||
void freeAnalysis(x265_analysis_data* analysis);
|
||||
|
||||
void readAnalysisFile(x265_analysis_data* analysis, int poc);
|
||||
|
||||
void writeAnalysisFile(x265_analysis_data* pic);
|
||||
|
||||
void finishFrameStats(Frame* pic, FrameEncoder *curEncoder, uint64_t bits, x265_frame_stats* frameStats);
|
||||
|
||||
protected:
|
||||
|
||||
void initVPS(VPS *vps);
|
||||
void initSPS(SPS *sps);
|
||||
void initPPS(PPS *pps);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_ENCODER_H
|
2288
x265/source/encoder/entropy.cpp
Normal file
2288
x265/source/encoder/entropy.cpp
Normal file
File diff suppressed because it is too large
Load diff
255
x265/source/encoder/entropy.h
Normal file
255
x265/source/encoder/entropy.h
Normal file
|
@ -0,0 +1,255 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_ENTROPY_H
|
||||
#define X265_ENTROPY_H
|
||||
|
||||
#include "common.h"
|
||||
#include "bitstream.h"
|
||||
#include "frame.h"
|
||||
#include "cudata.h"
|
||||
#include "contexts.h"
|
||||
#include "slice.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
struct SaoCtuParam;
|
||||
struct EstBitsSbac;
|
||||
class ScalingList;
|
||||
|
||||
/* How a transform unit is split. The numeric values matter: TURecurse uses
 * them as a table index and as a shift count (1 << mode sections). */
enum SplitType
{
    DONT_SPLIT            = 0, // 1 section
    VERTICAL_SPLIT        = 1, // 2 sections
    QUAD_SPLIT            = 2, // 4 sections
    NUMBER_OF_SPLIT_MODES = 3  // count of the modes above
};
|
||||
|
||||
struct TURecurse
|
||||
{
|
||||
uint32_t section;
|
||||
uint32_t splitMode;
|
||||
uint32_t absPartIdxTURelCU;
|
||||
uint32_t absPartIdxStep;
|
||||
|
||||
TURecurse(SplitType splitType, uint32_t _absPartIdxStep, uint32_t _absPartIdxTU)
|
||||
{
|
||||
static const uint32_t partIdxStepShift[NUMBER_OF_SPLIT_MODES] = { 0, 1, 2 };
|
||||
section = 0;
|
||||
absPartIdxTURelCU = _absPartIdxTU;
|
||||
splitMode = (uint32_t)splitType;
|
||||
absPartIdxStep = _absPartIdxStep >> partIdxStepShift[splitMode];
|
||||
}
|
||||
|
||||
bool isNextSection()
|
||||
{
|
||||
if (splitMode == DONT_SPLIT)
|
||||
{
|
||||
section++;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
absPartIdxTURelCU += absPartIdxStep;
|
||||
|
||||
section++;
|
||||
return section < (uint32_t)(1 << splitMode);
|
||||
}
|
||||
}
|
||||
|
||||
bool isLastSection() const
|
||||
{
|
||||
return (section + 1) >= (uint32_t)(1 << splitMode);
|
||||
}
|
||||
};
|
||||
|
||||
struct EstBitsSbac
|
||||
{
|
||||
int significantCoeffGroupBits[NUM_SIG_CG_FLAG_CTX][2];
|
||||
int significantBits[2][NUM_SIG_FLAG_CTX];
|
||||
int lastBits[2][10];
|
||||
int greaterOneBits[NUM_ONE_FLAG_CTX][2];
|
||||
int levelAbsBits[NUM_ABS_FLAG_CTX][2];
|
||||
int blockCbpBits[NUM_QT_CBF_CTX][2];
|
||||
int blockRootCbpBits[2];
|
||||
};
|
||||
|
||||
class Entropy : public SyntaxElementWriter
|
||||
{
|
||||
public:
|
||||
|
||||
uint64_t m_pad;
|
||||
uint8_t m_contextState[160]; // MAX_OFF_CTX_MOD + padding
|
||||
|
||||
/* CABAC state */
|
||||
uint32_t m_low;
|
||||
uint32_t m_range;
|
||||
uint32_t m_bufferedByte;
|
||||
int m_numBufferedBytes;
|
||||
int m_bitsLeft;
|
||||
uint64_t m_fracBits;
|
||||
EstBitsSbac m_estBitsSbac;
|
||||
|
||||
Entropy();
|
||||
|
||||
void setBitstream(Bitstream* p) { m_bitIf = p; }
|
||||
|
||||
uint32_t getNumberOfWrittenBits()
|
||||
{
|
||||
X265_CHECK(!m_bitIf, "bit counting mode expected\n");
|
||||
return (uint32_t)(m_fracBits >> 15);
|
||||
}
|
||||
|
||||
#if CHECKED_BUILD || _DEBUG
|
||||
bool m_valid;
|
||||
void markInvalid() { m_valid = false; }
|
||||
void markValid() { m_valid = true; }
|
||||
#else
|
||||
void markValid() { }
|
||||
#endif
|
||||
void zeroFract() { m_fracBits = 0; }
|
||||
void resetBits();
|
||||
void resetEntropy(const Slice& slice);
|
||||
|
||||
// SBAC RD
|
||||
void load(const Entropy& src) { copyFrom(src); }
|
||||
void store(Entropy& dest) const { dest.copyFrom(*this); }
|
||||
void loadContexts(const Entropy& src) { copyContextsFrom(src); }
|
||||
void loadIntraDirModeLuma(const Entropy& src);
|
||||
void copyState(const Entropy& other);
|
||||
|
||||
void codeVPS(const VPS& vps);
|
||||
void codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl);
|
||||
void codePPS(const PPS& pps);
|
||||
void codeVUI(const VUI& vui, int maxSubTLayers);
|
||||
void codeAUD(const Slice& slice);
|
||||
void codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers);
|
||||
|
||||
void codeSliceHeader(const Slice& slice, FrameData& encData);
|
||||
void codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset);
|
||||
void codeShortTermRefPicSet(const RPS& rps);
|
||||
void finishSlice() { encodeBinTrm(1); finish(); dynamic_cast<Bitstream*>(m_bitIf)->writeByteAlignment(); }
|
||||
|
||||
void encodeCTU(const CUData& cu, const CUGeom& cuGeom);
|
||||
|
||||
void codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple);
|
||||
void codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode);
|
||||
|
||||
void codeMergeIndex(const CUData& cu, uint32_t absPartIdx);
|
||||
void codeMvd(const CUData& cu, uint32_t absPartIdx, int list);
|
||||
|
||||
void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
|
||||
void codePredInfo(const CUData& cu, uint32_t absPartIdx);
|
||||
inline void codeQtCbfLuma(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth) { codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth), tuDepth); }
|
||||
|
||||
void codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel);
|
||||
void codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2]);
|
||||
void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
|
||||
|
||||
inline void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
|
||||
inline void codeMVPIdx(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]); }
|
||||
inline void codeMergeFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.m_mergeFlag[absPartIdx], m_contextState[OFF_MERGE_FLAG_EXT_CTX]); }
|
||||
inline void codeSkipFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.isSkipped(absPartIdx), m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); }
|
||||
inline void codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth) { encodeBin(cu.m_cuDepth[absPartIdx] > depth, m_contextState[OFF_SPLIT_FLAG_CTX + cu.getCtxSplitFlag(absPartIdx, depth)]); }
|
||||
inline void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx) { encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]); }
|
||||
inline void codePredMode(int predMode) { encodeBin(predMode == MODE_INTRA ? 1 : 0, m_contextState[OFF_PRED_MODE_CTX]); }
|
||||
inline void codeCUTransquantBypassFlag(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]); }
|
||||
inline void codeQtCbfLuma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + !tuDepth]); }
|
||||
inline void codeQtCbfChroma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + 2 + tuDepth]); }
|
||||
inline void codeQtRootCbf(uint32_t cbf) { encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
|
||||
inline void codeTransformSkipFlags(uint32_t transformSkip, TextType ttype) { encodeBin(transformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]); }
|
||||
void codeDeltaQP(const CUData& cu, uint32_t absPartIdx);
|
||||
void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
|
||||
|
||||
/* RDO functions */
|
||||
void estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
|
||||
void estCBFBit(EstBitsSbac& estBitsSbac) const;
|
||||
void estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
|
||||
void estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
|
||||
void estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
|
||||
|
||||
inline uint32_t bitsIntraModeNonMPM() const { return bitsCodeBin(0, m_contextState[OFF_ADI_CTX]) + 5; }
|
||||
inline uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const { return bitsCodeBin(1, m_contextState[OFF_ADI_CTX]) + (dir == preds[0] ? 1 : 2); }
|
||||
inline uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t tuDepth) const { return bitsCodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][tuDepth]]); }
|
||||
uint32_t bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const;
|
||||
uint32_t bitsIntraMode(const CUData& cu, uint32_t absPartIdx) const
|
||||
{
|
||||
return bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]) + /* not skip */
|
||||
bitsCodeBin(1, m_contextState[OFF_PRED_MODE_CTX]); /* intra */
|
||||
}
|
||||
|
||||
/* these functions are only used to estimate the bits when cbf is 0 and will never be called when writing the bistream. */
|
||||
inline void codeQtRootCbfZero() { encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
|
||||
|
||||
private:
|
||||
|
||||
/* CABAC private methods */
|
||||
void start();
|
||||
void finish();
|
||||
|
||||
void encodeBin(uint32_t binValue, uint8_t& ctxModel);
|
||||
void encodeBinEP(uint32_t binValue);
|
||||
void encodeBinsEP(uint32_t binValues, int numBins);
|
||||
void encodeBinTrm(uint32_t binValue);
|
||||
|
||||
/* return the bits of encoding the context bin without updating */
|
||||
inline uint32_t bitsCodeBin(uint32_t binValue, uint32_t ctxModel) const
|
||||
{
|
||||
uint64_t fracBits = (m_fracBits & 32767) + sbacGetEntropyBits(ctxModel, binValue);
|
||||
return (uint32_t)(fracBits >> 15);
|
||||
}
|
||||
|
||||
void encodeCU(const CUData& ctu, const CUGeom &cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP);
|
||||
void finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP);
|
||||
|
||||
void writeOut();
|
||||
|
||||
/* SBac private methods */
|
||||
void writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol);
|
||||
void writeEpExGolomb(uint32_t symbol, uint32_t count);
|
||||
void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
|
||||
|
||||
void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers);
|
||||
void codeScalingList(const ScalingList&);
|
||||
void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
|
||||
|
||||
void codePredWeightTable(const Slice& slice);
|
||||
void codeInterDir(const CUData& cu, uint32_t absPartIdx);
|
||||
void codePUWise(const CUData& cu, uint32_t absPartIdx);
|
||||
void codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list);
|
||||
void codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list);
|
||||
|
||||
void codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol);
|
||||
|
||||
void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx);
|
||||
|
||||
void encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
|
||||
bool& bCodeDQP, const uint32_t depthRange[2]);
|
||||
|
||||
void copyFrom(const Entropy& src);
|
||||
void copyContextsFrom(const Entropy& src);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_ENTROPY_H
|
1312
x265/source/encoder/frameencoder.cpp
Normal file
1312
x265/source/encoder/frameencoder.cpp
Normal file
File diff suppressed because it is too large
Load diff
234
x265/source/encoder/frameencoder.h
Normal file
234
x265/source/encoder/frameencoder.h
Normal file
|
@ -0,0 +1,234 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Shin Yee <shinyee@multicorewareinc.com>
|
||||
* Min Chen <chenm003@163.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_FRAMEENCODER_H
|
||||
#define X265_FRAMEENCODER_H
|
||||
|
||||
#include "common.h"
|
||||
#include "wavefront.h"
|
||||
#include "bitstream.h"
|
||||
#include "frame.h"
|
||||
#include "picyuv.h"
|
||||
#include "md5.h"
|
||||
|
||||
#include "analysis.h"
|
||||
#include "sao.h"
|
||||
|
||||
#include "entropy.h"
|
||||
#include "framefilter.h"
|
||||
#include "ratecontrol.h"
|
||||
#include "reference.h"
|
||||
#include "nal.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private x265 namespace
|
||||
|
||||
class ThreadPool;
|
||||
class Encoder;
|
||||
|
||||
#define ANGULAR_MODE_ID 2
|
||||
#define AMP_ID 3
|
||||
|
||||
struct StatisticLog
|
||||
{
|
||||
uint64_t cntInter[4];
|
||||
uint64_t cntIntra[4];
|
||||
uint64_t cuInterDistribution[4][INTER_MODES];
|
||||
uint64_t cuIntraDistribution[4][INTRA_MODES];
|
||||
uint64_t cntIntraNxN;
|
||||
uint64_t cntSkipCu[4];
|
||||
uint64_t cntTotalCu[4];
|
||||
uint64_t totalCu;
|
||||
|
||||
StatisticLog()
|
||||
{
|
||||
memset(this, 0, sizeof(StatisticLog));
|
||||
}
|
||||
};
|
||||
|
||||
/* manages the state of encoding one row of CTU blocks. When
|
||||
* WPP is active, several rows will be simultaneously encoded. */
|
||||
struct CTURow
|
||||
{
|
||||
Entropy bufferedEntropy; /* store CTU2 context for next row CTU0 */
|
||||
Entropy rowGoOnCoder; /* store context between CTUs, code bitstream if !SAO */
|
||||
|
||||
FrameStats rowStats;
|
||||
|
||||
/* Threading variables */
|
||||
|
||||
/* This lock must be acquired when reading or writing m_active or m_busy */
|
||||
Lock lock;
|
||||
|
||||
/* row is ready to run, has no neighbor dependencies. The row may have
|
||||
* external dependencies (reference frame pixels) that prevent it from being
|
||||
* processed, so it may stay with m_active=true for some time before it is
|
||||
* encoded by a worker thread. */
|
||||
volatile bool active;
|
||||
|
||||
/* row is being processed by a worker thread. This flag is only true when a
|
||||
* worker thread is within the context of FrameEncoder::processRow(). This
|
||||
* flag is used to detect multiple possible wavefront problems. */
|
||||
volatile bool busy;
|
||||
|
||||
/* count of completed CUs in this row */
|
||||
volatile uint32_t completed;
|
||||
|
||||
/* called at the start of each frame to initialize state */
|
||||
void init(Entropy& initContext)
|
||||
{
|
||||
active = false;
|
||||
busy = false;
|
||||
completed = 0;
|
||||
memset(&rowStats, 0, sizeof(rowStats));
|
||||
rowGoOnCoder.load(initContext);
|
||||
}
|
||||
};
|
||||
|
||||
// Manages the wave-front processing of a single encoding frame
|
||||
class FrameEncoder : public WaveFront, public Thread
|
||||
{
|
||||
public:
|
||||
|
||||
FrameEncoder();
|
||||
|
||||
virtual ~FrameEncoder() {}
|
||||
|
||||
virtual bool init(Encoder *top, int numRows, int numCols);
|
||||
|
||||
void destroy();
|
||||
|
||||
/* triggers encode of a new frame by the worker thread */
|
||||
bool startCompressFrame(Frame* curFrame);
|
||||
|
||||
/* blocks until worker thread is done, returns access unit */
|
||||
Frame *getEncodedPicture(NALList& list);
|
||||
|
||||
Event m_enable;
|
||||
Event m_done;
|
||||
Event m_completionEvent;
|
||||
int m_localTldIdx;
|
||||
|
||||
volatile bool m_threadActive;
|
||||
volatile bool m_bAllRowsStop;
|
||||
volatile int m_completionCount;
|
||||
volatile int m_vbvResetTriggerRow;
|
||||
|
||||
uint32_t m_numRows;
|
||||
uint32_t m_numCols;
|
||||
uint32_t m_filterRowDelay;
|
||||
uint32_t m_filterRowDelayCus;
|
||||
uint32_t m_refLagRows;
|
||||
|
||||
CTURow* m_rows;
|
||||
RateControlEntry m_rce;
|
||||
SEIDecodedPictureHash m_seiReconPictureDigest;
|
||||
|
||||
uint64_t m_SSDY;
|
||||
uint64_t m_SSDU;
|
||||
uint64_t m_SSDV;
|
||||
double m_ssim;
|
||||
uint64_t m_accessUnitBits;
|
||||
uint32_t m_ssimCnt;
|
||||
MD5Context m_state[3];
|
||||
uint32_t m_crc[3];
|
||||
uint32_t m_checksum[3];
|
||||
|
||||
volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs
|
||||
volatile int m_totalActiveWorkerCount; // sum of m_activeWorkerCount sampled at end of each CTU
|
||||
volatile int m_activeWorkerCountSamples; // count of times m_activeWorkerCount was sampled (think vbv restarts)
|
||||
volatile int m_countRowBlocks; // count of workers forced to abandon a row because of top dependency
|
||||
int64_t m_startCompressTime; // timestamp when frame encoder is given a frame
|
||||
int64_t m_row0WaitTime; // timestamp when row 0 is allowed to start
|
||||
int64_t m_allRowsAvailableTime; // timestamp when all reference dependencies are resolved
|
||||
int64_t m_endCompressTime; // timestamp after all CTUs are compressed
|
||||
int64_t m_endFrameTime; // timestamp after RCEnd, NR updates, etc
|
||||
int64_t m_stallStartTime; // timestamp when worker count becomes 0
|
||||
int64_t m_prevOutputTime; // timestamp when prev frame was retrieved by API thread
|
||||
int64_t m_slicetypeWaitTime; // total elapsed time waiting for decided frame
|
||||
int64_t m_totalWorkerElapsedTime; // total elapsed time spent by worker threads processing CTUs
|
||||
int64_t m_totalNoWorkerTime; // total elapsed time without any active worker threads
|
||||
#if DETAILED_CU_STATS
|
||||
CUStats m_cuStats;
|
||||
#endif
|
||||
|
||||
Encoder* m_top;
|
||||
x265_param* m_param;
|
||||
Frame* m_frame;
|
||||
NoiseReduction* m_nr;
|
||||
ThreadLocalData* m_tld; /* for --no-wpp */
|
||||
Bitstream* m_outStreams;
|
||||
uint32_t* m_substreamSizes;
|
||||
|
||||
CUGeom* m_cuGeoms;
|
||||
uint32_t* m_ctuGeomMap;
|
||||
|
||||
Bitstream m_bs;
|
||||
MotionReference m_mref[2][MAX_NUM_REF + 1];
|
||||
Entropy m_entropyCoder;
|
||||
Entropy m_initSliceContext;
|
||||
FrameFilter m_frameFilter;
|
||||
NALList m_nalList;
|
||||
|
||||
class WeightAnalysis : public BondedTaskGroup
|
||||
{
|
||||
public:
|
||||
|
||||
FrameEncoder& master;
|
||||
|
||||
WeightAnalysis(FrameEncoder& fe) : master(fe) {}
|
||||
|
||||
void processTasks(int workerThreadId);
|
||||
|
||||
protected:
|
||||
|
||||
WeightAnalysis operator=(const WeightAnalysis&);
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
bool initializeGeoms();
|
||||
|
||||
/* analyze / compress frame, can be run in parallel within reference constraints */
|
||||
void compressFrame();
|
||||
|
||||
/* called by compressFrame to generate final per-row bitstreams */
|
||||
void encodeSlice();
|
||||
|
||||
void threadMain();
|
||||
int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
|
||||
void noiseReductionUpdate();
|
||||
|
||||
/* Called by WaveFront::findJob() */
|
||||
virtual void processRow(int row, int threadId);
|
||||
virtual void processRowEncoder(int row, ThreadLocalData& tld);
|
||||
|
||||
void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); }
|
||||
void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); }
|
||||
void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); }
|
||||
void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_FRAMEENCODER_H
|
494
x265/source/encoder/framefilter.cpp
Normal file
494
x265/source/encoder/framefilter.cpp
Normal file
|
@ -0,0 +1,494 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
|
||||
* Min Chen <chenm003@163.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "frame.h"
|
||||
#include "framedata.h"
|
||||
#include "encoder.h"
|
||||
#include "framefilter.h"
|
||||
#include "frameencoder.h"
|
||||
#include "wavefront.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
/* Sum of squared differences between two width x height pixel regions that share one stride (defined later in this file) */
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
|
||||
/* Accumulated SSIM over a band of rows; the number of evaluated positions is returned through cnt (defined later in this file) */
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);
|
||||
|
||||
/* Construct an idle filter. Every pointer is NULL and every scalar is zero
 * until init() installs the real configuration, so no member is ever read
 * uninitialized (initializers follow the member declaration order). */
FrameFilter::FrameFilter()
    : m_param(NULL)
    , m_frame(NULL)
    , m_frameEncoder(NULL)
    , m_hChromaShift(0)
    , m_vChromaShift(0)
    , m_numRows(0)
    , m_saoRowDelay(0)
    , m_lastHeight(0)
    , m_ssimBuf(NULL)
{
    m_pad[0] = 0;
    m_pad[1] = 0;
}
|
||||
|
||||
void FrameFilter::destroy()
|
||||
{
|
||||
if (m_param->bEnableSAO)
|
||||
m_sao.destroy();
|
||||
|
||||
X265_FREE(m_ssimBuf);
|
||||
}
|
||||
|
||||
/* Bind the filter to its encoder and frame encoder, derive the per-stream
 * constants and allocate the optional SAO and SSIM resources.
 *   top     - encoder providing the parameters and the SPS conformance window
 *   frame   - owning frame encoder
 *   numRows - number of CTU rows in a picture */
void FrameFilter::init(Encoder *top, FrameEncoder *frame, int numRows)
{
    m_frameEncoder = frame;
    m_numRows = numRows;
    m_param = top->m_param;

    m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
    m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);

    m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
    m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;

    /* SAO runs one row behind when the deblocking filter is enabled */
    if (m_param->bEnableLoopFilter)
        m_saoRowDelay = 1;
    else
        m_saoRowDelay = 0;

    /* height of the last CTU row: the remainder rows, or a full CTU when the
     * picture height is an exact multiple of the CTU size */
    m_lastHeight = m_param->sourceHeight % g_maxCUSize;
    if (!m_lastHeight)
        m_lastHeight = g_maxCUSize;

    /* SAO is switched off in the parameters when its buffers cannot be created */
    if (m_param->bEnableSAO && !m_sao.create(m_param))
        m_param->bEnableSAO = 0;

    if (m_param->bEnableSsim)
    {
        /* scratch space for calculateSSIM() */
        m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
    }
}
|
||||
|
||||
/* Attach the frame about to be filtered and, when SAO is enabled, start a
 * new SAO slice with the given entropy state and QP. */
void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
{
    m_frame = frame;

    if (!m_param->bEnableSAO)
        return;

    m_sao.startSlice(frame, initState, qp);
}
|
||||
|
||||
void FrameFilter::processRow(int row)
|
||||
{
|
||||
ProfileScopeEvent(filterCTURow);
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
ScopedElapsedTime filterPerfScope(m_frameEncoder->m_cuStats.loopFilterElapsedTime);
|
||||
m_frameEncoder->m_cuStats.countLoopFilter++;
|
||||
#endif
|
||||
|
||||
if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO)
|
||||
{
|
||||
processRowPost(row);
|
||||
return;
|
||||
}
|
||||
FrameData& encData = *m_frame->m_encData;
|
||||
const uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
|
||||
const uint32_t lineStartCUAddr = row * numCols;
|
||||
|
||||
if (m_param->bEnableLoopFilter)
|
||||
{
|
||||
const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
|
||||
const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
|
||||
|
||||
for (uint32_t col = 0; col < numCols; col++)
|
||||
{
|
||||
uint32_t cuAddr = lineStartCUAddr + col;
|
||||
const CUData* ctu = encData.getPicCTU(cuAddr);
|
||||
deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
|
||||
|
||||
if (col > 0)
|
||||
{
|
||||
const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1);
|
||||
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
|
||||
}
|
||||
}
|
||||
|
||||
const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
|
||||
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[lineStartCUAddr + numCols - 1]], Deblock::EDGE_HOR);
|
||||
}
|
||||
|
||||
// SAO
|
||||
SAOParam* saoParam = encData.m_saoParam;
|
||||
if (m_param->bEnableSAO)
|
||||
{
|
||||
m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
|
||||
m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext);
|
||||
m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext);
|
||||
|
||||
m_sao.rdoSaoUnitRow(saoParam, row);
|
||||
|
||||
// NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
|
||||
if (row >= m_saoRowDelay)
|
||||
processSao(row - m_saoRowDelay);
|
||||
}
|
||||
|
||||
// this row of CTUs has been encoded
|
||||
|
||||
if (row > 0)
|
||||
processRowPost(row - 1);
|
||||
|
||||
if (row == m_numRows - 1)
|
||||
{
|
||||
if (m_param->bEnableSAO)
|
||||
{
|
||||
m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
|
||||
|
||||
for (int i = m_numRows - m_saoRowDelay; i < m_numRows; i++)
|
||||
processSao(i);
|
||||
}
|
||||
|
||||
processRowPost(row);
|
||||
}
|
||||
}
|
||||
|
||||
/* Height in luma rows of CTU row rowNum: m_lastHeight for the final row,
 * a full CTU for every other row. */
uint32_t FrameFilter::getCUHeight(int rowNum) const
{
    if (rowNum == m_numRows - 1)
        return m_lastHeight;

    return g_maxCUSize;
}
|
||||
|
||||
void FrameFilter::processRowPost(int row)
|
||||
{
|
||||
PicYuv *reconPic = m_frame->m_reconPic;
|
||||
const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
|
||||
const uint32_t lineStartCUAddr = row * numCols;
|
||||
const int realH = getCUHeight(row);
|
||||
|
||||
// Border extend Left and Right
|
||||
primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
|
||||
primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
|
||||
}
|
||||
|
||||
// Border extend Top
|
||||
if (!row)
|
||||
{
|
||||
const intptr_t stride = reconPic->m_stride;
|
||||
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
|
||||
|
||||
for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
|
||||
memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
|
||||
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
const intptr_t strideC = reconPic->m_strideC;
|
||||
pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
|
||||
pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
|
||||
for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
|
||||
{
|
||||
memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
|
||||
memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Border extend Bottom
|
||||
if (row == m_numRows - 1)
|
||||
{
|
||||
const intptr_t stride = reconPic->m_stride;
|
||||
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
|
||||
for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
|
||||
memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
|
||||
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
const intptr_t strideC = reconPic->m_strideC;
|
||||
pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
|
||||
pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
|
||||
for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
|
||||
{
|
||||
memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
|
||||
memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Notify other FrameEncoders that this row of reconstructed pixels is available
|
||||
m_frame->m_reconRowCount.incr();
|
||||
|
||||
uint32_t cuAddr = lineStartCUAddr;
|
||||
if (m_param->bEnablePsnr)
|
||||
{
|
||||
PicYuv* fencPic = m_frame->m_fencPic;
|
||||
|
||||
intptr_t stride = reconPic->m_stride;
|
||||
uint32_t width = reconPic->m_picWidth - m_pad[0];
|
||||
uint32_t height = getCUHeight(row);
|
||||
|
||||
uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
|
||||
m_frameEncoder->m_SSDY += ssdY;
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
height >>= m_vChromaShift;
|
||||
width >>= m_hChromaShift;
|
||||
stride = reconPic->m_strideC;
|
||||
|
||||
uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
|
||||
uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
|
||||
|
||||
m_frameEncoder->m_SSDU += ssdU;
|
||||
m_frameEncoder->m_SSDV += ssdV;
|
||||
}
|
||||
}
|
||||
if (m_param->bEnableSsim && m_ssimBuf)
|
||||
{
|
||||
pixel *rec = reconPic->m_picOrg[0];
|
||||
pixel *fenc = m_frame->m_fencPic->m_picOrg[0];
|
||||
intptr_t stride1 = reconPic->m_stride;
|
||||
intptr_t stride2 = m_frame->m_fencPic->m_stride;
|
||||
uint32_t bEnd = ((row + 1) == (this->m_numRows - 1));
|
||||
uint32_t bStart = (row == 0);
|
||||
uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
|
||||
uint32_t maxPixY = (row + 1) * g_maxCUSize - 4 * !bEnd;
|
||||
uint32_t ssim_cnt;
|
||||
x265_emms();
|
||||
|
||||
/* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right
|
||||
* to avoid alignment of ssim blocks with DCT blocks. */
|
||||
minPixY += bStart ? 2 : -6;
|
||||
m_frameEncoder->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, fenc + 2 + minPixY * stride2, stride2,
|
||||
m_param->sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, ssim_cnt);
|
||||
m_frameEncoder->m_ssimCnt += ssim_cnt;
|
||||
}
|
||||
if (m_param->decodedPictureHashSEI == 1)
|
||||
{
|
||||
uint32_t height = getCUHeight(row);
|
||||
uint32_t width = reconPic->m_picWidth;
|
||||
intptr_t stride = reconPic->m_stride;
|
||||
|
||||
if (!row)
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
MD5Init(&m_frameEncoder->m_state[i]);
|
||||
}
|
||||
|
||||
updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
width >>= m_hChromaShift;
|
||||
height >>= m_vChromaShift;
|
||||
stride = reconPic->m_strideC;
|
||||
|
||||
updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
|
||||
updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
|
||||
}
|
||||
}
|
||||
else if (m_param->decodedPictureHashSEI == 2)
|
||||
{
|
||||
uint32_t height = getCUHeight(row);
|
||||
uint32_t width = reconPic->m_picWidth;
|
||||
intptr_t stride = reconPic->m_stride;
|
||||
if (!row)
|
||||
m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff;
|
||||
updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride);
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
width >>= m_hChromaShift;
|
||||
height >>= m_vChromaShift;
|
||||
stride = reconPic->m_strideC;
|
||||
|
||||
updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
|
||||
updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
|
||||
}
|
||||
}
|
||||
else if (m_param->decodedPictureHashSEI == 3)
|
||||
{
|
||||
uint32_t width = reconPic->m_picWidth;
|
||||
uint32_t height = getCUHeight(row);
|
||||
intptr_t stride = reconPic->m_stride;
|
||||
uint32_t cuHeight = g_maxCUSize;
|
||||
if (!row)
|
||||
m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0;
|
||||
updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
|
||||
if (reconPic->m_picCsp != X265_CSP_I400) {
|
||||
width >>= m_hChromaShift;
|
||||
height >>= m_vChromaShift;
|
||||
stride = reconPic->m_strideC;
|
||||
cuHeight >>= m_vChromaShift;
|
||||
|
||||
updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
|
||||
updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
|
||||
}
|
||||
}
|
||||
|
||||
if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 * (int)m_frameEncoder->m_numRows)
|
||||
m_frameEncoder->m_completionEvent.trigger();
|
||||
}
|
||||
|
||||
/* Sum of squared differences between fenc and rec over a width x height
 * region; both planes are walked with the same stride.
 * Regions whose width or height is not a multiple of 4 are summed pixel by
 * pixel. Otherwise the region is consumed in horizontal bands of decreasing
 * height (64, 32, 16, 8, 4) and each band is tiled left to right with the
 * largest square sse_pp primitive that fits. */
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height)
{
    uint64_t total = 0;

    const bool multipleOf4 = !((width | height) & 3);
    if (!multipleOf4)
    {
        /* Slow Path */
        for (uint32_t row = 0; row < height; row++)
        {
            for (uint32_t col = 0; col < width; col++)
            {
                int delta = (int)(fenc[col] - rec[col]);
                total += delta * delta;
            }

            fenc += stride;
            rec += stride;
        }

        return total;
    }

    uint32_t doneRows = 0;

    /* Consume rows in ever narrower chunks of height */
    for (int size = BLOCK_64x64; size >= BLOCK_4x4 && doneRows < height; size--)
    {
        uint32_t bandHeight = 1 << (size + 2);

        while (doneRows + bandHeight <= height)
        {
            uint32_t x = 0;
            uint32_t y1;

            /* Consume each band using the largest square blocks possible;
             * the 64x64 and 32x32 tiles are only used when the stride is a
             * multiple of 32 and 16 respectively */
            if (size == BLOCK_64x64 && !(stride & 31))
            {
                for (; x + 64 <= width; x += 64)
                    total += primitives.cu[BLOCK_64x64].sse_pp(fenc + x, stride, rec + x, stride);
            }

            if (size >= BLOCK_32x32 && !(stride & 15))
            {
                for (; x + 32 <= width; x += 32)
                    for (y1 = 0; y1 + 32 <= bandHeight; y1 += 32)
                        total += primitives.cu[BLOCK_32x32].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
            }

            if (size >= BLOCK_16x16)
            {
                for (; x + 16 <= width; x += 16)
                    for (y1 = 0; y1 + 16 <= bandHeight; y1 += 16)
                        total += primitives.cu[BLOCK_16x16].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
            }

            if (size >= BLOCK_8x8)
            {
                for (; x + 8 <= width; x += 8)
                    for (y1 = 0; y1 + 8 <= bandHeight; y1 += 8)
                        total += primitives.cu[BLOCK_8x8].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
            }

            /* whatever is left of the band width is a multiple of 4 */
            for (; x + 4 <= width; x += 4)
                for (y1 = 0; y1 + 4 <= bandHeight; y1 += 4)
                    total += primitives.cu[BLOCK_4x4].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);

            fenc += stride * bandHeight;
            rec += stride * bandHeight;
            doneRows += bandHeight;
        }
    }

    return total;
}
|
||||
|
||||
/* Function to calculate SSIM for each row */
|
||||
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt)
|
||||
{
|
||||
uint32_t z = 0;
|
||||
float ssim = 0.0;
|
||||
|
||||
int(*sum0)[4] = (int(*)[4])buf;
|
||||
int(*sum1)[4] = sum0 + (width >> 2) + 3;
|
||||
width >>= 2;
|
||||
height >>= 2;
|
||||
|
||||
for (uint32_t y = 1; y < height; y++)
|
||||
{
|
||||
for (; z <= y; z++)
|
||||
{
|
||||
std::swap(sum0, sum1);
|
||||
for (uint32_t x = 0; x < width; x += 2)
|
||||
primitives.ssim_4x4x2_core(&pix1[(4 * x + (z * stride1))], stride1, &pix2[(4 * x + (z * stride2))], stride2, &sum0[x]);
|
||||
}
|
||||
|
||||
for (uint32_t x = 0; x < width - 1; x += 4)
|
||||
ssim += primitives.ssim_end_4(sum0 + x, sum1 + x, X265_MIN(4, width - x - 1));
|
||||
}
|
||||
|
||||
cnt = (height - 1) * (width - 1);
|
||||
return ssim;
|
||||
}
|
||||
|
||||
/* restore original YUV samples to recon after SAO (if lossless) */
|
||||
static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx)
|
||||
{
|
||||
int size = cu->m_log2CUSize[absPartIdx] - 2;
|
||||
uint32_t cuAddr = cu->m_cuAddr;
|
||||
|
||||
PicYuv* reconPic = frame.m_reconPic;
|
||||
PicYuv* fencPic = frame.m_fencPic;
|
||||
|
||||
pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
|
||||
pixel* src = fencPic->getLumaAddr(cuAddr, absPartIdx);
|
||||
|
||||
primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
|
||||
|
||||
int csp = fencPic->m_picCsp;
|
||||
if (csp != X265_CSP_I400) {
|
||||
pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
|
||||
pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
|
||||
|
||||
pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
|
||||
pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
|
||||
|
||||
primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
|
||||
primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
|
||||
}
|
||||
}
|
||||
|
||||
/* Original YUV restoration for CU in lossless coding */
|
||||
static void origCUSampleRestoration(const CUData* cu, const CUGeom& cuGeom, Frame& frame)
|
||||
{
|
||||
uint32_t absPartIdx = cuGeom.absPartIdx;
|
||||
if (cu->m_cuDepth[absPartIdx] > cuGeom.depth)
|
||||
{
|
||||
for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++)
|
||||
{
|
||||
const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
|
||||
if (childGeom.flags & CUGeom::PRESENT)
|
||||
origCUSampleRestoration(cu, childGeom, frame);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// restore original YUV samples
|
||||
if (cu->m_tqBypass[absPartIdx])
|
||||
restoreOrigLosslessYuv(cu, frame, absPartIdx);
|
||||
}
|
||||
|
||||
void FrameFilter::processSao(int row)
|
||||
{
|
||||
FrameData& encData = *m_frame->m_encData;
|
||||
SAOParam* saoParam = encData.m_saoParam;
|
||||
|
||||
if (saoParam->bSaoFlag[0])
|
||||
m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
|
||||
|
||||
if (saoParam->bSaoFlag[1])
|
||||
{
|
||||
m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1);
|
||||
m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
|
||||
}
|
||||
|
||||
if (encData.m_slice->m_pps->bTransquantBypassEnabled)
|
||||
{
|
||||
uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
|
||||
uint32_t lineStartCUAddr = row * numCols;
|
||||
|
||||
const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
|
||||
const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
|
||||
|
||||
for (uint32_t col = 0; col < numCols; col++)
|
||||
{
|
||||
uint32_t cuAddr = lineStartCUAddr + col;
|
||||
const CUData* ctu = encData.getPicCTU(cuAddr);
|
||||
origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
|
||||
}
|
||||
}
|
||||
}
|
74
x265/source/encoder/framefilter.h
Normal file
74
x265/source/encoder/framefilter.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
|
||||
* Min Chen <chenm003@163.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_FRAMEFILTER_H
|
||||
#define X265_FRAMEFILTER_H
|
||||
|
||||
#include "common.h"
|
||||
#include "frame.h"
|
||||
#include "deblock.h"
|
||||
#include "sao.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private x265 namespace
|
||||
|
||||
class Encoder;
|
||||
class Entropy;
|
||||
class FrameEncoder;
|
||||
struct ThreadLocalData;
|
||||
|
||||
// Manages the in-loop filtering (deblocking, SAO) and post-processing of a single frame, one CTU row at a time
|
||||
class FrameFilter : public Deblock
|
||||
{
|
||||
public:
|
||||
|
||||
x265_param* m_param; /* encoder parameters, taken from the Encoder in init() */
|
||||
Frame* m_frame; /* frame being filtered, set by start() */
|
||||
FrameEncoder* m_frameEncoder; /* owning frame encoder, set in init() */
|
||||
int m_hChromaShift; /* CHROMA_H_SHIFT of the internal color space */
|
||||
int m_vChromaShift; /* CHROMA_V_SHIFT of the internal color space */
|
||||
int m_pad[2]; /* [0] right and [1] bottom offset of the SPS conformance window */
|
||||
|
||||
SAO m_sao;
|
||||
int m_numRows; /* number of CTU rows, as passed to init() */
|
||||
int m_saoRowDelay; /* 1 when the loop filter is enabled, otherwise 0 */
|
||||
int m_lastHeight; /* height of the last CTU row: sourceHeight % g_maxCUSize, or g_maxCUSize when that is 0 */
|
||||
|
||||
void* m_ssimBuf; /* Temp storage for ssim computation */
|
||||
|
||||
FrameFilter();
|
||||
|
||||
/* bind to the encoder / frame encoder and allocate the SAO and SSIM resources that are enabled */
void init(Encoder *top, FrameEncoder *frame, int numRows);
|
||||
/* release what init() allocated */
void destroy();
|
||||
|
||||
/* attach the frame to filter; starts the SAO slice when SAO is enabled */
void start(Frame *pic, Entropy& initState, int qp);
|
||||
|
||||
/* deblock and SAO-process one CTU row, then post-process the rows that became final */
void processRow(int row);
|
||||
/* border extension, recon row notification, PSNR/SSIM statistics and picture hash for one row */
void processRowPost(int row);
|
||||
/* apply SAO to one row and restore lossless CUs when transquant bypass is enabled */
void processSao(int row);
|
||||
/* height of the given CTU row: m_lastHeight for the last row, g_maxCUSize otherwise */
uint32_t getCUHeight(int rowNum) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_FRAMEFILTER_H
|
496
x265/source/encoder/level.cpp
Normal file
496
x265/source/encoder/level.cpp
Normal file
|
@ -0,0 +1,496 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "slice.h"
|
||||
#include "level.h"
|
||||
|
||||
namespace X265_NS {
|
||||
typedef struct
|
||||
{
|
||||
uint32_t maxLumaSamples;
|
||||
uint32_t maxLumaSamplesPerSecond;
|
||||
uint32_t maxBitrateMain;
|
||||
uint32_t maxBitrateHigh;
|
||||
uint32_t maxCpbSizeMain;
|
||||
uint32_t maxCpbSizeHigh;
|
||||
uint32_t minCompressionRatio;
|
||||
Level::Name levelEnum;
|
||||
const char* name;
|
||||
int levelIdc;
|
||||
} LevelSpec;
|
||||
|
||||
/* Level table, ordered from the lowest level to the highest.
 * Columns follow the LevelSpec field order:
 *   maxLumaSamples, maxLumaSamplesPerSecond, maxBitrateMain, maxBitrateHigh,
 *   maxCpbSizeMain, maxCpbSizeHigh, minCompressionRatio, levelEnum, name, levelIdc
 * MAX_UINT in the "High" columns marks levels that list no High-tier value;
 * the final 8.5 entry is unconstrained in every limit.
 * NOTE(review): values presumably mirror the level limit tables of H.265 Annex A -- confirm. */
LevelSpec levels[] =
|
||||
{
|
||||
{ 36864, 552960, 128, MAX_UINT, 350, MAX_UINT, 2, Level::LEVEL1, "1", 10 },
|
||||
{ 122880, 3686400, 1500, MAX_UINT, 1500, MAX_UINT, 2, Level::LEVEL2, "2", 20 },
|
||||
{ 245760, 7372800, 3000, MAX_UINT, 3000, MAX_UINT, 2, Level::LEVEL2_1, "2.1", 21 },
|
||||
{ 552960, 16588800, 6000, MAX_UINT, 6000, MAX_UINT, 2, Level::LEVEL3, "3", 30 },
|
||||
{ 983040, 33177600, 10000, MAX_UINT, 10000, MAX_UINT, 2, Level::LEVEL3_1, "3.1", 31 },
|
||||
{ 2228224, 66846720, 12000, 30000, 12000, 30000, 4, Level::LEVEL4, "4", 40 },
|
||||
{ 2228224, 133693440, 20000, 50000, 20000, 50000, 4, Level::LEVEL4_1, "4.1", 41 },
|
||||
{ 8912896, 267386880, 25000, 100000, 25000, 100000, 6, Level::LEVEL5, "5", 50 },
|
||||
{ 8912896, 534773760, 40000, 160000, 40000, 160000, 8, Level::LEVEL5_1, "5.1", 51 },
|
||||
{ 8912896, 1069547520, 60000, 240000, 60000, 240000, 8, Level::LEVEL5_2, "5.2", 52 },
|
||||
{ 35651584, 1069547520, 60000, 240000, 60000, 240000, 8, Level::LEVEL6, "6", 60 },
|
||||
{ 35651584, 2139095040, 120000, 480000, 120000, 480000, 8, Level::LEVEL6_1, "6.1", 61 },
|
||||
{ 35651584, 4278190080U, 240000, 800000, 240000, 800000, 6, Level::LEVEL6_2, "6.2", 62 },
|
||||
{ MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, MAX_UINT, 1, Level::LEVEL8_5, "8.5", 85 },
|
||||
};
|
||||
|
||||
/* determine minimum decoder level required to decode the described video */
|
||||
void determineLevel(const x265_param ¶m, VPS& vps)
|
||||
{
|
||||
vps.ptl.onePictureOnlyConstraintFlag = param.totalFrames == 1;
|
||||
vps.ptl.intraConstraintFlag = param.keyframeMax <= 1 || vps.ptl.onePictureOnlyConstraintFlag;
|
||||
vps.ptl.bitDepthConstraint = param.internalBitDepth;
|
||||
vps.ptl.chromaFormatConstraint = param.internalCsp;
|
||||
|
||||
/* TODO: figure out HighThroughput signaling, aka: HbrFactor in section A.4.2, only available
|
||||
* for intra-only profiles (vps.ptl.intraConstraintFlag) */
|
||||
vps.ptl.lowerBitRateConstraintFlag = true;
|
||||
|
||||
vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
|
||||
|
||||
if (param.internalCsp == X265_CSP_I420 && param.internalBitDepth <= 10)
|
||||
{
|
||||
/* Probably an HEVC v1 profile, but must check to be sure */
|
||||
if (param.internalBitDepth <= 8)
|
||||
{
|
||||
if (vps.ptl.onePictureOnlyConstraintFlag)
|
||||
vps.ptl.profileIdc = Profile::MAINSTILLPICTURE;
|
||||
else if (vps.ptl.intraConstraintFlag)
|
||||
vps.ptl.profileIdc = Profile::MAINREXT; /* Main Intra */
|
||||
else
|
||||
vps.ptl.profileIdc = Profile::MAIN;
|
||||
}
|
||||
else if (param.internalBitDepth <= 10)
|
||||
{
|
||||
/* note there is no 10bit still picture profile */
|
||||
if (vps.ptl.intraConstraintFlag)
|
||||
vps.ptl.profileIdc = Profile::MAINREXT; /* Main10 Intra */
|
||||
else
|
||||
vps.ptl.profileIdc = Profile::MAIN10;
|
||||
}
|
||||
}
|
||||
else
|
||||
vps.ptl.profileIdc = Profile::MAINREXT;
|
||||
|
||||
/* determine which profiles are compatible with this stream */
|
||||
|
||||
memset(vps.ptl.profileCompatibilityFlag, 0, sizeof(vps.ptl.profileCompatibilityFlag));
|
||||
vps.ptl.profileCompatibilityFlag[vps.ptl.profileIdc] = true;
|
||||
if (vps.ptl.profileIdc == Profile::MAIN10 && param.internalBitDepth == 8)
|
||||
vps.ptl.profileCompatibilityFlag[Profile::MAIN] = true;
|
||||
else if (vps.ptl.profileIdc == Profile::MAIN)
|
||||
vps.ptl.profileCompatibilityFlag[Profile::MAIN10] = true;
|
||||
else if (vps.ptl.profileIdc == Profile::MAINSTILLPICTURE)
|
||||
{
|
||||
vps.ptl.profileCompatibilityFlag[Profile::MAIN] = true;
|
||||
vps.ptl.profileCompatibilityFlag[Profile::MAIN10] = true;
|
||||
}
|
||||
else if (vps.ptl.profileIdc == Profile::MAINREXT)
|
||||
vps.ptl.profileCompatibilityFlag[Profile::MAINREXT] = true;
|
||||
|
||||
uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
|
||||
uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
|
||||
uint32_t bitrate = param.rc.vbvMaxBitrate ? param.rc.vbvMaxBitrate : param.rc.bitrate;
|
||||
|
||||
const uint32_t MaxDpbPicBuf = 6;
|
||||
vps.ptl.levelIdc = Level::NONE;
|
||||
vps.ptl.tierFlag = Level::MAIN;
|
||||
|
||||
const size_t NumLevels = sizeof(levels) / sizeof(levels[0]);
|
||||
uint32_t i;
|
||||
if (param.bLossless)
|
||||
{
|
||||
i = 13;
|
||||
vps.ptl.minCrForLevel = 1;
|
||||
vps.ptl.maxLumaSrForLevel = MAX_UINT;
|
||||
vps.ptl.levelIdc = Level::LEVEL8_5;
|
||||
vps.ptl.tierFlag = Level::MAIN;
|
||||
}
|
||||
else for (i = 0; i < NumLevels; i++)
|
||||
{
|
||||
if (lumaSamples > levels[i].maxLumaSamples)
|
||||
continue;
|
||||
else if (samplesPerSec > levels[i].maxLumaSamplesPerSecond)
|
||||
continue;
|
||||
else if (bitrate > levels[i].maxBitrateMain && levels[i].maxBitrateHigh == MAX_UINT)
|
||||
continue;
|
||||
else if (bitrate > levels[i].maxBitrateHigh)
|
||||
continue;
|
||||
else if (param.sourceWidth > sqrt(levels[i].maxLumaSamples * 8.0f))
|
||||
continue;
|
||||
else if (param.sourceHeight > sqrt(levels[i].maxLumaSamples * 8.0f))
|
||||
continue;
|
||||
|
||||
uint32_t maxDpbSize = MaxDpbPicBuf;
|
||||
if (lumaSamples <= (levels[i].maxLumaSamples >> 2))
|
||||
maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16);
|
||||
else if (lumaSamples <= (levels[i].maxLumaSamples >> 1))
|
||||
maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16);
|
||||
else if (lumaSamples <= ((3 * levels[i].maxLumaSamples) >> 2))
|
||||
maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16);
|
||||
|
||||
/* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than
|
||||
* or equal to MaxDpbSize */
|
||||
if (vps.maxDecPicBuffering > maxDpbSize)
|
||||
continue;
|
||||
|
||||
/* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
|
||||
if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32)
|
||||
{
|
||||
x265_log(¶m, X265_LOG_WARNING, "level %s detected, but CTU size 16 is non-compliant\n", levels[i].name);
|
||||
vps.ptl.profileIdc = Profile::NONE;
|
||||
vps.ptl.levelIdc = Level::NONE;
|
||||
vps.ptl.tierFlag = Level::MAIN;
|
||||
x265_log(¶m, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* The value of NumPocTotalCurr shall be less than or equal to 8 */
|
||||
int numPocTotalCurr = param.maxNumReferences + vps.numReorderPics;
|
||||
if (numPocTotalCurr > 8)
|
||||
{
|
||||
x265_log(¶m, X265_LOG_WARNING, "level %s detected, but NumPocTotalCurr (total references) is non-compliant\n", levels[i].name);
|
||||
vps.ptl.profileIdc = Profile::NONE;
|
||||
vps.ptl.levelIdc = Level::NONE;
|
||||
vps.ptl.tierFlag = Level::MAIN;
|
||||
x265_log(¶m, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
#define CHECK_RANGE(value, main, high) (high != MAX_UINT && value > main && value <= high)
|
||||
|
||||
if (CHECK_RANGE(bitrate, levels[i].maxBitrateMain, levels[i].maxBitrateHigh) ||
|
||||
CHECK_RANGE((uint32_t)param.rc.vbvBufferSize, levels[i].maxCpbSizeMain, levels[i].maxCpbSizeHigh))
|
||||
{
|
||||
/* The bitrate or buffer size are out of range for Main tier, but in
|
||||
* range for High tier. If the user requested High tier then give
|
||||
* them High tier at this level. Otherwise allow the loop to
|
||||
* progress to the Main tier of the next level */
|
||||
if (param.bHighTier)
|
||||
vps.ptl.tierFlag = Level::HIGH;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
else
|
||||
vps.ptl.tierFlag = Level::MAIN;
|
||||
#undef CHECK_RANGE
|
||||
|
||||
vps.ptl.levelIdc = levels[i].levelEnum;
|
||||
vps.ptl.minCrForLevel = levels[i].minCompressionRatio;
|
||||
vps.ptl.maxLumaSrForLevel = levels[i].maxLumaSamplesPerSecond;
|
||||
break;
|
||||
}
|
||||
|
||||
static const char *profiles[] = { "None", "Main", "Main 10", "Main Still Picture", "RExt" };
|
||||
static const char *tiers[] = { "Main", "High" };
|
||||
|
||||
char profbuf[64];
|
||||
strcpy(profbuf, profiles[vps.ptl.profileIdc]);
|
||||
|
||||
bool bStillPicture = false;
|
||||
if (vps.ptl.profileIdc == Profile::MAINREXT)
|
||||
{
|
||||
if (vps.ptl.bitDepthConstraint > 12 && vps.ptl.intraConstraintFlag)
|
||||
{
|
||||
if (vps.ptl.onePictureOnlyConstraintFlag)
|
||||
{
|
||||
strcpy(profbuf, "Main 4:4:4 16 Still Picture");
|
||||
bStillPicture = true;
|
||||
}
|
||||
else
|
||||
strcpy(profbuf, "Main 4:4:4 16");
|
||||
}
|
||||
else if (param.internalCsp == X265_CSP_I420)
|
||||
{
|
||||
X265_CHECK(vps.ptl.intraConstraintFlag || vps.ptl.bitDepthConstraint > 10, "rext fail\n");
|
||||
if (vps.ptl.bitDepthConstraint <= 8)
|
||||
strcpy(profbuf, "Main");
|
||||
else if (vps.ptl.bitDepthConstraint <= 10)
|
||||
strcpy(profbuf, "Main 10");
|
||||
else if (vps.ptl.bitDepthConstraint <= 12)
|
||||
strcpy(profbuf, "Main 12");
|
||||
}
|
||||
else if (param.internalCsp == X265_CSP_I422)
|
||||
{
|
||||
/* there is no Main 4:2:2 profile, so it must be signaled as Main10 4:2:2 */
|
||||
if (param.internalBitDepth <= 10)
|
||||
strcpy(profbuf, "Main 4:2:2 10");
|
||||
else if (vps.ptl.bitDepthConstraint <= 12)
|
||||
strcpy(profbuf, "Main 4:2:2 12");
|
||||
}
|
||||
else if (param.internalCsp == X265_CSP_I444)
|
||||
{
|
||||
if (vps.ptl.bitDepthConstraint <= 8)
|
||||
{
|
||||
if (vps.ptl.onePictureOnlyConstraintFlag)
|
||||
{
|
||||
strcpy(profbuf, "Main 4:4:4 Still Picture");
|
||||
bStillPicture = true;
|
||||
}
|
||||
else
|
||||
strcpy(profbuf, "Main 4:4:4");
|
||||
}
|
||||
else if (vps.ptl.bitDepthConstraint <= 10)
|
||||
strcpy(profbuf, "Main 4:4:4 10");
|
||||
else if (vps.ptl.bitDepthConstraint <= 12)
|
||||
strcpy(profbuf, "Main 4:4:4 12");
|
||||
}
|
||||
else
|
||||
strcpy(profbuf, "Unknown");
|
||||
|
||||
if (vps.ptl.intraConstraintFlag && !bStillPicture)
|
||||
strcat(profbuf, " Intra");
|
||||
}
|
||||
x265_log(¶m, X265_LOG_INFO, "%s profile, Level-%s (%s tier)\n",
|
||||
profbuf, levels[i].name, tiers[vps.ptl.tierFlag]);
|
||||
}
|
||||
|
||||
/* enforce a maximum decoder level requirement, in other words assure that a
|
||||
* decoder of the specified level may decode the video about to be created.
|
||||
* Lower parameters where necessary to ensure the video will be decodable by a
|
||||
* decoder meeting this level of requirement. Some parameters (resolution and
|
||||
* frame rate) are non-negotiable and thus this function may fail. In those
|
||||
* circumstances it will be quite noisy */
|
||||
bool enforceLevel(x265_param& param, VPS& vps)
|
||||
{
|
||||
vps.numReorderPics = (param.bBPyramid && param.bframes > 1) ? 2 : !!param.bframes;
|
||||
vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 2, (uint32_t)param.maxNumReferences) + vps.numReorderPics);
|
||||
|
||||
/* no level specified by user, just auto-detect from the configuration */
|
||||
if (param.levelIdc <= 0)
|
||||
return true;
|
||||
|
||||
uint32_t level = 0;
|
||||
while (levels[level].levelIdc != param.levelIdc && level + 1 < sizeof(levels) / sizeof(levels[0]))
|
||||
level++;
|
||||
if (levels[level].levelIdc != param.levelIdc)
|
||||
{
|
||||
x265_log(¶m, X265_LOG_WARNING, "specified level %d does not exist\n", param.levelIdc);
|
||||
return false;
|
||||
}
|
||||
|
||||
LevelSpec& l = levels[level];
|
||||
bool highTier = !!param.bHighTier;
|
||||
if (highTier && l.maxBitrateHigh == MAX_UINT)
|
||||
{
|
||||
highTier = false;
|
||||
x265_log(¶m, X265_LOG_WARNING, "Level %s has no High tier, using Main tier\n", l.name);
|
||||
}
|
||||
|
||||
uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
|
||||
uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
|
||||
bool ok = true;
|
||||
if (lumaSamples > l.maxLumaSamples)
|
||||
ok = false;
|
||||
else if (param.sourceWidth > sqrt(l.maxLumaSamples * 8.0f))
|
||||
ok = false;
|
||||
else if (param.sourceHeight > sqrt(l.maxLumaSamples * 8.0f))
|
||||
ok = false;
|
||||
if (!ok)
|
||||
{
|
||||
x265_log(¶m, X265_LOG_WARNING, "picture dimensions are out of range for specified level\n");
|
||||
return false;
|
||||
}
|
||||
else if (samplesPerSec > l.maxLumaSamplesPerSecond)
|
||||
{
|
||||
x265_log(¶m, X265_LOG_WARNING, "frame rate is out of range for specified level\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((uint32_t)param.rc.vbvMaxBitrate > (highTier ? l.maxBitrateHigh : l.maxBitrateMain))
|
||||
{
|
||||
param.rc.vbvMaxBitrate = highTier ? l.maxBitrateHigh : l.maxBitrateMain;
|
||||
x265_log(¶m, X265_LOG_INFO, "lowering VBV max bitrate to %dKbps\n", param.rc.vbvMaxBitrate);
|
||||
}
|
||||
if ((uint32_t)param.rc.vbvBufferSize > (highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain))
|
||||
{
|
||||
param.rc.vbvBufferSize = highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain;
|
||||
x265_log(¶m, X265_LOG_INFO, "lowering VBV buffer size to %dKb\n", param.rc.vbvBufferSize);
|
||||
}
|
||||
|
||||
switch (param.rc.rateControlMode)
|
||||
{
|
||||
case X265_RC_ABR:
|
||||
if ((uint32_t)param.rc.bitrate > (highTier ? l.maxBitrateHigh : l.maxBitrateMain))
|
||||
{
|
||||
param.rc.bitrate = l.maxBitrateHigh;
|
||||
x265_log(¶m, X265_LOG_INFO, "lowering target bitrate to High tier limit of %dKbps\n", param.rc.bitrate);
|
||||
}
|
||||
break;
|
||||
|
||||
case X265_RC_CQP:
|
||||
x265_log(¶m, X265_LOG_WARNING, "Constant QP is inconsistent with specifying a decoder level, no bitrate guarantee is possible.\n");
|
||||
return false;
|
||||
|
||||
case X265_RC_CRF:
|
||||
if (!param.rc.vbvBufferSize || !param.rc.vbvMaxBitrate)
|
||||
{
|
||||
if (!param.rc.vbvMaxBitrate)
|
||||
param.rc.vbvMaxBitrate = highTier ? l.maxBitrateHigh : l.maxBitrateMain;
|
||||
if (!param.rc.vbvBufferSize)
|
||||
param.rc.vbvBufferSize = highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain;
|
||||
x265_log(¶m, X265_LOG_WARNING, "Specifying a decoder level with constant rate factor rate-control requires\n");
|
||||
x265_log(¶m, X265_LOG_WARNING, "enabling VBV with vbv-bufsize=%dkb vbv-maxrate=%dkbps. VBV outputs are non-deterministic!\n",
|
||||
param.rc.vbvBufferSize, param.rc.vbvMaxBitrate);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
x265_log(¶m, X265_LOG_ERROR, "Unknown rate control mode is inconsistent with specifying a decoder level\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than or equal to MaxDpbSize */
|
||||
const uint32_t MaxDpbPicBuf = 6;
|
||||
uint32_t maxDpbSize = MaxDpbPicBuf;
|
||||
if (lumaSamples <= (l.maxLumaSamples >> 2))
|
||||
maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16);
|
||||
else if (lumaSamples <= (l.maxLumaSamples >> 1))
|
||||
maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16);
|
||||
else if (lumaSamples <= ((3 * l.maxLumaSamples) >> 2))
|
||||
maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16);
|
||||
|
||||
int savedRefCount = param.maxNumReferences;
|
||||
while (vps.maxDecPicBuffering > maxDpbSize && param.maxNumReferences > 1)
|
||||
{
|
||||
param.maxNumReferences--;
|
||||
vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 1, (uint32_t)param.maxNumReferences) + vps.numReorderPics);
|
||||
}
|
||||
if (param.maxNumReferences != savedRefCount)
|
||||
x265_log(¶m, X265_LOG_INFO, "Lowering max references to %d to meet level requirement\n", param.maxNumReferences);
|
||||
|
||||
/* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
|
||||
if (param.levelIdc >= 50 && param.maxCUSize < 32)
|
||||
{
|
||||
param.maxCUSize = 32;
|
||||
x265_log(¶m, X265_LOG_INFO, "Levels 5.0 and above require a maximum CTU size of at least 32, using --ctu 32\n");
|
||||
}
|
||||
|
||||
/* The value of NumPocTotalCurr shall be less than or equal to 8 */
|
||||
int numPocTotalCurr = param.maxNumReferences + !!param.bframes;
|
||||
if (numPocTotalCurr > 8)
|
||||
{
|
||||
param.maxNumReferences = 8 - !!param.bframes;
|
||||
x265_log(¶m, X265_LOG_INFO, "Lowering max references to %d to meet numPocTotalCurr requirement\n", param.maxNumReferences);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
#if EXPORT_C_API
|
||||
|
||||
/* these functions are exported as C functions (default) */
|
||||
using namespace X265_NS;
|
||||
extern "C" {
|
||||
|
||||
#else
|
||||
|
||||
/* these functions exist within private namespace (multilib) */
|
||||
namespace X265_NS {
|
||||
|
||||
#endif
|
||||
|
||||
/* Validate that the named profile is compatible with this build's bit depth
 * and with param->internalCsp, and apply the profile's constraints.
 *
 * param   - encoder parameters; for intra-only and "mainstillpicture"/"msp"
 *           profiles keyframeMax is forced to 1, but only when the call
 *           succeeds
 * profile - profile name, e.g. "main", "main10-intra", "main444-12"
 *
 * Returns 0 on success, and also when either argument is NULL (nothing is
 * done in that case). Returns -1, after logging an error, when the profile is
 * unknown or is incompatible with the bit depth or color space. */
int x265_param_apply_profile(x265_param *param, const char *profile)
{
    if (!param || !profile)
        return 0;

    /* Check if profile bit-depth requirement is exceeded by internal bit depth */
    bool bInvalidDepth = false;
#if X265_DEPTH > 8
    if (!strcmp(profile, "main") || !strcmp(profile, "mainstillpicture") || !strcmp(profile, "msp") ||
        !strcmp(profile, "main444-8") || !strcmp(profile, "main-intra") ||
        !strcmp(profile, "main444-intra") || !strcmp(profile, "main444-stillpicture"))
        bInvalidDepth = true;
#endif
#if X265_DEPTH > 10
    if (!strcmp(profile, "main10") || !strcmp(profile, "main422-10") || !strcmp(profile, "main444-10") ||
        !strcmp(profile, "main10-intra") || !strcmp(profile, "main422-10-intra") || !strcmp(profile, "main444-10-intra"))
        bInvalidDepth = true;
#endif
#if X265_DEPTH > 12
    if (!strcmp(profile, "main12") || !strcmp(profile, "main422-12") || !strcmp(profile, "main444-12") ||
        !strcmp(profile, "main12-intra") || !strcmp(profile, "main422-12-intra") || !strcmp(profile, "main444-12-intra"))
        bInvalidDepth = true;
#endif

    if (bInvalidDepth)
    {
        x265_log(param, X265_LOG_ERROR, "%s profile not supported, internal bit depth %d.\n", profile, X265_DEPTH);
        return -1;
    }

    /* intra-only profiles are recognized by their "-intra" suffix; the main
     * still picture profile is treated the same way */
    size_t l = strlen(profile);
    bool bBoolIntra = (l > 6 && !strcmp(profile + l - 6, "-intra")) ||
                      !strcmp(profile, "mainstillpicture") || !strcmp(profile, "msp");

    /* check that input color space is supported by profile */
    if (!strcmp(profile, "main") || !strcmp(profile, "main-intra") ||
        !strcmp(profile, "main10") || !strcmp(profile, "main10-intra") ||
        !strcmp(profile, "main12") || !strcmp(profile, "main12-intra") ||
        !strcmp(profile, "mainstillpicture") || !strcmp(profile, "msp"))
    {
        if (param->internalCsp != X265_CSP_I420)
        {
            x265_log(param, X265_LOG_ERROR, "%s profile not compatible with %s input color space.\n",
                     profile, x265_source_csp_names[param->internalCsp]);
            return -1;
        }
    }
    else if (!strcmp(profile, "main422-10") || !strcmp(profile, "main422-10-intra") ||
             !strcmp(profile, "main422-12") || !strcmp(profile, "main422-12-intra"))
    {
        if (param->internalCsp != X265_CSP_I420 && param->internalCsp != X265_CSP_I422)
        {
            x265_log(param, X265_LOG_ERROR, "%s profile not compatible with %s input color space.\n",
                     profile, x265_source_csp_names[param->internalCsp]);
            return -1;
        }
    }
    else if (!strcmp(profile, "main444-8") ||
             !strcmp(profile, "main444-intra") || !strcmp(profile, "main444-stillpicture") ||
             !strcmp(profile, "main444-10") || !strcmp(profile, "main444-10-intra") ||
             !strcmp(profile, "main444-12") || !strcmp(profile, "main444-12-intra") ||
             !strcmp(profile, "main444-16-intra") || !strcmp(profile, "main444-16-stillpicture"))
    {
        /* any color space allowed */
    }
    else
    {
        x265_log(param, X265_LOG_ERROR, "unknown profile <%s>\n", profile);
        return -1;
    }

    /* Only touch the caller's parameters once the profile is known to be
     * valid, so a failed call leaves param unmodified */
    if (bBoolIntra)
    {
        /* The profile may be detected as still picture if param->totalFrames is 1 */
        param->keyframeMax = 1;
    }

    return 0;
}
|
||||
}
|
39
x265/source/encoder/level.h
Normal file
39
x265/source/encoder/level.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_LEVEL_H
|
||||
#define X265_LEVEL_H 1
|
||||
|
||||
#include "common.h"
|
||||
#include "x265.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// encoder private namespace
|
||||
|
||||
struct VPS;
|
||||
void determineLevel(const x265_param ¶m, VPS& vps);
|
||||
bool enforceLevel(x265_param& param, VPS& vps);
|
||||
|
||||
}
|
||||
|
||||
#endif // ifndef X265_LEVEL_H
|
1264
x265/source/encoder/motion.cpp
Normal file
1264
x265/source/encoder/motion.cpp
Normal file
File diff suppressed because it is too large
Load diff
110
x265/source/encoder/motion.h
Normal file
110
x265/source/encoder/motion.h
Normal file
|
@ -0,0 +1,110 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_MOTIONESTIMATE_H
|
||||
#define X265_MOTIONESTIMATE_H
|
||||
|
||||
#include "primitives.h"
|
||||
#include "reference.h"
|
||||
#include "mv.h"
|
||||
#include "bitcost.h"
|
||||
#include "yuv.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private x265 namespace
|
||||
|
||||
class MotionEstimate : public BitCost
|
||||
{
|
||||
protected:
|
||||
|
||||
intptr_t blockOffset;
|
||||
|
||||
int ctuAddr;
|
||||
int absPartIdx; // part index of PU, including CU offset within CTU
|
||||
|
||||
int searchMethod;
|
||||
int subpelRefine;
|
||||
|
||||
int blockwidth;
|
||||
int blockheight;
|
||||
|
||||
pixelcmp_t sad;
|
||||
pixelcmp_x3_t sad_x3;
|
||||
pixelcmp_x4_t sad_x4;
|
||||
pixelcmp_t satd;
|
||||
pixelcmp_t chromaSatd;
|
||||
|
||||
MotionEstimate& operator =(const MotionEstimate&);
|
||||
|
||||
public:
|
||||
|
||||
static const int COST_MAX = 1 << 28;
|
||||
|
||||
Yuv fencPUYuv;
|
||||
int partEnum;
|
||||
bool bChromaSATD;
|
||||
|
||||
MotionEstimate();
|
||||
~MotionEstimate();
|
||||
|
||||
static void initScales();
|
||||
static int hpelIterationCount(int subme);
|
||||
void init(int method, int refine, int csp);
|
||||
|
||||
/* Methods called at slice setup */
|
||||
|
||||
void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight);
|
||||
void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight);
|
||||
|
||||
/* buf*() and motionEstimate() methods all use cached fenc pixels and thus
|
||||
* require setSourcePU() to be called prior. */
|
||||
|
||||
inline int bufSAD(const pixel* fref, intptr_t stride) { return sad(fencPUYuv.m_buf[0], FENC_STRIDE, fref, stride); }
|
||||
|
||||
inline int bufSATD(const pixel* fref, intptr_t stride) { return satd(fencPUYuv.m_buf[0], FENC_STRIDE, fref, stride); }
|
||||
|
||||
inline int bufChromaSATD(const Yuv& refYuv, int puPartIdx)
|
||||
{
|
||||
return chromaSatd(refYuv.getCbAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[1], fencPUYuv.m_csize) +
|
||||
chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[2], fencPUYuv.m_csize);
|
||||
}
|
||||
|
||||
int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
|
||||
|
||||
int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
|
||||
|
||||
protected:
|
||||
|
||||
inline void StarPatternSearch(ReferencePlanes *ref,
|
||||
const MV & mvmin,
|
||||
const MV & mvmax,
|
||||
MV & bmv,
|
||||
int & bcost,
|
||||
int & bPointNr,
|
||||
int & bDistance,
|
||||
int earlyExitIters,
|
||||
int merange);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_MOTIONESTIMATE_H
|
232
x265/source/encoder/nal.cpp
Normal file
232
x265/source/encoder/nal.cpp
Normal file
|
@ -0,0 +1,232 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "bitstream.h"
|
||||
#include "nal.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
/* Start with no NAL units and no buffers allocated; Annex B output is the
 * default. */
NALList::NALList()
    : m_numNal(0)
    , m_buffer(NULL), m_occupancy(0), m_allocSize(0)
    , m_extraBuffer(NULL), m_extraOccupancy(0), m_extraAllocSize(0)
    , m_annexB(true)
{
}
|
||||
|
||||
/* Take ownership of another list's output buffer and NAL descriptors,
 * discarding whatever this list held. The donor is left empty, with a freshly
 * allocated buffer of the same size when that allocation succeeds. */
void NALList::takeContents(NALList& other)
{
    /* take other NAL buffer, discard our old one */
    X265_FREE(m_buffer);
    m_buffer = other.m_buffer;
    m_allocSize = other.m_allocSize;
    m_occupancy = other.m_occupancy;

    /* copy packet data */
    m_numNal = other.m_numNal;
    memcpy(m_nal, other.m_nal, sizeof(x265_nal) * m_numNal);

    /* reset other list, re-allocate their buffer with same size */
    other.m_numNal = 0;
    other.m_occupancy = 0;
    other.m_buffer = X265_MALLOC(uint8_t, m_allocSize);
    if (!other.m_buffer)
    {
        /* the donor no longer owns any storage; record that so its next
         * serialize() allocates instead of writing through a NULL buffer */
        other.m_allocSize = 0;
    }
}
|
||||
|
||||
/* Append one NAL unit to the output buffer: a start code (Annex B) or a
 * 4-byte big-endian length prefix, the two byte NAL header, the payload of bs
 * with emulation prevention bytes inserted, and any bytes previously staged
 * by serializeSubstreams().
 *
 * nalUnitType - type written into the NAL header and the x265_nal record
 * bs          - bitstream holding the payload; nothing is emitted when its
 *               FIFO pointer is NULL
 *
 * On allocation failure, or when the fixed-size NAL table is already full, an
 * error is logged and the NAL unit is dropped. */
void NALList::serialize(NalUnitType nalUnitType, const Bitstream& bs)
{
    static const char startCodePrefix[] = { 0, 0, 0, 1 };

    uint32_t payloadSize = bs.getNumberOfWrittenBytes();
    const uint8_t* bpayload = bs.getFIFO();
    if (!bpayload)
        return;

    /* m_nal[] has a fixed capacity; refuse to write past it. This is checked
     * up front so nothing is written for a NAL that cannot be recorded */
    if (m_numNal >= (uint32_t)MAX_NAL_UNITS)
    {
        x265_log(NULL, X265_LOG_ERROR, "NAL count overflow\n");
        return;
    }

    /* prefix + header + payload with 50% headroom for escapes + staged bytes */
    uint32_t nextSize = m_occupancy + sizeof(startCodePrefix) + 2 + payloadSize + (payloadSize >> 1) + m_extraOccupancy;
    if (nextSize > m_allocSize)
    {
        uint8_t *temp = X265_MALLOC(uint8_t, nextSize);
        if (temp)
        {
            /* m_buffer may still be NULL on the first call; only copy when
             * there is existing content */
            if (m_occupancy)
                memcpy(temp, m_buffer, m_occupancy);

            /* fixup existing payload pointers */
            for (uint32_t i = 0; i < m_numNal; i++)
                m_nal[i].payload = temp + (m_nal[i].payload - m_buffer);

            X265_FREE(m_buffer);
            m_buffer = temp;
            m_allocSize = nextSize;
        }
        else
        {
            x265_log(NULL, X265_LOG_ERROR, "Unable to realloc access unit buffer\n");
            return;
        }
    }

    uint8_t *out = m_buffer + m_occupancy;
    uint32_t bytes = 0;

    if (!m_annexB)
    {
        /* Will write size later */
        bytes += 4;
    }
    else if (!m_numNal || nalUnitType == NAL_UNIT_VPS || nalUnitType == NAL_UNIT_SPS || nalUnitType == NAL_UNIT_PPS)
    {
        /* four byte start code for the first NAL and for parameter sets */
        memcpy(out, startCodePrefix, 4);
        bytes += 4;
    }
    else
    {
        /* three byte start code for everything else */
        memcpy(out, startCodePrefix + 1, 3);
        bytes += 3;
    }

    /* 16 bit NAL header:
     * forbidden_zero_bit       1-bit
     * nal_unit_type            6-bits
     * nuh_reserved_zero_6bits  6-bits
     * nuh_temporal_id_plus1    3-bits */
    out[bytes++] = (uint8_t)nalUnitType << 1;
    out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);

    /* 7.4.1 ...
     * Within the NAL unit, the following three-byte sequences shall not occur at
     * any byte-aligned position:
     *  - 0x000000
     *  - 0x000001
     *  - 0x000002 */
    for (uint32_t i = 0; i < payloadSize; i++)
    {
        /* examines the three bytes already written; when they form 00 00 0x
         * the last one is moved forward and 0x03 takes its place */
        if (i > 2 && !out[bytes - 2] && !out[bytes - 3] && out[bytes - 1] <= 0x03)
        {
            /* inject 0x03 to prevent emulating a start code */
            out[bytes] = out[bytes - 1];
            out[bytes - 1] = 0x03;
            bytes++;
        }

        out[bytes++] = bpayload[i];
    }

    X265_CHECK(bytes <= 4 + 2 + payloadSize + (payloadSize >> 1), "NAL buffer overflow\n");

    if (m_extraOccupancy)
    {
        /* these bytes were escaped by serializeSubstreams */
        memcpy(out + bytes, m_extraBuffer, m_extraOccupancy);
        bytes += m_extraOccupancy;
        m_extraOccupancy = 0;
    }

    /* 7.4.1.1
     * ... when the last byte of the RBSP data is equal to 0x00 (which can
     * only occur when the RBSP ends in a cabac_zero_word), a final byte equal
     * to 0x03 is appended to the end of the data. */
    if (!out[bytes - 1])
        out[bytes++] = 0x03;

    if (!m_annexB)
    {
        /* big-endian length of everything after the 4-byte length field */
        uint32_t dataSize = bytes - 4;
        out[0] = (uint8_t)(dataSize >> 24);
        out[1] = (uint8_t)(dataSize >> 16);
        out[2] = (uint8_t)(dataSize >> 8);
        out[3] = (uint8_t)dataSize;
    }

    m_occupancy += bytes;

    x265_nal& nal = m_nal[m_numNal++];
    nal.type = nalUnitType;
    nal.sizeBytes = bytes;
    nal.payload = out;
}
|
||||
|
||||
/* concatenate and escape WPP sub-streams, return escaped row lengths.
|
||||
* These streams will be appended to the next serialized NAL */
|
||||
uint32_t NALList::serializeSubstreams(uint32_t* streamSizeBytes, uint32_t streamCount, const Bitstream* streams)
|
||||
{
|
||||
uint32_t maxStreamSize = 0;
|
||||
uint32_t estSize = 0;
|
||||
for (uint32_t s = 0; s < streamCount; s++)
|
||||
estSize += streams[s].getNumberOfWrittenBytes();
|
||||
estSize += estSize >> 1;
|
||||
|
||||
if (estSize > m_extraAllocSize)
|
||||
{
|
||||
uint8_t *temp = X265_MALLOC(uint8_t, estSize);
|
||||
if (temp)
|
||||
{
|
||||
X265_FREE(m_extraBuffer);
|
||||
m_extraBuffer = temp;
|
||||
m_extraAllocSize = estSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
x265_log(NULL, X265_LOG_ERROR, "Unable to realloc WPP substream concatenation buffer\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t bytes = 0;
|
||||
uint8_t *out = m_extraBuffer;
|
||||
for (uint32_t s = 0; s < streamCount; s++)
|
||||
{
|
||||
const Bitstream& stream = streams[s];
|
||||
uint32_t inSize = stream.getNumberOfWrittenBytes();
|
||||
const uint8_t *inBytes = stream.getFIFO();
|
||||
uint32_t prevBufSize = bytes;
|
||||
|
||||
if (inBytes)
|
||||
{
|
||||
for (uint32_t i = 0; i < inSize; i++)
|
||||
{
|
||||
if (bytes >= 2 && !out[bytes - 2] && !out[bytes - 1] && inBytes[i] <= 0x03)
|
||||
{
|
||||
/* inject 0x03 to prevent emulating a start code */
|
||||
out[bytes++] = 3;
|
||||
}
|
||||
|
||||
out[bytes++] = inBytes[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (s < streamCount - 1)
|
||||
{
|
||||
streamSizeBytes[s] = bytes - prevBufSize;
|
||||
if (streamSizeBytes[s] > maxStreamSize)
|
||||
maxStreamSize = streamSizeBytes[s];
|
||||
}
|
||||
}
|
||||
|
||||
m_extraOccupancy = bytes;
|
||||
return maxStreamSize;
|
||||
}
|
65
x265/source/encoder/nal.h
Normal file
65
x265/source/encoder/nal.h
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_NAL_H
|
||||
#define X265_NAL_H
|
||||
|
||||
#include "common.h"
|
||||
#include "x265.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
class Bitstream;
|
||||
|
||||
class NALList
|
||||
{
|
||||
static const int MAX_NAL_UNITS = 16;
|
||||
|
||||
public:
|
||||
|
||||
x265_nal m_nal[MAX_NAL_UNITS];
|
||||
uint32_t m_numNal;
|
||||
|
||||
uint8_t* m_buffer;
|
||||
uint32_t m_occupancy;
|
||||
uint32_t m_allocSize;
|
||||
|
||||
uint8_t* m_extraBuffer;
|
||||
uint32_t m_extraOccupancy;
|
||||
uint32_t m_extraAllocSize;
|
||||
bool m_annexB;
|
||||
|
||||
NALList();
|
||||
~NALList() { X265_FREE(m_buffer); X265_FREE(m_extraBuffer); }
|
||||
|
||||
void takeContents(NALList& other);
|
||||
|
||||
void serialize(NalUnitType nalUnitType, const Bitstream& bs);
|
||||
|
||||
uint32_t serializeSubstreams(uint32_t* streamSizeBytes, uint32_t streamCount, const Bitstream* streams);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ifndef X265_NAL_H
|
2424
x265/source/encoder/ratecontrol.cpp
Normal file
2424
x265/source/encoder/ratecontrol.cpp
Normal file
File diff suppressed because it is too large
Load diff
267
x265/source/encoder/ratecontrol.h
Normal file
267
x265/source/encoder/ratecontrol.h
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Sumalatha Polureddy <sumalatha@multicorewareinc.com>
|
||||
* Aarthi Priya Thirumalai <aarthi@multicorewareinc.com>
|
||||
* Xun Xu, PPLive Corporation <xunxu@pptv.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_RATECONTROL_H
|
||||
#define X265_RATECONTROL_H
|
||||
|
||||
#include "common.h"
|
||||
#include "sei.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// encoder namespace
|
||||
|
||||
class Encoder;
|
||||
class Frame;
|
||||
class SEIBufferingPeriod;
|
||||
struct SPS;
|
||||
/* Reference frame duration */
#define BASE_FRAME_DURATION   0.04

/* Arbitrary limitations as a sanity check. */
#define MAX_FRAME_DURATION    1.00
#define MIN_FRAME_DURATION    0.01

/* Lower bounds used by the amortization logic */
#define MIN_AMORTIZE_FRAME    10
#define MIN_AMORTIZE_FRACTION 0.2

/* Clamp a frame duration into [MIN_FRAME_DURATION, MAX_FRAME_DURATION] */
#define CLIP_DURATION(f) x265_clip3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, f)
|
||||
|
||||
/* State of one size predictor; the fields are consumed by
 * RateControl::predictSize() and RateControl::updatePredictor(). */
struct Predictor
{
    double coeff;   /* NOTE(review): the roles of these four fields are defined */
    double count;   /* by ratecontrol.cpp, which is not visible from this header */
    double decay;
    double offset;
};
|
||||
|
||||
/* Per-frame HRD timing values.
 * NOTE(review): names suggest CPB initial/final arrival times, DPB output time
 * and CPB removal time -- confirm against the HRD code in ratecontrol.cpp. */
struct HRDTiming
{
    double cpbInitialAT;
    double cpbFinalAT;
    double dpbOutputTime;
    double cpbRemovalTime;
};
|
||||
|
||||
struct RateControlEntry
|
||||
{
|
||||
Predictor rowPreds[3][2];
|
||||
Predictor* rowPred[2];
|
||||
|
||||
int64_t lastSatd; /* Contains the picture cost of the previous frame, required for resetAbr and VBV */
|
||||
int64_t leadingNoBSatd;
|
||||
int64_t rowTotalBits; /* update cplxrsum and totalbits at the end of 2 rows */
|
||||
double blurredComplexity;
|
||||
double qpaRc;
|
||||
double qpAq;
|
||||
double qRceq;
|
||||
double frameSizePlanned; /* frame Size decided by RateCotrol before encoding the frame */
|
||||
double bufferRate;
|
||||
double movingAvgSum;
|
||||
double rowCplxrSum;
|
||||
double qpNoVbv;
|
||||
double bufferFill;
|
||||
double frameDuration;
|
||||
double clippedDuration;
|
||||
double frameSizeEstimated; /* hold frameSize, updated from cu level vbv rc */
|
||||
double frameSizeMaximum; /* max frame Size according to minCR restrictions and level of the video */
|
||||
int sliceType;
|
||||
int bframes;
|
||||
int poc;
|
||||
int encodeOrder;
|
||||
bool bLastMiniGopBFrame;
|
||||
bool isActive;
|
||||
double amortizeFrames;
|
||||
double amortizeFraction;
|
||||
/* Required in 2-pass rate control */
|
||||
uint64_t expectedBits; /* total expected bits up to the current frame (current one excluded) */
|
||||
double iCuCount;
|
||||
double pCuCount;
|
||||
double skipCuCount;
|
||||
double expectedVbv;
|
||||
double qScale;
|
||||
double newQScale;
|
||||
double newQp;
|
||||
int mvBits;
|
||||
int miscBits;
|
||||
int coeffBits;
|
||||
bool keptAsRef;
|
||||
|
||||
SEIPictureTiming *picTimingSEI;
|
||||
HRDTiming *hrdTiming;
|
||||
};
|
||||
|
||||
class RateControl
|
||||
{
|
||||
public:
|
||||
|
||||
x265_param* m_param;
|
||||
Slice* m_curSlice; /* all info about the current frame */
|
||||
SliceType m_sliceType; /* Current frame type */
|
||||
int m_ncu; /* number of CUs in a frame */
|
||||
int m_qp; /* updated qp for current frame */
|
||||
|
||||
bool m_isAbr;
|
||||
bool m_isVbv;
|
||||
bool m_isCbr;
|
||||
bool m_singleFrameVbv;
|
||||
|
||||
bool m_isAbrReset;
|
||||
int m_lastAbrResetPoc;
|
||||
|
||||
double m_rateTolerance;
|
||||
double m_frameDuration; /* current frame duration in seconds */
|
||||
double m_bitrate;
|
||||
double m_rateFactorConstant;
|
||||
double m_bufferSize;
|
||||
double m_bufferFillFinal; /* real buffer as of the last finished frame */
|
||||
double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */
|
||||
double m_bufferRate; /* # of bits added to buffer_fill after each frame */
|
||||
double m_vbvMaxRate; /* in kbps */
|
||||
double m_rateFactorMaxIncrement; /* Don't allow RF above (CRF + this value). */
|
||||
double m_rateFactorMaxDecrement; /* don't allow RF below (this value). */
|
||||
|
||||
Predictor m_pred[4]; /* Slice predictors to preidct bits for each Slice type - I,P,Bref and B */
|
||||
int64_t m_leadingNoBSatd;
|
||||
int m_predType; /* Type of slice predictors to be used - depends on the slice type */
|
||||
double m_ipOffset;
|
||||
double m_pbOffset;
|
||||
int64_t m_bframeBits;
|
||||
int64_t m_currentSatd;
|
||||
int m_qpConstant[3];
|
||||
int m_lastNonBPictType;
|
||||
int m_framesDone; /* # of frames passed through RateCotrol already */
|
||||
|
||||
double m_cplxrSum; /* sum of bits*qscale/rceq */
|
||||
double m_wantedBitsWindow; /* target bitrate * window */
|
||||
double m_accumPQp; /* for determining I-frame quant */
|
||||
double m_accumPNorm;
|
||||
double m_lastQScaleFor[3]; /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
|
||||
double m_lstep;
|
||||
double m_shortTermCplxSum;
|
||||
double m_shortTermCplxCount;
|
||||
double m_lastRceq;
|
||||
double m_qCompress;
|
||||
int64_t m_totalBits; /* total bits used for already encoded frames (after ammortization) */
|
||||
int64_t m_encodedBits; /* bits used for encoded frames (without ammortization) */
|
||||
double m_fps;
|
||||
int64_t m_satdCostWindow[50];
|
||||
int64_t m_encodedBitsWindow[50];
|
||||
int m_sliderPos;
|
||||
|
||||
/* To detect a pattern of low detailed static frames in single pass ABR using satdcosts */
|
||||
int64_t m_lastBsliceSatdCost;
|
||||
int m_numBframesInPattern;
|
||||
bool m_isPatternPresent;
|
||||
bool m_isSceneTransition;
|
||||
|
||||
/* a common variable on which rateControlStart, rateControlEnd and rateControUpdateStats waits to
|
||||
* sync the calls to these functions. For example
|
||||
* -F2:
|
||||
* rceStart 10
|
||||
* rceUpdate 10
|
||||
* rceEnd 9
|
||||
* rceStart 11
|
||||
* rceUpdate 11
|
||||
* rceEnd 10
|
||||
* rceStart 12
|
||||
* rceUpdate 12
|
||||
* rceEnd 11 */
|
||||
ThreadSafeInteger m_startEndOrder;
|
||||
int m_finalFrameCount; /* set when encoder begins flushing */
|
||||
bool m_bTerminated; /* set true when encoder is closing */
|
||||
|
||||
/* hrd stuff */
|
||||
SEIBufferingPeriod m_bufPeriodSEI;
|
||||
double m_nominalRemovalTime;
|
||||
double m_prevCpbFinalAT;
|
||||
|
||||
/* 2 pass */
|
||||
bool m_2pass;
|
||||
int m_numEntries;
|
||||
FILE* m_statFileOut;
|
||||
FILE* m_cutreeStatFileOut;
|
||||
FILE* m_cutreeStatFileIn;
|
||||
double m_lastAccumPNorm;
|
||||
double m_expectedBitsSum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
|
||||
int64_t m_predictedBits;
|
||||
RateControlEntry* m_rce2Pass;
|
||||
|
||||
struct
|
||||
{
|
||||
uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */
|
||||
int qpBufPos; /* In order to handle pyramid reordering, QP buffer acts as a stack.
|
||||
* This value is the current position (0 or 1). */
|
||||
} m_cuTreeStats;
|
||||
|
||||
RateControl(x265_param& p);
|
||||
bool init(const SPS& sps);
|
||||
void initHRD(SPS& sps);
|
||||
|
||||
void setFinalFrameCount(int count);
|
||||
void terminate(); /* un-block all waiting functions so encoder may close */
|
||||
void destroy();
|
||||
|
||||
// to be called for each curFrame to process RateControl and set QP
|
||||
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
|
||||
void rateControlUpdateStats(RateControlEntry* rce);
|
||||
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);
|
||||
int rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
|
||||
int rateControlSliceType(int frameNum);
|
||||
bool cuTreeReadFor2Pass(Frame* curFrame);
|
||||
void hrdFullness(SEIBufferingPeriod* sei);
|
||||
int writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce);
|
||||
protected:
|
||||
|
||||
static const int s_slidingWindowFrames;
|
||||
static const char* s_defaultStatFileName;
|
||||
|
||||
double m_amortizeFraction;
|
||||
int m_amortizeFrames;
|
||||
int m_residualFrames;
|
||||
int m_partialResidualFrames;
|
||||
int m_residualCost;
|
||||
int m_partialResidualCost;
|
||||
|
||||
x265_zone* getZone();
|
||||
double getQScale(RateControlEntry *rce, double rateFactor);
|
||||
double rateEstimateQscale(Frame* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR
|
||||
double tuneAbrQScaleFromFeedback(double qScale);
|
||||
void accumPQpUpdate();
|
||||
|
||||
int getPredictorType(int lowresSliceType, int sliceType);
|
||||
void updateVbv(int64_t bits, RateControlEntry* rce);
|
||||
void updatePredictor(Predictor *p, double q, double var, double bits);
|
||||
double clipQscale(Frame* pic, RateControlEntry* rce, double q);
|
||||
void updateVbvPlan(Encoder* enc);
|
||||
double predictSize(Predictor *p, double q, double var);
|
||||
void checkAndResetABR(RateControlEntry* rce, bool isFrameDone);
|
||||
double predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpm, int32_t& encodedBits);
|
||||
bool initPass2();
|
||||
double getDiffLimitedQScale(RateControlEntry *rce, double q);
|
||||
double countExpectedBits();
|
||||
bool vbv2Pass(uint64_t allAvailableBits);
|
||||
bool findUnderflow(double *fills, int *t0, int *t1, int over);
|
||||
bool fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax);
|
||||
};
|
||||
}
|
||||
#endif // ifndef X265_RATECONTROL_H
|
147
x265/source/encoder/rdcost.h
Normal file
147
x265/source/encoder/rdcost.h
Normal file
|
@ -0,0 +1,147 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_RDCOST_H
|
||||
#define X265_RDCOST_H
|
||||
|
||||
#include "common.h"
|
||||
#include "slice.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
class RDCost
|
||||
{
|
||||
public:
|
||||
|
||||
/* all weights and factors stored as FIX8 */
|
||||
uint64_t m_lambda2;
|
||||
uint64_t m_lambda;
|
||||
uint32_t m_chromaDistWeight[2];
|
||||
uint32_t m_psyRdBase;
|
||||
uint32_t m_psyRd;
|
||||
int m_qp; /* QP used to configure lambda, may be higher than QP_MAX_SPEC but <= QP_MAX_MAX */
|
||||
|
||||
void setPsyRdScale(double scale) { m_psyRdBase = (uint32_t)floor(65536.0 * scale * 0.33); }
|
||||
|
||||
void setQP(const Slice& slice, int qp)
|
||||
{
|
||||
x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
|
||||
m_qp = qp;
|
||||
setLambda(x265_lambda2_tab[qp], x265_lambda_tab[qp]);
|
||||
|
||||
/* Scale PSY RD factor by a slice type factor */
|
||||
static const uint32_t psyScaleFix8[3] = { 300, 256, 96 }; /* B, P, I */
|
||||
m_psyRd = (m_psyRdBase * psyScaleFix8[slice.m_sliceType]) >> 8;
|
||||
|
||||
/* Scale PSY RD factor by QP, at high QP psy-rd can cause artifacts */
|
||||
if (qp >= 40)
|
||||
{
|
||||
int scale = qp >= QP_MAX_SPEC ? 0 : (QP_MAX_SPEC - qp) * 23;
|
||||
m_psyRd = (m_psyRd * scale) >> 8;
|
||||
}
|
||||
|
||||
int qpCb, qpCr;
|
||||
if (slice.m_sps->chromaFormatIdc == X265_CSP_I420)
|
||||
{
|
||||
qpCb = (int)g_chromaScale[x265_clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[0])];
|
||||
qpCr = (int)g_chromaScale[x265_clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[1])];
|
||||
}
|
||||
else
|
||||
{
|
||||
qpCb = x265_clip3(QP_MIN, QP_MAX_SPEC, qp + slice.m_pps->chromaQpOffset[0]);
|
||||
qpCr = x265_clip3(QP_MIN, QP_MAX_SPEC, qp + slice.m_pps->chromaQpOffset[1]);
|
||||
}
|
||||
|
||||
int chroma_offset_idx = X265_MIN(qp - qpCb + 12, MAX_CHROMA_LAMBDA_OFFSET);
|
||||
uint16_t lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
|
||||
m_chromaDistWeight[0] = lambdaOffset;
|
||||
|
||||
chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET);
|
||||
lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
|
||||
m_chromaDistWeight[1] = lambdaOffset;
|
||||
}
|
||||
|
||||
void setLambda(double lambda2, double lambda)
|
||||
{
|
||||
m_lambda2 = (uint64_t)floor(256.0 * lambda2);
|
||||
m_lambda = (uint64_t)floor(256.0 * lambda);
|
||||
}
|
||||
|
||||
inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
|
||||
{
|
||||
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
|
||||
#if X265_DEPTH <= 10
|
||||
"calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
|
||||
#else
|
||||
"calcRdCost wrap detected dist: " X265_LL ", bits %u, lambda: " X265_LL "\n",
|
||||
#endif
|
||||
distortion, bits, m_lambda2);
|
||||
return distortion + ((bits * m_lambda2 + 128) >> 8);
|
||||
}
|
||||
|
||||
/* return the difference in energy between the source block and the recon block */
|
||||
inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const
|
||||
{
|
||||
return primitives.cu[size].psy_cost_pp(source, sstride, recon, rstride);
|
||||
}
|
||||
|
||||
/* return the difference in energy between the source block and the recon block */
|
||||
inline int psyCost(int size, const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride) const
|
||||
{
|
||||
return primitives.cu[size].psy_cost_ss(source, sstride, recon, rstride);
|
||||
}
|
||||
|
||||
/* return the RD cost of this prediction, including the effect of psy-rd */
|
||||
inline uint64_t calcPsyRdCost(sse_ret_t distortion, uint32_t bits, uint32_t psycost) const
|
||||
{
|
||||
return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
|
||||
}
|
||||
|
||||
inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
|
||||
{
|
||||
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
|
||||
"calcRdSADCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n", sadCost, bits, m_lambda);
|
||||
return sadCost + ((bits * m_lambda + 128) >> 8);
|
||||
}
|
||||
|
||||
inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
|
||||
{
|
||||
X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
|
||||
#if X265_DEPTH <= 10
|
||||
"scaleChromaDist wrap detected dist: %u, lambda: %u\n",
|
||||
#else
|
||||
"scaleChromaDist wrap detected dist: " X265_LL " lambda: %u\n",
|
||||
#endif
|
||||
dist, m_chromaDistWeight[plane - 1]);
|
||||
return (sse_ret_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
|
||||
}
|
||||
|
||||
inline uint32_t getCost(uint32_t bits) const
|
||||
{
|
||||
return (uint32_t)((bits * m_lambda + 128) >> 8);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_RDCOST_H
|
174
x265/source/encoder/reference.cpp
Normal file
174
x265/source/encoder/reference.cpp
Normal file
|
@ -0,0 +1,174 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
* Deepthi Devaki <deepthidevaki@multicorewareinc.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "primitives.h"
|
||||
#include "slice.h"
|
||||
#include "picyuv.h"
|
||||
|
||||
#include "reference.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
/* Start with no weighted-plane buffers; init() allocates them on demand */
MotionReference::MotionReference()
{
    for (int plane = 0; plane < 3; plane++)
        weightBuffer[plane] = NULL;
}
|
||||
|
||||
/* Release whichever weighted-plane buffers were allocated by init() */
MotionReference::~MotionReference()
{
    for (int plane = 0; plane < 3; plane++)
        X265_FREE(weightBuffer[plane]);
}
|
||||
|
||||
/* Bind this reference to a reconstructed picture.  Without weight parameters
 * the planes point straight at the picture's padded integer-pel planes.  With
 * them, every plane flagged present in wp[] is redirected to a private buffer
 * that applyWeight() fills later, and its weight values are recorded in w[].
 *
 * Returns 0 on success, -1 if a weight buffer could not be allocated. */
int MotionReference::init(PicYuv* recPic, WeightParam *wp, const x265_param& p)
{
    reconPic = recPic;
    numWeightedRows = 0;
    lumaStride = recPic->m_stride;
    chromaStride = recPic->m_strideC;
    numInterpPlanes = p.subpelRefine > 2 ? 3 : 1; /* is chroma satd possible? */

    /* directly reference the extended integer pel planes */
    for (int plane = 0; plane < 3; plane++)
        fpelPlane[plane] = recPic->m_picOrg[plane];
    isWeighted = false;

    if (!wp)
        return 0;

    uint32_t numCUinHeight = (reconPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;

    /* luma geometry; switched to chroma geometry once c reaches 1 */
    int marginX = reconPic->m_lumaMarginX;
    int marginY = reconPic->m_lumaMarginY;
    intptr_t stride = reconPic->m_stride;
    int cuHeight = g_maxCUSize;

    for (int c = 0; c < numInterpPlanes; c++)
    {
        if (c == 1)
        {
            marginX = reconPic->m_chromaMarginX;
            marginY = reconPic->m_chromaMarginY;
            stride = reconPic->m_strideC;
            cuHeight >>= reconPic->m_vChromaShift;
        }

        const WeightParam& param = wp[c];
        if (!param.bPresentFlag)
            continue;

        /* buffers are kept across calls and only allocated the first time */
        if (!weightBuffer[c])
        {
            size_t padheight = (numCUinHeight * cuHeight) + marginY * 2;
            weightBuffer[c] = X265_MALLOC(pixel, stride * padheight);
            if (!weightBuffer[c])
                return -1;
        }

        /* use our buffer which will have weighted pixels written to it */
        fpelPlane[c] = weightBuffer[c] + marginY * stride + marginX;
        X265_CHECK(recPic->m_picOrg[c] - recPic->m_picBuf[c] == marginY * stride + marginX, "PicYuv pad calculation mismatch\n");

        w[c].weight = param.inputWeight;
        w[c].offset = param.inputOffset * (1 << (X265_DEPTH - 8));
        w[c].shift = param.log2WeightDenom;
        w[c].round = w[c].shift ? 1 << (w[c].shift - 1) : 0;
    }

    isWeighted = true;
    return 0;
}
|
||||
|
||||
void MotionReference::applyWeight(int finishedRows, int maxNumRows)
|
||||
{
|
||||
finishedRows = X265_MIN(finishedRows, maxNumRows);
|
||||
if (numWeightedRows >= finishedRows)
|
||||
return;
|
||||
|
||||
int marginX = reconPic->m_lumaMarginX;
|
||||
int marginY = reconPic->m_lumaMarginY;
|
||||
intptr_t stride = reconPic->m_stride;
|
||||
int width = reconPic->m_picWidth;
|
||||
int height = (finishedRows - numWeightedRows) * g_maxCUSize;
|
||||
if (finishedRows == maxNumRows && (reconPic->m_picHeight % g_maxCUSize))
|
||||
{
|
||||
/* the last row may be partial height */
|
||||
height -= g_maxCUSize;
|
||||
height += reconPic->m_picHeight % g_maxCUSize;
|
||||
}
|
||||
int cuHeight = g_maxCUSize;
|
||||
|
||||
for (int c = 0; c < numInterpPlanes; c++)
|
||||
{
|
||||
if (c == 1)
|
||||
{
|
||||
marginX = reconPic->m_chromaMarginX;
|
||||
marginY = reconPic->m_chromaMarginY;
|
||||
stride = reconPic->m_strideC;
|
||||
width >>= reconPic->m_hChromaShift;
|
||||
height >>= reconPic->m_vChromaShift;
|
||||
cuHeight >>= reconPic->m_vChromaShift;
|
||||
}
|
||||
|
||||
/* Do not generate weighted predictions if using original picture */
|
||||
if (fpelPlane[c] == reconPic->m_picOrg[c])
|
||||
continue;
|
||||
|
||||
const pixel* src = reconPic->m_picOrg[c] + numWeightedRows * cuHeight * stride;
|
||||
pixel* dst = fpelPlane[c] + numWeightedRows * cuHeight * stride;
|
||||
|
||||
// Computing weighted CU rows
|
||||
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
|
||||
int padwidth = (width + 15) & ~15; // weightp assembly needs even 16 byte widths
|
||||
primitives.weight_pp(src, dst, stride, padwidth, height, w[c].weight, w[c].round << correction, w[c].shift + correction, w[c].offset);
|
||||
|
||||
// Extending Left & Right
|
||||
primitives.extendRowBorder(dst, stride, width, height, marginX);
|
||||
|
||||
// Extending Above
|
||||
if (numWeightedRows == 0)
|
||||
{
|
||||
pixel *pixY = fpelPlane[c] - marginX;
|
||||
for (int y = 0; y < marginY; y++)
|
||||
memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
|
||||
}
|
||||
|
||||
// Extending Bottom
|
||||
if (finishedRows == maxNumRows)
|
||||
{
|
||||
int picHeight = reconPic->m_picHeight;
|
||||
if (c) picHeight >>= reconPic->m_vChromaShift;
|
||||
pixel *pixY = fpelPlane[c] - marginX + (picHeight - 1) * stride;
|
||||
for (int y = 0; y < marginY; y++)
|
||||
memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
|
||||
}
|
||||
}
|
||||
|
||||
numWeightedRows = finishedRows;
|
||||
}
|
56
x265/source/encoder/reference.h
Normal file
56
x265/source/encoder/reference.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_REFERENCE_H
|
||||
#define X265_REFERENCE_H
|
||||
|
||||
#include "primitives.h"
|
||||
#include "picyuv.h"
|
||||
#include "lowres.h"
|
||||
#include "mv.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private x265 namespace
|
||||
|
||||
struct WeightParam;
|
||||
|
||||
class MotionReference : public ReferencePlanes
|
||||
{
|
||||
public:
|
||||
|
||||
MotionReference();
|
||||
~MotionReference();
|
||||
int init(PicYuv*, WeightParam* wp, const x265_param& p);
|
||||
void applyWeight(int rows, int numRows);
|
||||
|
||||
pixel* weightBuffer[3];
|
||||
int numInterpPlanes;
|
||||
int numWeightedRows;
|
||||
|
||||
protected:
|
||||
|
||||
MotionReference& operator =(const MotionReference&);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_REFERENCE_H
|
1709
x265/source/encoder/sao.cpp
Normal file
1709
x265/source/encoder/sao.cpp
Normal file
File diff suppressed because it is too large
Load diff
154
x265/source/encoder/sao.h
Normal file
154
x265/source/encoder/sao.h
Normal file
|
@ -0,0 +1,154 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
* Min Chen <chenm003@163.com>
|
||||
* Praveen Kumar Tiwari <praveen@multicorewareinc.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_SAO_H
|
||||
#define X265_SAO_H
|
||||
|
||||
#include "common.h"
|
||||
#include "frame.h"
|
||||
#include "entropy.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
/* Size constants for the SAO edge offset (EO) and band offset (BO) types */
enum SAOTypeLen
{
    SAO_EO_LEN         = 4,
    SAO_BO_LEN         = 4,
    SAO_NUM_BO_CLASSES = 32
};
|
||||
|
||||
/* SAO type indices: four edge offset types followed by band offset */
enum SAOType
{
    SAO_EO_0         = 0,
    SAO_EO_1         = 1,
    SAO_EO_2         = 2,
    SAO_EO_3         = 3,
    SAO_BO           = 4,
    MAX_NUM_SAO_TYPE = 5 /* number of types above */
};
|
||||
|
||||
class SAO
|
||||
{
|
||||
public:
|
||||
|
||||
enum { SAO_MAX_DEPTH = 4 };
|
||||
enum { SAO_BO_BITS = 5 };
|
||||
enum { MAX_NUM_SAO_CLASS = 33 };
|
||||
enum { SAO_BIT_INC = 0 }; /* in HM12.0, it wrote as X265_MAX(X265_DEPTH - 10, 0) */
|
||||
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
|
||||
enum { NUM_EDGETYPE = 5 };
|
||||
enum { NUM_PLANE = 3 };
|
||||
enum { NUM_MERGE_MODE = 3 };
|
||||
|
||||
static const uint32_t s_eoTable[NUM_EDGETYPE];
|
||||
|
||||
typedef int32_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
|
||||
typedef int32_t (PerPlane[NUM_PLANE][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
|
||||
|
||||
protected:
|
||||
|
||||
/* allocated per part */
|
||||
PerClass* m_count;
|
||||
PerClass* m_offset;
|
||||
PerClass* m_offsetOrg;
|
||||
|
||||
/* allocated per CTU */
|
||||
PerPlane* m_countPreDblk;
|
||||
PerPlane* m_offsetOrgPreDblk;
|
||||
|
||||
double m_depthSaoRate[2][4];
|
||||
int8_t m_offsetBo[SAO_NUM_BO_CLASSES];
|
||||
int8_t m_offsetEo[NUM_EDGETYPE];
|
||||
|
||||
int m_numCuInWidth;
|
||||
int m_numCuInHeight;
|
||||
int m_numPlanes;
|
||||
int m_hChromaShift;
|
||||
int m_vChromaShift;
|
||||
|
||||
pixel* m_clipTable;
|
||||
pixel* m_clipTableBase;
|
||||
|
||||
pixel* m_tmpU1[3];
|
||||
pixel* m_tmpU2[3];
|
||||
pixel* m_tmpL1;
|
||||
pixel* m_tmpL2;
|
||||
|
||||
public:
|
||||
|
||||
struct SAOContexts
|
||||
{
|
||||
Entropy cur;
|
||||
Entropy next;
|
||||
Entropy temp;
|
||||
};
|
||||
|
||||
Frame* m_frame;
|
||||
Entropy m_entropyCoder;
|
||||
SAOContexts m_rdContexts;
|
||||
|
||||
x265_param* m_param;
|
||||
int m_refDepth;
|
||||
int m_numNoSao[2];
|
||||
|
||||
double m_lumaLambda;
|
||||
double m_chromaLambda;
|
||||
/* TODO: No doubles for distortion */
|
||||
|
||||
SAO();
|
||||
|
||||
bool create(x265_param* param);
|
||||
void destroy();
|
||||
|
||||
void allocSaoParam(SAOParam* saoParam) const;
|
||||
|
||||
void startSlice(Frame* pic, Entropy& initState, int qp);
|
||||
void resetStats();
|
||||
void resetSaoUnit(SaoCtuParam* saoUnit);
|
||||
|
||||
// CTU-based SAO process without slice granularity
|
||||
void processSaoCu(int addr, int typeIdx, int plane);
|
||||
void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
|
||||
|
||||
void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc);
|
||||
|
||||
void calcSaoStatsCu(int addr, int plane);
|
||||
void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
|
||||
|
||||
void saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[2], double* mergeDist);
|
||||
void sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist);
|
||||
|
||||
inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
|
||||
int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
|
||||
inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
|
||||
|
||||
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
|
||||
void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ifndef X265_SAO_H
|
3557
x265/source/encoder/search.cpp
Normal file
3557
x265/source/encoder/search.cpp
Normal file
File diff suppressed because it is too large
Load diff
468
x265/source/encoder/search.h
Normal file
468
x265/source/encoder/search.h
Normal file
|
@ -0,0 +1,468 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_SEARCH_H
|
||||
#define X265_SEARCH_H
|
||||
|
||||
#include "common.h"
|
||||
#include "predict.h"
|
||||
#include "quant.h"
|
||||
#include "bitcost.h"
|
||||
#include "framedata.h"
|
||||
#include "yuv.h"
|
||||
#include "threadpool.h"
|
||||
|
||||
#include "rdcost.h"
|
||||
#include "entropy.h"
|
||||
#include "motion.h"
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
#define ProfileCUScopeNamed(name, cu, acc, count) \
|
||||
m_stats[cu.m_encData->m_frameEncoderID].count++; \
|
||||
ScopedElapsedTime name(m_stats[cu.m_encData->m_frameEncoderID].acc)
|
||||
#define ProfileCUScope(cu, acc, count) ProfileCUScopeNamed(timedScope, cu, acc, count)
|
||||
#define ProfileCounter(cu, count) m_stats[cu.m_encData->m_frameEncoderID].count++;
|
||||
#else
|
||||
#define ProfileCUScopeNamed(name, cu, acc, count)
|
||||
#define ProfileCUScope(cu, acc, count)
|
||||
#define ProfileCounter(cu, count)
|
||||
#endif
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
class Entropy;
|
||||
struct ThreadLocalData;
|
||||
|
||||
/* All the CABAC contexts that Analysis needs to keep track of at each depth
|
||||
* and temp buffers for residual, coeff, and recon for use during residual
|
||||
* quad-tree depth recursion */
|
||||
struct RQTData
|
||||
{
|
||||
Entropy cur; /* starting context for current CU */
|
||||
|
||||
/* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
|
||||
* the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
|
||||
* which are reconstructed at each depth are valid. At the end, the transform depth table
|
||||
* is walked and the coeff and recon at the final split depths are collected */
|
||||
Entropy rqtRoot; /* residual quad-tree start context */
|
||||
Entropy rqtTemp; /* residual quad-tree temp context */
|
||||
Entropy rqtTest; /* residual quad-tree test context */
|
||||
coeff_t* coeffRQT[3]; /* coeff storage for entire CTU for each RQT layer */
|
||||
Yuv reconQtYuv; /* recon storage for entire CTU for each RQT layer (intra) */
|
||||
ShortYuv resiQtYuv; /* residual storage for entire CTU for each RQT layer (inter) */
|
||||
|
||||
/* per-depth temp buffers for inter prediction */
|
||||
ShortYuv tmpResiYuv;
|
||||
Yuv tmpPredYuv;
|
||||
Yuv bidirPredYuv[2];
|
||||
};
|
||||
|
||||
struct MotionData
|
||||
{
|
||||
MV mv;
|
||||
MV mvp;
|
||||
int mvpIdx;
|
||||
int ref;
|
||||
uint32_t cost;
|
||||
int bits;
|
||||
};
|
||||
|
||||
struct Mode
|
||||
{
|
||||
CUData cu;
|
||||
const Yuv* fencYuv;
|
||||
Yuv predYuv;
|
||||
Yuv reconYuv;
|
||||
Entropy contexts;
|
||||
|
||||
enum { MAX_INTER_PARTS = 2 };
|
||||
|
||||
MotionData bestME[MAX_INTER_PARTS][2];
|
||||
MV amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
|
||||
|
||||
// Neighbour MVs of the current partition. 5 spatial candidates and the
|
||||
// temporal candidate.
|
||||
InterNeighbourMV interNeighbours[6];
|
||||
|
||||
uint64_t rdCost; // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
|
||||
uint64_t sa8dCost; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
|
||||
uint32_t sa8dBits; // signal bits used in sa8dCost calculation
|
||||
uint32_t psyEnergy; // sum of partition psycho-visual energy difference
|
||||
sse_ret_t resEnergy; // sum of partition residual energy after motion prediction
|
||||
sse_ret_t lumaDistortion;
|
||||
sse_ret_t chromaDistortion;
|
||||
sse_ret_t distortion; // sum of partition SSE distortion
|
||||
uint32_t totalBits; // sum of partition bits (mv + coeff)
|
||||
uint32_t mvBits; // Mv bits + Ref + block type (or intra mode)
|
||||
uint32_t coeffBits; // Texture bits (DCT Coeffs)
|
||||
|
||||
void initCosts()
|
||||
{
|
||||
rdCost = 0;
|
||||
sa8dCost = 0;
|
||||
sa8dBits = 0;
|
||||
psyEnergy = 0;
|
||||
resEnergy = 0;
|
||||
lumaDistortion = 0;
|
||||
chromaDistortion = 0;
|
||||
distortion = 0;
|
||||
totalBits = 0;
|
||||
mvBits = 0;
|
||||
coeffBits = 0;
|
||||
}
|
||||
|
||||
void invalidate()
|
||||
{
|
||||
/* set costs to invalid data, catch uninitialized re-use */
|
||||
rdCost = UINT64_MAX / 2;
|
||||
sa8dCost = UINT64_MAX / 2;
|
||||
sa8dBits = MAX_UINT / 2;
|
||||
psyEnergy = MAX_UINT / 2;
|
||||
#if X265_DEPTH <= 10
|
||||
resEnergy = MAX_UINT / 2;
|
||||
lumaDistortion = MAX_UINT / 2;
|
||||
chromaDistortion = MAX_UINT / 2;
|
||||
distortion = MAX_UINT / 2;
|
||||
#else
|
||||
resEnergy = UINT64_MAX / 2;
|
||||
lumaDistortion = UINT64_MAX / 2;
|
||||
chromaDistortion = UINT64_MAX / 2;
|
||||
distortion = UINT64_MAX / 2;
|
||||
#endif
|
||||
totalBits = MAX_UINT / 2;
|
||||
mvBits = MAX_UINT / 2;
|
||||
coeffBits = MAX_UINT / 2;
|
||||
}
|
||||
|
||||
bool ok() const
|
||||
{
|
||||
#if X265_DEPTH <= 10
|
||||
return !(rdCost >= UINT64_MAX / 2 ||
|
||||
sa8dCost >= UINT64_MAX / 2 ||
|
||||
sa8dBits >= MAX_UINT / 2 ||
|
||||
psyEnergy >= MAX_UINT / 2 ||
|
||||
resEnergy >= MAX_UINT / 2 ||
|
||||
lumaDistortion >= MAX_UINT / 2 ||
|
||||
chromaDistortion >= MAX_UINT / 2 ||
|
||||
distortion >= MAX_UINT / 2 ||
|
||||
totalBits >= MAX_UINT / 2 ||
|
||||
mvBits >= MAX_UINT / 2 ||
|
||||
coeffBits >= MAX_UINT / 2);
|
||||
#else
|
||||
return !(rdCost >= UINT64_MAX / 2 ||
|
||||
sa8dCost >= UINT64_MAX / 2 ||
|
||||
sa8dBits >= MAX_UINT / 2 ||
|
||||
psyEnergy >= MAX_UINT / 2 ||
|
||||
resEnergy >= UINT64_MAX / 2 ||
|
||||
lumaDistortion >= UINT64_MAX / 2 ||
|
||||
chromaDistortion >= UINT64_MAX / 2 ||
|
||||
distortion >= UINT64_MAX / 2 ||
|
||||
totalBits >= MAX_UINT / 2 ||
|
||||
mvBits >= MAX_UINT / 2 ||
|
||||
coeffBits >= MAX_UINT / 2);
|
||||
#endif
|
||||
}
|
||||
|
||||
void addSubCosts(const Mode& subMode)
|
||||
{
|
||||
X265_CHECK(subMode.ok(), "sub-mode not initialized");
|
||||
|
||||
rdCost += subMode.rdCost;
|
||||
sa8dCost += subMode.sa8dCost;
|
||||
sa8dBits += subMode.sa8dBits;
|
||||
psyEnergy += subMode.psyEnergy;
|
||||
resEnergy += subMode.resEnergy;
|
||||
lumaDistortion += subMode.lumaDistortion;
|
||||
chromaDistortion += subMode.chromaDistortion;
|
||||
distortion += subMode.distortion;
|
||||
totalBits += subMode.totalBits;
|
||||
mvBits += subMode.mvBits;
|
||||
coeffBits += subMode.coeffBits;
|
||||
}
|
||||
};
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
/* This structure is intended for performance debugging and we make no attempt
|
||||
* to handle dynamic range overflows. Care should be taken to avoid long encodes
|
||||
* if you care about the accuracy of these elapsed times and counters. This
|
||||
* profiling is orthogonal to PPA/VTune and can be enabled independently from
|
||||
* either of them */
|
||||
struct CUStats
|
||||
{
|
||||
int64_t intraRDOElapsedTime[NUM_CU_DEPTH]; // elapsed worker time in intra RDO per CU depth
|
||||
int64_t interRDOElapsedTime[NUM_CU_DEPTH]; // elapsed worker time in inter RDO per CU depth
|
||||
int64_t intraAnalysisElapsedTime; // elapsed worker time in intra sa8d analysis
|
||||
int64_t motionEstimationElapsedTime; // elapsed worker time in predInterSearch()
|
||||
int64_t loopFilterElapsedTime; // elapsed worker time in deblock and SAO and PSNR/SSIM
|
||||
int64_t pmeTime; // elapsed worker time processing ME slave jobs
|
||||
int64_t pmeBlockTime; // elapsed worker time blocked for pme batch completion
|
||||
int64_t pmodeTime; // elapsed worker time processing pmode slave jobs
|
||||
int64_t pmodeBlockTime; // elapsed worker time blocked for pmode batch completion
|
||||
int64_t weightAnalyzeTime; // elapsed worker time analyzing reference weights
|
||||
int64_t totalCTUTime; // elapsed worker time in compressCTU (includes pmode master)
|
||||
|
||||
uint32_t skippedMotionReferences[NUM_CU_DEPTH];
|
||||
uint32_t totalMotionReferences[NUM_CU_DEPTH];
|
||||
uint32_t skippedIntraCU[NUM_CU_DEPTH];
|
||||
uint32_t totalIntraCU[NUM_CU_DEPTH];
|
||||
|
||||
uint64_t countIntraRDO[NUM_CU_DEPTH];
|
||||
uint64_t countInterRDO[NUM_CU_DEPTH];
|
||||
uint64_t countIntraAnalysis;
|
||||
uint64_t countMotionEstimate;
|
||||
uint64_t countLoopFilter;
|
||||
uint64_t countPMETasks;
|
||||
uint64_t countPMEMasters;
|
||||
uint64_t countPModeTasks;
|
||||
uint64_t countPModeMasters;
|
||||
uint64_t countWeightAnalyze;
|
||||
uint64_t totalCTUs;
|
||||
|
||||
CUStats() { clear(); }
|
||||
|
||||
void clear()
|
||||
{
|
||||
memset(this, 0, sizeof(*this));
|
||||
}
|
||||
|
||||
void accumulate(CUStats& other)
|
||||
{
|
||||
for (uint32_t i = 0; i <= g_maxCUDepth; i++)
|
||||
{
|
||||
intraRDOElapsedTime[i] += other.intraRDOElapsedTime[i];
|
||||
interRDOElapsedTime[i] += other.interRDOElapsedTime[i];
|
||||
countIntraRDO[i] += other.countIntraRDO[i];
|
||||
countInterRDO[i] += other.countInterRDO[i];
|
||||
skippedMotionReferences[i] += other.skippedMotionReferences[i];
|
||||
totalMotionReferences[i] += other.totalMotionReferences[i];
|
||||
skippedIntraCU[i] += other.skippedIntraCU[i];
|
||||
totalIntraCU[i] += other.totalIntraCU[i];
|
||||
}
|
||||
|
||||
intraAnalysisElapsedTime += other.intraAnalysisElapsedTime;
|
||||
motionEstimationElapsedTime += other.motionEstimationElapsedTime;
|
||||
loopFilterElapsedTime += other.loopFilterElapsedTime;
|
||||
pmeTime += other.pmeTime;
|
||||
pmeBlockTime += other.pmeBlockTime;
|
||||
pmodeTime += other.pmodeTime;
|
||||
pmodeBlockTime += other.pmodeBlockTime;
|
||||
weightAnalyzeTime += other.weightAnalyzeTime;
|
||||
totalCTUTime += other.totalCTUTime;
|
||||
|
||||
countIntraAnalysis += other.countIntraAnalysis;
|
||||
countMotionEstimate += other.countMotionEstimate;
|
||||
countLoopFilter += other.countLoopFilter;
|
||||
countPMETasks += other.countPMETasks;
|
||||
countPMEMasters += other.countPMEMasters;
|
||||
countPModeTasks += other.countPModeTasks;
|
||||
countPModeMasters += other.countPModeMasters;
|
||||
countWeightAnalyze += other.countWeightAnalyze;
|
||||
totalCTUs += other.totalCTUs;
|
||||
|
||||
other.clear();
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
/* returns idx + 1 for every index below the last one (numIdx - 1),
 * and idx itself otherwise */
inline int getTUBits(int idx, int numIdx)
{
    if (idx < numIdx - 1)
        return idx + 1;

    return idx;
}
|
||||
|
||||
class Search : public Predict
|
||||
{
|
||||
public:
|
||||
|
||||
static const int16_t zeroShort[MAX_CU_SIZE];
|
||||
|
||||
MotionEstimate m_me;
|
||||
Quant m_quant;
|
||||
RDCost m_rdCost;
|
||||
const x265_param* m_param;
|
||||
Frame* m_frame;
|
||||
const Slice* m_slice;
|
||||
|
||||
Entropy m_entropyCoder;
|
||||
RQTData m_rqt[NUM_FULL_DEPTH];
|
||||
|
||||
uint8_t* m_qtTempCbf[3];
|
||||
uint8_t* m_qtTempTransformSkipFlag[3];
|
||||
|
||||
pixel* m_fencScaled; /* 32x32 buffer for down-scaled version of 64x64 CU fenc */
|
||||
pixel* m_fencTransposed; /* 32x32 buffer for transposed copy of fenc */
|
||||
pixel* m_intraPred; /* 32x32 buffer for individual intra predictions */
|
||||
pixel* m_intraPredAngs; /* allocation for 33 consecutive (all angular) 32x32 intra predictions */
|
||||
|
||||
coeff_t* m_tsCoeff; /* transform skip coeff 32x32 */
|
||||
int16_t* m_tsResidual; /* transform skip residual 32x32 */
|
||||
pixel* m_tsRecon; /* transform skip reconstructed pixels 32x32 */
|
||||
|
||||
bool m_bFrameParallel;
|
||||
bool m_bEnableRDOQ;
|
||||
uint32_t m_numLayers;
|
||||
uint32_t m_refLagPixels;
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
/* Accumulate CU statistics separately for each frame encoder */
|
||||
CUStats m_stats[X265_MAX_FRAME_THREADS];
|
||||
#endif
|
||||
|
||||
Search();
|
||||
~Search();
|
||||
|
||||
bool initSearch(const x265_param& param, ScalingList& scalingList);
|
||||
int setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant QP in valid spec range */
|
||||
|
||||
// mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
|
||||
void invalidateContexts(int fromDepth);
|
||||
|
||||
// full RD search of intra modes. if sharedModes is not NULL, it directly uses them
|
||||
void checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes);
|
||||
|
||||
// select best intra mode using only sa8d costs, cannot measure NxN intra
|
||||
void checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
|
||||
// encode luma mode selected by checkIntraInInter, then pick and encode a chroma mode
|
||||
void encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
|
||||
|
||||
// estimation inter prediction (non-skip)
|
||||
void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
|
||||
|
||||
// encode residual and compute rd-cost for inter mode
|
||||
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
|
||||
void encodeResAndCalcRdSkipCU(Mode& interMode);
|
||||
|
||||
// encode residual without rd-cost
|
||||
void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
|
||||
void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
|
||||
void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth);
|
||||
|
||||
// pick best chroma mode from those available using just sa8d costs
|
||||
void getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);
|
||||
|
||||
/* update CBF flags and QP values to be internally consistent */
|
||||
void checkDQP(Mode& mode, const CUGeom& cuGeom);
|
||||
void checkDQPForSplitPred(Mode& mode, const CUGeom& cuGeom);
|
||||
|
||||
MV getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref);
|
||||
|
||||
class PME : public BondedTaskGroup
|
||||
{
|
||||
public:
|
||||
|
||||
Search& master;
|
||||
Mode& mode;
|
||||
const CUGeom& cuGeom;
|
||||
const PredictionUnit& pu;
|
||||
int puIdx;
|
||||
|
||||
struct {
|
||||
int ref[2][MAX_NUM_REF];
|
||||
int refCnt[2];
|
||||
} m_jobs;
|
||||
|
||||
PME(Search& s, Mode& m, const CUGeom& g, const PredictionUnit& u, int p) : master(s), mode(m), cuGeom(g), pu(u), puIdx(p) {}
|
||||
|
||||
void processTasks(int workerThreadId);
|
||||
|
||||
protected:
|
||||
|
||||
PME operator=(const PME&);
|
||||
};
|
||||
|
||||
void processPME(PME& pme, Search& slave);
|
||||
void singleMotionEstimation(Search& master, Mode& interMode, const PredictionUnit& pu, int part, int list, int ref);
|
||||
|
||||
protected:
|
||||
|
||||
/* motion estimation distribution */
|
||||
ThreadLocalData* m_tld;
|
||||
|
||||
uint32_t m_listSelBits[3];
|
||||
Lock m_meLock;
|
||||
|
||||
void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
|
||||
|
||||
// RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
|
||||
uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
|
||||
|
||||
// RDO select best chroma mode from luma; result is fully encoded chroma. chroma distortion is returned
|
||||
uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes);
|
||||
|
||||
void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
|
||||
void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
|
||||
void codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);
|
||||
|
||||
struct Cost
|
||||
{
|
||||
uint64_t rdcost;
|
||||
uint32_t bits;
|
||||
sse_ret_t distortion;
|
||||
uint32_t energy;
|
||||
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
|
||||
};
|
||||
|
||||
uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
|
||||
void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2]);
|
||||
|
||||
// generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
|
||||
void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
|
||||
void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
|
||||
void extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx);
|
||||
|
||||
// generate chroma prediction, generate residual and recon
|
||||
uint32_t codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy);
|
||||
uint32_t codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy);
|
||||
void extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth);
|
||||
|
||||
// reshuffle CBF flags after coding a pair of 4:2:2 chroma blocks
|
||||
void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx);
|
||||
|
||||
/* output of mergeEstimation, best merge candidate */
|
||||
struct MergeData
|
||||
{
|
||||
MVField mvField[2];
|
||||
uint32_t dir;
|
||||
uint32_t index;
|
||||
uint32_t bits;
|
||||
};
|
||||
|
||||
/* inter/ME helper functions */
|
||||
int selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref);
|
||||
const MV& checkBestMVP(const MV amvpCand[2], const MV& mv, int& mvpIdx, uint32_t& outBits, uint32_t& outCost) const;
|
||||
void setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const;
|
||||
uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m);
|
||||
static void getBlkBits(PartSize cuMode, bool bPSlice, int puIdx, uint32_t lastMode, uint32_t blockBit[3]);
|
||||
|
||||
/* intra helper functions */
|
||||
enum { MAX_RD_INTRA_MODES = 16 };
|
||||
static void updateCandList(uint32_t mode, uint64_t cost, int maxCandCount, uint32_t* candModeList, uint64_t* candCostList);
|
||||
|
||||
// get most probable luma modes for CU part, and bit cost of all non mpm modes
|
||||
uint32_t getIntraRemModeBits(CUData & cu, uint32_t absPartIdx, uint32_t mpmModes[3], uint64_t& mpms) const;
|
||||
|
||||
void updateModeCost(Mode& m) const { m.rdCost = m_rdCost.m_psyRd ? m_rdCost.calcPsyRdCost(m.distortion, m.totalBits, m.psyEnergy) : m_rdCost.calcRdCost(m.distortion, m.totalBits); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_SEARCH_H
|
74
x265/source/encoder/sei.cpp
Normal file
74
x265/source/encoder/sei.cpp
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "bitstream.h"
|
||||
#include "slice.h"
|
||||
#include "sei.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
|
||||
/* x265's identifying GUID */
|
||||
const uint8_t SEIuserDataUnregistered::m_uuid_iso_iec_11578[16] = {
|
||||
0x2C, 0xA2, 0xDE, 0x09, 0xB5, 0x17, 0x47, 0xDB,
|
||||
0xBB, 0x55, 0xA4, 0xFE, 0x7F, 0xC2, 0xFC, 0x4E
|
||||
};
|
||||
|
||||
/* marshal a single SEI message sei, storing the marshalled representation
|
||||
* in bitstream bs */
|
||||
void SEI::write(Bitstream& bs, const SPS& sps)
|
||||
{
|
||||
BitCounter count;
|
||||
m_bitIf = &count;
|
||||
|
||||
/* virtual writeSEI method, write to bit counter */
|
||||
writeSEI(sps);
|
||||
|
||||
m_bitIf = &bs;
|
||||
uint32_t type = payloadType();
|
||||
for (; type >= 0xff; type -= 0xff)
|
||||
WRITE_CODE(0xff, 8, "payload_type");
|
||||
WRITE_CODE(type, 8, "payload_type");
|
||||
|
||||
X265_CHECK(0 == (count.getNumberOfWrittenBits() & 7), "payload unaligned\n");
|
||||
uint32_t payloadSize = count.getNumberOfWrittenBits() >> 3;
|
||||
for (; payloadSize >= 0xff; payloadSize -= 0xff)
|
||||
WRITE_CODE(0xff, 8, "payload_size");
|
||||
WRITE_CODE(payloadSize, 8, "payload_size");
|
||||
|
||||
/* virtual writeSEI method, write to bs */
|
||||
writeSEI(sps);
|
||||
}
|
||||
|
||||
void SEI::writeByteAlign()
|
||||
{
|
||||
// TODO: expose bs.writeByteAlignment() as virtual function
|
||||
if (m_bitIf->getNumberOfWrittenBits() % 8 != 0)
|
||||
{
|
||||
WRITE_FLAG(1, "bit_equal_to_one");
|
||||
while (m_bitIf->getNumberOfWrittenBits() % 8 != 0)
|
||||
{
|
||||
WRITE_FLAG(0, "bit_equal_to_zero");
|
||||
}
|
||||
}
|
||||
}
|
344
x265/source/encoder/sei.h
Normal file
344
x265/source/encoder/sei.h
Normal file
|
@ -0,0 +1,344 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_SEI_H
|
||||
#define X265_SEI_H
|
||||
|
||||
#include "common.h"
|
||||
#include "bitstream.h"
|
||||
#include "slice.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
class SEI : public SyntaxElementWriter
|
||||
{
|
||||
public:
|
||||
|
||||
/* SEI users call write() to marshal an SEI to a bitstream. SEI
|
||||
* subclasses may implement write() or accept the default write()
|
||||
* method which calls writeSEI() with a bitcounter to determine
|
||||
* the size, then it encodes the header and calls writeSEI a
|
||||
* second time for the real encode. */
|
||||
virtual void write(Bitstream& bs, const SPS& sps);
|
||||
|
||||
virtual ~SEI() {}
|
||||
|
||||
protected:
|
||||
|
||||
enum PayloadType
|
||||
{
|
||||
BUFFERING_PERIOD = 0,
|
||||
PICTURE_TIMING = 1,
|
||||
PAN_SCAN_RECT = 2,
|
||||
FILLER_PAYLOAD = 3,
|
||||
USER_DATA_REGISTERED_ITU_T_T35 = 4,
|
||||
USER_DATA_UNREGISTERED = 5,
|
||||
RECOVERY_POINT = 6,
|
||||
SCENE_INFO = 9,
|
||||
FULL_FRAME_SNAPSHOT = 15,
|
||||
PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
|
||||
PROGRESSIVE_REFINEMENT_SEGMENT_END = 17,
|
||||
FILM_GRAIN_CHARACTERISTICS = 19,
|
||||
POST_FILTER_HINT = 22,
|
||||
TONE_MAPPING_INFO = 23,
|
||||
FRAME_PACKING = 45,
|
||||
DISPLAY_ORIENTATION = 47,
|
||||
SOP_DESCRIPTION = 128,
|
||||
ACTIVE_PARAMETER_SETS = 129,
|
||||
DECODING_UNIT_INFO = 130,
|
||||
TEMPORAL_LEVEL0_INDEX = 131,
|
||||
DECODED_PICTURE_HASH = 132,
|
||||
SCALABLE_NESTING = 133,
|
||||
REGION_REFRESH_INFO = 134,
|
||||
MASTERING_DISPLAY_INFO = 137,
|
||||
CONTENT_LIGHT_LEVEL_INFO = 144,
|
||||
};
|
||||
|
||||
virtual PayloadType payloadType() const = 0;
|
||||
|
||||
virtual void writeSEI(const SPS&) { X265_CHECK(0, "empty writeSEI method called\n"); }
|
||||
|
||||
void writeByteAlign();
|
||||
};
|
||||
|
||||
class SEIuserDataUnregistered : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return USER_DATA_UNREGISTERED; }
|
||||
|
||||
SEIuserDataUnregistered() : m_userData(NULL) {}
|
||||
|
||||
static const uint8_t m_uuid_iso_iec_11578[16];
|
||||
uint32_t m_userDataLength;
|
||||
uint8_t *m_userData;
|
||||
|
||||
void write(Bitstream& bs, const SPS&)
|
||||
{
|
||||
m_bitIf = &bs;
|
||||
|
||||
WRITE_CODE(USER_DATA_UNREGISTERED, 8, "payload_type");
|
||||
|
||||
uint32_t payloadSize = 16 + m_userDataLength;
|
||||
for (; payloadSize >= 0xff; payloadSize -= 0xff)
|
||||
WRITE_CODE(0xff, 8, "payload_size");
|
||||
WRITE_CODE(payloadSize, 8, "payload_size");
|
||||
|
||||
for (uint32_t i = 0; i < 16; i++)
|
||||
WRITE_CODE(m_uuid_iso_iec_11578[i], 8, "sei.uuid_iso_iec_11578[i]");
|
||||
|
||||
for (uint32_t i = 0; i < m_userDataLength; i++)
|
||||
WRITE_CODE(m_userData[i], 8, "user_data");
|
||||
}
|
||||
};
|
||||
|
||||
class SEIMasteringDisplayColorVolume : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
uint16_t displayPrimaryX[3];
|
||||
uint16_t displayPrimaryY[3];
|
||||
uint16_t whitePointX, whitePointY;
|
||||
uint32_t maxDisplayMasteringLuminance;
|
||||
uint32_t minDisplayMasteringLuminance;
|
||||
|
||||
PayloadType payloadType() const { return MASTERING_DISPLAY_INFO; }
|
||||
|
||||
bool parse(const char* value)
|
||||
{
|
||||
return sscanf(value, "G(%hu,%hu)B(%hu,%hu)R(%hu,%hu)WP(%hu,%hu)L(%u,%u)",
|
||||
&displayPrimaryX[0], &displayPrimaryY[0],
|
||||
&displayPrimaryX[1], &displayPrimaryY[1],
|
||||
&displayPrimaryX[2], &displayPrimaryY[2],
|
||||
&whitePointX, &whitePointY,
|
||||
&maxDisplayMasteringLuminance, &minDisplayMasteringLuminance) == 10;
|
||||
}
|
||||
|
||||
void write(Bitstream& bs, const SPS&)
|
||||
{
|
||||
m_bitIf = &bs;
|
||||
|
||||
WRITE_CODE(MASTERING_DISPLAY_INFO, 8, "payload_type");
|
||||
WRITE_CODE(8 * 2 + 2 * 4, 8, "payload_size");
|
||||
|
||||
for (uint32_t i = 0; i < 3; i++)
|
||||
{
|
||||
WRITE_CODE(displayPrimaryX[i], 16, "display_primaries_x[ c ]");
|
||||
WRITE_CODE(displayPrimaryY[i], 16, "display_primaries_y[ c ]");
|
||||
}
|
||||
WRITE_CODE(whitePointX, 16, "white_point_x");
|
||||
WRITE_CODE(whitePointY, 16, "white_point_y");
|
||||
WRITE_CODE(maxDisplayMasteringLuminance, 32, "max_display_mastering_luminance");
|
||||
WRITE_CODE(minDisplayMasteringLuminance, 32, "min_display_mastering_luminance");
|
||||
}
|
||||
};
|
||||
|
||||
class SEIContentLightLevel : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
uint16_t max_content_light_level;
|
||||
uint16_t max_pic_average_light_level;
|
||||
|
||||
PayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; }
|
||||
|
||||
void write(Bitstream& bs, const SPS&)
|
||||
{
|
||||
m_bitIf = &bs;
|
||||
|
||||
WRITE_CODE(CONTENT_LIGHT_LEVEL_INFO, 8, "payload_type");
|
||||
WRITE_CODE(4, 8, "payload_size");
|
||||
WRITE_CODE(max_content_light_level, 16, "max_content_light_level");
|
||||
WRITE_CODE(max_pic_average_light_level, 16, "max_pic_average_light_level");
|
||||
}
|
||||
};
|
||||
|
||||
class SEIDecodedPictureHash : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return DECODED_PICTURE_HASH; }
|
||||
|
||||
enum Method
|
||||
{
|
||||
MD5,
|
||||
CRC,
|
||||
CHECKSUM,
|
||||
} m_method;
|
||||
|
||||
uint8_t m_digest[3][16];
|
||||
|
||||
void write(Bitstream& bs, const SPS&)
|
||||
{
|
||||
m_bitIf = &bs;
|
||||
|
||||
WRITE_CODE(DECODED_PICTURE_HASH, 8, "payload_type");
|
||||
|
||||
switch (m_method)
|
||||
{
|
||||
case MD5:
|
||||
WRITE_CODE(1 + 16 * 3, 8, "payload_size");
|
||||
WRITE_CODE(MD5, 8, "hash_type");
|
||||
break;
|
||||
case CRC:
|
||||
WRITE_CODE(1 + 2 * 3, 8, "payload_size");
|
||||
WRITE_CODE(CRC, 8, "hash_type");
|
||||
break;
|
||||
case CHECKSUM:
|
||||
WRITE_CODE(1 + 4 * 3, 8, "payload_size");
|
||||
WRITE_CODE(CHECKSUM, 8, "hash_type");
|
||||
break;
|
||||
}
|
||||
|
||||
for (int yuvIdx = 0; yuvIdx < 3; yuvIdx++)
|
||||
{
|
||||
if (m_method == MD5)
|
||||
{
|
||||
for (uint32_t i = 0; i < 16; i++)
|
||||
WRITE_CODE(m_digest[yuvIdx][i], 8, "picture_md5");
|
||||
}
|
||||
else if (m_method == CRC)
|
||||
{
|
||||
uint32_t val = (m_digest[yuvIdx][0] << 8) + m_digest[yuvIdx][1];
|
||||
WRITE_CODE(val, 16, "picture_crc");
|
||||
}
|
||||
else if (m_method == CHECKSUM)
|
||||
{
|
||||
uint32_t val = (m_digest[yuvIdx][0] << 24) + (m_digest[yuvIdx][1] << 16) + (m_digest[yuvIdx][2] << 8) + m_digest[yuvIdx][3];
|
||||
WRITE_CODE(val, 32, "picture_checksum");
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class SEIActiveParameterSets : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; }
|
||||
|
||||
bool m_selfContainedCvsFlag;
|
||||
bool m_noParamSetUpdateFlag;
|
||||
|
||||
void writeSEI(const SPS&)
|
||||
{
|
||||
WRITE_CODE(0, 4, "active_vps_id");
|
||||
WRITE_FLAG(m_selfContainedCvsFlag, "self_contained_cvs_flag");
|
||||
WRITE_FLAG(m_noParamSetUpdateFlag, "no_param_set_update_flag");
|
||||
WRITE_UVLC(0, "num_sps_ids_minus1");
|
||||
WRITE_UVLC(0, "active_seq_param_set_id");
|
||||
writeByteAlign();
|
||||
}
|
||||
};
|
||||
|
||||
class SEIBufferingPeriod : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return BUFFERING_PERIOD; }
|
||||
|
||||
SEIBufferingPeriod()
|
||||
: m_cpbDelayOffset(0)
|
||||
, m_dpbDelayOffset(0)
|
||||
, m_auCpbRemovalDelayDelta(1)
|
||||
{
|
||||
}
|
||||
|
||||
bool m_cpbDelayOffset;
|
||||
bool m_dpbDelayOffset;
|
||||
uint32_t m_initialCpbRemovalDelay;
|
||||
uint32_t m_initialCpbRemovalDelayOffset;
|
||||
uint32_t m_auCpbRemovalDelayDelta;
|
||||
|
||||
void writeSEI(const SPS& sps)
|
||||
{
|
||||
const HRDInfo& hrd = sps.vuiParameters.hrdParameters;
|
||||
|
||||
WRITE_UVLC(0, "bp_seq_parameter_set_id");
|
||||
WRITE_FLAG(0, "rap_cpb_params_present_flag");
|
||||
WRITE_FLAG(0, "concatenation_flag");
|
||||
WRITE_CODE(m_auCpbRemovalDelayDelta - 1, hrd.cpbRemovalDelayLength, "au_cpb_removal_delay_delta_minus1");
|
||||
WRITE_CODE(m_initialCpbRemovalDelay, hrd.initialCpbRemovalDelayLength, "initial_cpb_removal_delay");
|
||||
WRITE_CODE(m_initialCpbRemovalDelayOffset, hrd.initialCpbRemovalDelayLength, "initial_cpb_removal_delay_offset");
|
||||
|
||||
writeByteAlign();
|
||||
}
|
||||
};
|
||||
|
||||
class SEIPictureTiming : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return PICTURE_TIMING; }
|
||||
|
||||
uint32_t m_picStruct;
|
||||
uint32_t m_sourceScanType;
|
||||
bool m_duplicateFlag;
|
||||
|
||||
uint32_t m_auCpbRemovalDelay;
|
||||
uint32_t m_picDpbOutputDelay;
|
||||
|
||||
void writeSEI(const SPS& sps)
|
||||
{
|
||||
const VUI *vui = &sps.vuiParameters;
|
||||
const HRDInfo *hrd = &vui->hrdParameters;
|
||||
|
||||
if (vui->frameFieldInfoPresentFlag)
|
||||
{
|
||||
WRITE_CODE(m_picStruct, 4, "pic_struct");
|
||||
WRITE_CODE(m_sourceScanType, 2, "source_scan_type");
|
||||
WRITE_FLAG(m_duplicateFlag, "duplicate_flag");
|
||||
}
|
||||
|
||||
if (vui->hrdParametersPresentFlag)
|
||||
{
|
||||
WRITE_CODE(m_auCpbRemovalDelay - 1, hrd->cpbRemovalDelayLength, "au_cpb_removal_delay_minus1");
|
||||
WRITE_CODE(m_picDpbOutputDelay, hrd->dpbOutputDelayLength, "pic_dpb_output_delay");
|
||||
/* Removed sub-pic signaling June 2014 */
|
||||
}
|
||||
writeByteAlign();
|
||||
}
|
||||
};
|
||||
|
||||
class SEIRecoveryPoint : public SEI
|
||||
{
|
||||
public:
|
||||
|
||||
PayloadType payloadType() const { return RECOVERY_POINT; }
|
||||
|
||||
int m_recoveryPocCnt;
|
||||
bool m_exactMatchingFlag;
|
||||
bool m_brokenLinkFlag;
|
||||
|
||||
void writeSEI(const SPS&)
|
||||
{
|
||||
WRITE_SVLC(m_recoveryPocCnt, "recovery_poc_cnt");
|
||||
WRITE_FLAG(m_exactMatchingFlag, "exact_matching_flag");
|
||||
WRITE_FLAG(m_brokenLinkFlag, "broken_link_flag");
|
||||
writeByteAlign();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ifndef X265_SEI_H
|
2190
x265/source/encoder/slicetype.cpp
Normal file
2190
x265/source/encoder/slicetype.cpp
Normal file
File diff suppressed because it is too large
Load diff
243
x265/source/encoder/slicetype.h
Normal file
243
x265/source/encoder/slicetype.h
Normal file
|
@ -0,0 +1,243 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Authors: Steve Borho <steve@borho.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X265_SLICETYPE_H
|
||||
#define X265_SLICETYPE_H
|
||||
|
||||
#include "common.h"
|
||||
#include "slice.h"
|
||||
#include "motion.h"
|
||||
#include "piclist.h"
|
||||
#include "threadpool.h"
|
||||
|
||||
namespace X265_NS {
|
||||
// private namespace
|
||||
|
||||
struct Lowres;
|
||||
class Frame;
|
||||
class Lookahead;
|
||||
|
||||
#define LOWRES_COST_MASK ((1 << 14) - 1)
|
||||
#define LOWRES_COST_SHIFT 14
|
||||
|
||||
/* Thread local data for lookahead tasks */
|
||||
struct LookaheadTLD
|
||||
{
|
||||
MotionEstimate me;
|
||||
ReferencePlanes weightedRef;
|
||||
pixel* wbuffer[4];
|
||||
int widthInCU;
|
||||
int heightInCU;
|
||||
int ncu;
|
||||
int paddedLines;
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
int64_t batchElapsedTime;
|
||||
int64_t coopSliceElapsedTime;
|
||||
uint64_t countBatches;
|
||||
uint64_t countCoopSlices;
|
||||
#endif
|
||||
|
||||
LookaheadTLD()
|
||||
{
|
||||
me.setQP(X265_LOOKAHEAD_QP);
|
||||
me.init(X265_HEX_SEARCH, 1, X265_CSP_I400);
|
||||
for (int i = 0; i < 4; i++)
|
||||
wbuffer[i] = NULL;
|
||||
widthInCU = heightInCU = ncu = paddedLines = 0;
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
batchElapsedTime = 0;
|
||||
coopSliceElapsedTime = 0;
|
||||
countBatches = 0;
|
||||
countCoopSlices = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void init(int w, int h, int n)
|
||||
{
|
||||
widthInCU = w;
|
||||
heightInCU = h;
|
||||
ncu = n;
|
||||
}
|
||||
|
||||
~LookaheadTLD() { X265_FREE(wbuffer[0]); }
|
||||
|
||||
void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
|
||||
void lowresIntraEstimate(Lowres& fenc);
|
||||
|
||||
void weightsAnalyse(Lowres& fenc, Lowres& ref);
|
||||
|
||||
protected:
|
||||
|
||||
uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp);
|
||||
uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
|
||||
bool allocWeightedRef(Lowres& fenc);
|
||||
};
|
||||
|
||||
class Lookahead : public JobProvider
|
||||
{
|
||||
public:
|
||||
|
||||
PicList m_inputQueue; // input pictures in order received
|
||||
PicList m_outputQueue; // pictures to be encoded, in encode order
|
||||
Lock m_inputLock;
|
||||
Lock m_outputLock;
|
||||
|
||||
/* pre-lookahead */
|
||||
int m_fullQueueSize;
|
||||
bool m_isActive;
|
||||
bool m_sliceTypeBusy;
|
||||
bool m_bAdaptiveQuant;
|
||||
bool m_outputSignalRequired;
|
||||
bool m_bBatchMotionSearch;
|
||||
bool m_bBatchFrameCosts;
|
||||
Event m_outputSignal;
|
||||
|
||||
LookaheadTLD* m_tld;
|
||||
x265_param* m_param;
|
||||
Lowres* m_lastNonB;
|
||||
int* m_scratch; // temp buffer for cutree propagate
|
||||
|
||||
int m_histogram[X265_BFRAME_MAX + 1];
|
||||
int m_lastKeyframe;
|
||||
int m_8x8Width;
|
||||
int m_8x8Height;
|
||||
int m_8x8Blocks;
|
||||
int m_numCoopSlices;
|
||||
int m_numRowsPerSlice;
|
||||
bool m_filled;
|
||||
bool m_isSceneTransition;
|
||||
Lookahead(x265_param *param, ThreadPool *pool);
|
||||
|
||||
#if DETAILED_CU_STATS
|
||||
int64_t m_slicetypeDecideElapsedTime;
|
||||
int64_t m_preLookaheadElapsedTime;
|
||||
uint64_t m_countSlicetypeDecide;
|
||||
uint64_t m_countPreLookahead;
|
||||
void getWorkerStats(int64_t& batchElapsedTime, uint64_t& batchCount, int64_t& coopSliceElapsedTime, uint64_t& coopSliceCount);
|
||||
#endif
|
||||
|
||||
bool create();
|
||||
void destroy();
|
||||
void stopJobs();
|
||||
|
||||
void addPicture(Frame&, int sliceType);
|
||||
void flush();
|
||||
Frame* getDecidedPicture();
|
||||
|
||||
void getEstimatedPictureCost(Frame *pic);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
void findJob(int workerThreadID);
|
||||
void slicetypeDecide();
|
||||
void slicetypeAnalyse(Lowres **frames, bool bKeyframe);
|
||||
|
||||
/* called by slicetypeAnalyse() to make slice decisions */
|
||||
bool scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
|
||||
bool scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
|
||||
void slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
|
||||
int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
|
||||
int64_t vbvFrameCost(Lowres **frames, int p0, int p1, int b);
|
||||
void vbvLookahead(Lowres **frames, int numFrames, int keyframes);
|
||||
|
||||
/* called by slicetypeAnalyse() to effect cuTree adjustments to adaptive
|
||||
* quant offsets */
|
||||
void cuTree(Lowres **frames, int numframes, bool bintra);
|
||||
void estimateCUPropagate(Lowres **frames, double average_duration, int p0, int p1, int b, int referenced);
|
||||
void cuTreeFinish(Lowres *frame, double averageDuration, int ref0Distance);
|
||||
|
||||
/* called by getEstimatedPictureCost() to finalize cuTree costs */
|
||||
int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b);
|
||||
};
|
||||
|
||||
class PreLookaheadGroup : public BondedTaskGroup
|
||||
{
|
||||
public:
|
||||
|
||||
Frame* m_preframes[X265_LOOKAHEAD_MAX];
|
||||
Lookahead& m_lookahead;
|
||||
|
||||
PreLookaheadGroup(Lookahead& l) : m_lookahead(l) {}
|
||||
|
||||
void processTasks(int workerThreadID);
|
||||
|
||||
protected:
|
||||
|
||||
PreLookaheadGroup& operator=(const PreLookaheadGroup&);
|
||||
};
|
||||
|
||||
class CostEstimateGroup : public BondedTaskGroup
|
||||
{
|
||||
public:
|
||||
|
||||
Lookahead& m_lookahead;
|
||||
Lowres** m_frames;
|
||||
bool m_batchMode;
|
||||
|
||||
CostEstimateGroup(Lookahead& l, Lowres** f) : m_lookahead(l), m_frames(f), m_batchMode(false) {}
|
||||
|
||||
/* Cooperative cost estimate using multiple slices of downscaled frame */
|
||||
struct Coop
|
||||
{
|
||||
int p0, b, p1;
|
||||
bool bDoSearch[2];
|
||||
} m_coop;
|
||||
|
||||
enum { MAX_COOP_SLICES = 32 };
|
||||
struct Slice
|
||||
{
|
||||
int costEst;
|
||||
int costEstAq;
|
||||
int intraMbs;
|
||||
} m_slice[MAX_COOP_SLICES];
|
||||
|
||||
int64_t singleCost(int p0, int p1, int b, bool intraPenalty = false);
|
||||
|
||||
/* Batch cost estimates, using one worker thread per estimateFrameCost() call */
|
||||
enum { MAX_BATCH_SIZE = 512 };
|
||||
struct Estimate
|
||||
{
|
||||
int p0, b, p1;
|
||||
} m_estimates[MAX_BATCH_SIZE];
|
||||
|
||||
void add(int p0, int p1, int b);
|
||||
void finishBatch();
|
||||
|
||||
protected:
|
||||
|
||||
static const int s_merange = 16;
|
||||
|
||||
void processTasks(int workerThreadID);
|
||||
|
||||
int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);
|
||||
void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice);
|
||||
|
||||
CostEstimateGroup& operator=(const CostEstimateGroup&);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ifndef X265_SLICETYPE_H
|
536
x265/source/encoder/weightPrediction.cpp
Normal file
536
x265/source/encoder/weightPrediction.cpp
Normal file
|
@ -0,0 +1,536 @@
|
|||
/*****************************************************************************
|
||||
* Copyright (C) 2013 x265 project
|
||||
*
|
||||
* Author: Shazeb Nawaz Khan <shazeb@multicorewareinc.com>
|
||||
* Steve Borho <steve@borho.org>
|
||||
* Kavitha Sampas <kavitha@multicorewareinc.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at license @ x265.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "frame.h"
|
||||
#include "picyuv.h"
|
||||
#include "lowres.h"
|
||||
#include "slice.h"
|
||||
#include "mv.h"
|
||||
#include "bitstream.h"
|
||||
|
||||
using namespace X265_NS;
|
||||
namespace {
|
||||
/* Per-frame values shared by the weight analysis helpers in this file;
 * filled in by weightAnalyse() */
struct Cache
{
    const int * intraCost;        // lowres intra costs, indexed per 8x8 lowres block
    int         numPredDir;       // 1 for P slices, otherwise 2
    int         csp;              // picture color space
    int         hshift;           // CHROMA_H_SHIFT(csp)
    int         vshift;           // CHROMA_V_SHIFT(csp)
    int         lowresWidthInCU;  // lowres width  >> 3
    int         lowresHeightInCU; // lowres height >> 3
};
|
||||
|
||||
/* Estimate the lambda-scaled cost of signalling the weight w[0] in the
 * slice header.  bChroma must be 0 or 1: it selects the chroma lambda
 * scaling and how many times the denominator is charged. */
int sliceHeaderCost(WeightParam *w, int lambda, int bChroma)
{
    const WeightParam& wp = w[0];

    /* 4 times higher, because chroma is analyzed at full resolution. */
    const int scaledLambda = bChroma ? lambda * 4 : lambda;

    /* the denominator is charged twice for luma and once for chroma */
    const int denomCost = bs_size_ue(wp.log2WeightDenom) * (2 - bChroma);

    return scaledLambda * (10 + denomCost + 2 * (bs_size_se(wp.inputWeight) + bs_size_se(wp.inputOffset)));
}
|
||||
|
||||
/* make a motion compensated copy of lowres ref into mcout with the same stride.
|
||||
* The borders of mcout are not extended */
|
||||
/* Walk the lowres luma plane of 'ref' in 8x8 blocks, in raster order, and
 * write a motion compensated copy of every block to 'mcout' using one MV
 * per block from 'mvs'.  'mcout' uses the same stride as the lowres plane. */
void mcLuma(pixel* mcout, Lowres& ref, const MV * mvs)
{
    const intptr_t stride = ref.lumaStride;
    const int mvshift = 1 << 2;       /* multiplier applied to the pixel limits below */
    const int cuSize = 8;
    MV mvmin, mvmax;

    int cu = 0;                       /* raster index into mvs[] */

    for (int y = 0; y < ref.lines; y += cuSize)
    {
        /* vertical MV limits: 8 pixels beyond the top and bottom edges */
        mvmin.y = (int16_t)((-y - 8) * mvshift);
        mvmax.y = (int16_t)((ref.lines - y - 1 + 8) * mvshift);

        intptr_t pixoff = y * stride;
        int x = 0;
        while (x < ref.width)
        {
            ALIGN_VAR_16(pixel, buf8x8[8 * 8]);
            intptr_t bstride = 8;

            /* horizontal MV limits: 8 pixels beyond the left and right edges */
            mvmin.x = (int16_t)((-x - 8) * mvshift);
            mvmax.x = (int16_t)((ref.width - x - 1 + 8) * mvshift);

            /* clip MV to available pixels */
            MV mv = mvs[cu];
            mv = mv.clipped(mvmin, mvmax);

            pixel *tmp = ref.lowresMC(pixoff, mv, buf8x8, bstride);
            primitives.cu[BLOCK_8x8].copy_pp(mcout + pixoff, stride, tmp, bstride);

            x += cuSize;
            pixoff += cuSize;
            cu++;
        }
    }
}
|
||||
|
||||
/* use lowres MVs from lookahead to generate a motion compensated chroma plane.
|
||||
* if a block had cheaper lowres cost as intra, we treat it as MV 0 */
|
||||
/* Build a motion compensated chroma plane in 'mcout' from 'src' using the
 * lowres MVs in 'mvs'.  Blocks that fail the lowres bounds test are copied
 * without motion. */
void mcChroma(pixel *      mcout,
              pixel *      src,
              intptr_t     stride,
              const MV *   mvs,
              const Cache& cache,
              int          height,
              int          width)
{
    /* the motion vectors correspond to 8x8 lowres luma blocks, or 16x16 fullres
     * luma blocks. We have to adapt block size to chroma csp */
    const int csp = cache.csp;
    const int bw = 16 >> cache.hshift;
    const int bh = 16 >> cache.vshift;
    const int mvshift = 1 << 2;
    MV mvmin, mvmax;

    for (int y = 0; y < height; y += bh)
    {
        /* note: lowres block count per row might be different from chroma block
         * count per row because of rounding issues, so be very careful with indexing
         * into the lowres structures */
        int cu = y * cache.lowresWidthInCU;
        intptr_t pixoff = y * stride;

        mvmin.y = (int16_t)((-y - 8) * mvshift);
        mvmax.y = (int16_t)((height - y - 1 + 8) * mvshift);

        for (int x = 0; x < width; x += bw, cu++, pixoff += bw)
        {
            pixel *dst = mcout + pixoff;

            const bool bInLowres = x < cache.lowresWidthInCU && y < cache.lowresHeightInCU;
            if (!bInLowres)
            {
                /* plain copy for this block */
                primitives.chroma[csp].pu[LUMA_16x16].copy_pp(dst, stride, src + pixoff, stride);
                continue;
            }

            MV mv = mvs[cu]; // lowres MV
            mv <<= 1;        // fullres MV
            mv.x >>= cache.hshift;
            mv.y >>= cache.vshift;

            /* clip MV to available pixels */
            mvmin.x = (int16_t)((-x - 8) * mvshift);
            mvmax.x = (int16_t)((width - x - 1 + 8) * mvshift);
            mv = mv.clipped(mvmin, mvmax);

            /* split the MV into a pixel offset and fractional filter indices */
            intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
            pixel *temp = src + pixoff + fpeloffset;

            const int xFrac = mv.x & 0x7;
            const int yFrac = mv.y & 0x7;

            if (!xFrac && !yFrac)
            {
                /* no fractional part: straight copy */
                primitives.chroma[csp].pu[LUMA_16x16].copy_pp(dst, stride, temp, stride);
            }
            else if (!yFrac)
            {
                /* horizontal filter only */
                primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, dst, stride, xFrac);
            }
            else if (!xFrac)
            {
                /* vertical filter only */
                primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, dst, stride, yFrac);
            }
            else
            {
                /* horizontal pass into a 16-bit intermediate, then vertical pass */
                ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
                primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, imm, bw, xFrac, 1);
                primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, dst, stride, yFrac);
            }
        }
    }
}
|
||||
|
||||
/* Measure sum of 8x8 satd costs between source frame and reference
|
||||
* frame (potentially weighted, potentially motion compensated). We
|
||||
* always use source images for this analysis since reference recon
|
||||
* pixels have unreliable availability */
|
||||
/* Return the summed block SATD between 'fenc' and 'ref'.  When 'w' is
 * non-NULL a weighted copy of 'ref' is first written to 'weightTemp' and
 * measured instead.  For luma (bLuma) every 8x8 cost is capped by the
 * matching entry of cache.intraCost. */
uint32_t weightCost(pixel *       fenc,
                    pixel *       ref,
                    pixel *       weightTemp,
                    intptr_t      stride,
                    const Cache & cache,
                    int           width,
                    int           height,
                    WeightParam * w,
                    bool          bLuma)
{
    if (w)
    {
        /* make a weighted copy of the reference plane */
        int wOffset = w->inputOffset << (X265_DEPTH - 8);
        int wScale = w->inputWeight;
        int wDenom = w->log2WeightDenom;
        int wRound = wDenom ? 1 << (wDenom - 1) : 0;
        int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
        int paddedWidth = ((width + 15) >> 4) << 4;

        primitives.weight_pp(ref, weightTemp, stride, paddedWidth, height,
                             wScale, wRound << correction, wDenom + correction, wOffset);
        ref = weightTemp;
    }

    uint32_t total = 0;

    if (bLuma)
    {
        /* 8x8 blocks in raster order; cu indexes cache.intraCost */
        int cu = 0;
        for (int y = 0; y < height; y += 8)
        {
            pixel *refRow = ref + y * stride;
            pixel *encRow = fenc + y * stride;
            for (int x = 0; x < width; x += 8, cu++)
            {
                int cmp = primitives.pu[LUMA_8x8].satd(refRow + x, stride, encRow + x, stride);
                total += X265_MIN(cmp, cache.intraCost[cu]);
            }
        }
        return total;
    }

    /* chroma: 16x16 blocks for 4:4:4, 8x8 blocks otherwise */
    const bool b444 = cache.csp == X265_CSP_I444;
    const int blockSize = b444 ? 16 : 8;
    const int part = b444 ? LUMA_16x16 : LUMA_8x8;

    for (int y = 0; y < height; y += blockSize)
    {
        pixel *refRow = ref + y * stride;
        pixel *encRow = fenc + y * stride;
        for (int x = 0; x < width; x += blockSize)
            total += primitives.pu[part].satd(refRow + x, stride, encRow + x, stride);
    }

    return total;
}
|
||||
}
|
||||
|
||||
namespace X265_NS {
|
||||
/* Search for explicit weighted prediction parameters for the first
 * reference of each prediction list of 'slice' and store the result in
 * slice.m_weightPredTable.
 *
 * For each list only reference 0 is analysed; all further references are
 * reset to an unweighted state that uses the chosen denominators.  For each
 * plane a scale is guessed from the ratio of the wp_ssd statistics, then a
 * small range of scales and offsets around the guess is evaluated with
 * weightCost() + sliceHeaderCost().  A weight is kept only when it beats
 * the unweighted cost by a small margin.
 *
 * If the scratch buffer cannot be allocated, weights are disabled on the
 * slice and the function returns early.  When param.logLevel is at least
 * X265_LOG_FULL the chosen weights are logged. */
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param)
{
    WeightParam wp[2][MAX_NUM_REF][3];
    PicYuv *fencPic = frame.m_fencPic;
    Lowres& fenc    = frame.m_lowres;

    Cache cache;

    memset(&cache, 0, sizeof(cache));
    cache.intraCost = fenc.intraCost;
    cache.numPredDir = slice.isInterP() ? 1 : 2;
    cache.lowresWidthInCU = fenc.width >> 3;
    cache.lowresHeightInCU = fenc.lines >> 3;
    cache.csp = fencPic->m_picCsp;
    cache.hshift = CHROMA_H_SHIFT(cache.csp);
    cache.vshift = CHROMA_V_SHIFT(cache.csp);

    /* Use single allocation for motion compensated ref and weight buffers */
    pixel *mcbuf = X265_MALLOC(pixel, 2 * fencPic->m_stride * fencPic->m_picHeight);
    if (!mcbuf)
    {
        slice.disableWeights();
        return;
    }
    /* second half of the allocation holds the weighted reference */
    pixel *weightTemp = mcbuf + fencPic->m_stride * fencPic->m_picHeight;

    int lambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
    int curPoc = slice.m_poc;
    const float epsilon = 1.f / 128.f;

    int chromaDenom, lumaDenom, denom;
    chromaDenom = lumaDenom = 7;

    /* pixel counts used to turn the wp_sum statistics into means; the
     * picture dimensions are rounded up to a multiple of 16 */
    int numpixels[3];
    int w16 = ((fencPic->m_picWidth  + 15) >> 4) << 4;
    int h16 = ((fencPic->m_picHeight + 15) >> 4) << 4;
    numpixels[0] = w16 * h16;
    numpixels[1] = numpixels[2] = numpixels[0] >> (cache.hshift + cache.vshift);

    for (int list = 0; list < cache.numPredDir; list++)
    {
        WeightParam *weights = wp[list][0];
        Frame *refFrame = slice.m_refFrameList[list][0];
        Lowres& refLowres = refFrame->m_lowres;
        int diffPoc = abs(curPoc - refFrame->m_poc);

        /* prepare estimates */
        float guessScale[3], fencMean[3], refMean[3];
        for (int plane = 0; plane < 3; plane++)
        {
            SET_WEIGHT(weights[plane], false, 1, 0, 0);
            /* adding !refLowres.wp_ssd[plane] to both terms keeps the
             * divisor non-zero when the reference statistic is zero */
            uint64_t fencVar = fenc.wp_ssd[plane] + !refLowres.wp_ssd[plane];
            uint64_t refVar  = refLowres.wp_ssd[plane] + !refLowres.wp_ssd[plane];
            guessScale[plane] = sqrt((float)fencVar / refVar);
            fencMean[plane] = (float)fenc.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8));
            refMean[plane]  = (float)refLowres.wp_sum[plane] / (numpixels[plane]) / (1 << (X265_DEPTH - 8));
        }

        /* make sure both our scale factors fit */
        while (!list && chromaDenom > 0)
        {
            float thresh = 127.f / (1 << chromaDenom);
            if (guessScale[1] < thresh && guessScale[2] < thresh)
                break;
            chromaDenom--;
        }

        SET_WEIGHT(weights[1], false, 1 << chromaDenom, chromaDenom, 0);
        SET_WEIGHT(weights[2], false, 1 << chromaDenom, chromaDenom, 0);

        MV *mvs = NULL;

        for (int plane = 0; plane < 3; plane++)
        {
            denom = plane ? chromaDenom : lumaDenom;
            /* chroma is only analysed when a luma weight was found */
            if (plane && !weights[0].bPresentFlag)
                break;

            /* Early termination */
            x265_emms();
            if (fabsf(refMean[plane] - fencMean[plane]) < 0.5f && fabsf(1.f - guessScale[plane]) < epsilon)
            {
                SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0);
                continue;
            }

            if (plane)
            {
                int scale = x265_clip3(0, 255, (int)(guessScale[plane] * (1 << denom) + 0.5f));
                if (scale > 127)
                    continue;
                weights[plane].inputWeight = scale;
            }
            else
            {
                weights[plane].setFromWeightAndOffset((int)(guessScale[plane] * (1 << denom) + 0.5f), 0, denom, !list);
            }

            int mindenom = weights[plane].log2WeightDenom;
            int minscale = weights[plane].inputWeight;
            int minoff = 0;

            /* the MV lookup happens once, on the luma pass; the result is
             * reused by the chroma passes of the same list */
            if (!plane && diffPoc <= param.bframes + 1)
            {
                mvs = fenc.lowresMvs[list][diffPoc - 1];

                /* test whether this motion search was performed by lookahead */
                if (mvs[0].x != 0x7FFF)
                {
                    /* reference chroma planes must be extended prior to being
                     * used as motion compensation sources */
                    if (!refFrame->m_bChromaExtended)
                    {
                        refFrame->m_bChromaExtended = true;
                        PicYuv *refPic = refFrame->m_fencPic;
                        int width = refPic->m_picWidth >> cache.hshift;
                        int height = refPic->m_picHeight >> cache.vshift;
                        extendPicBorder(refPic->m_picOrg[1], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY);
                        extendPicBorder(refPic->m_picOrg[2], refPic->m_strideC, width, height, refPic->m_chromaMarginX, refPic->m_chromaMarginY);
                    }
                }
                else
                    mvs = 0;
            }

            /* prepare inputs to weight analysis */
            pixel *orig;
            pixel *fref;
            intptr_t stride;
            int    width, height;
            switch (plane)
            {
            case 0:
                orig = fenc.lowresPlane[0];
                stride = fenc.lumaStride;
                width = fenc.width;
                height = fenc.lines;
                fref = refLowres.lowresPlane[0];
                if (mvs)
                {
                    mcLuma(mcbuf, refLowres, mvs);
                    fref = mcbuf;
                }
                break;

            case 1:
                orig = fencPic->m_picOrg[1];
                stride = fencPic->m_strideC;
                fref = refFrame->m_fencPic->m_picOrg[1];

                /* Clamp the chroma dimensions to the nearest multiple of
                 * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
                 * blocks and weightCost measures 8x8 blocks.  This
                 * potentially ignores some edge pixels, but simplifies the
                 * logic and prevents reading uninitialized pixels. Lowres
                 * planes are border extended and require no clamping. */
                width =  ((fencPic->m_picWidth  >> 4) << 4) >> cache.hshift;
                height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift;
                if (mvs)
                {
                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
                    fref = mcbuf;
                }
                break;

            case 2:
                orig = fencPic->m_picOrg[2];
                stride = fencPic->m_strideC;
                fref = refFrame->m_fencPic->m_picOrg[2];
                width =  ((fencPic->m_picWidth  >> 4) << 4) >> cache.hshift;
                height = ((fencPic->m_picHeight >> 4) << 4) >> cache.vshift;
                if (mvs)
                {
                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
                    fref = mcbuf;
                }
                break;

            default:
                slice.disableWeights();
                X265_FREE(mcbuf);
                return;
            }

            /* unweighted cost is the baseline that a weight has to beat */
            uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !plane);
            if (!origscore)
            {
                SET_WEIGHT(weights[plane], 0, 1 << denom, denom, 0);
                continue;
            }

            uint32_t minscore = origscore;
            bool bFound = false;

            /* x264 uses a table lookup here, selecting search range based on preset */
            static const int scaleDist = 4;
            static const int offsetDist = 2;

            int startScale = x265_clip3(0, 127, minscale - scaleDist);
            int endScale   = x265_clip3(0, 127, minscale + scaleDist);
            for (int scale = startScale; scale <= endScale; scale++)
            {
                int deltaWeight = scale - (1 << mindenom);
                if (deltaWeight > 127 || deltaWeight <= -128)
                    continue;

                x265_emms();
                int curScale = scale;
                int curOffset = (int)(fencMean[plane] - refMean[plane] * curScale / (1 << mindenom) + 0.5f);
                if (curOffset < -128 || curOffset > 127)
                {
                    /* Rescale considering the constraints on curOffset. We do it in this order
                     * because scale has a much wider range than offset (because of denom), so
                     * it should almost never need to be clamped. */
                    curOffset = x265_clip3(-128, 127, curOffset);
                    curScale = (int)((1 << mindenom) * (fencMean[plane] - curOffset) / refMean[plane] + 0.5f);
                    curScale = x265_clip3(0, 127, curScale);
                }

                int startOffset = x265_clip3(-128, 127, curOffset - offsetDist);
                int endOffset   = x265_clip3(-128, 127, curOffset + offsetDist);
                for (int off = startOffset; off <= endOffset; off++)
                {
                    WeightParam wsp;
                    SET_WEIGHT(wsp, true, curScale, mindenom, off);
                    uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &wsp, !plane) +
                                 sliceHeaderCost(&wsp, lambda, !!plane);
                    COPY4_IF_LT(minscore, s, minscale, curScale, minoff, off, bFound, true);

                    /* Don't check any more offsets if the previous one had a lower cost than the current one */
                    if (minoff == startOffset && off != startOffset)
                        break;
                }
            }

            /* Use a smaller luma denominator if possible */
            if (!(plane || list))
            {
                while (mindenom > 0 && !(minscale & 1))
                {
                    mindenom--;
                    minscale >>= 1;
                }
            }

            /* discard weights that are the identity or save less than 0.2% */
            if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
            {
                SET_WEIGHT(weights[plane], false, 1 << denom, denom, 0);
            }
            else
            {
                SET_WEIGHT(weights[plane], true, minscale, mindenom, minoff);
            }
        }

        if (weights[0].bPresentFlag)
        {
            // Make sure both chroma channels match
            if (weights[1].bPresentFlag != weights[2].bPresentFlag)
            {
                if (weights[1].bPresentFlag)
                    weights[2] = weights[1];
                else
                    weights[1] = weights[2];
            }
        }

        /* the denominators chosen for this list carry over to the next */
        lumaDenom = weights[0].log2WeightDenom;
        chromaDenom = weights[1].log2WeightDenom;

        /* reset weight states */
        for (int ref = 1; ref < slice.m_numRefIdx[list]; ref++)
        {
            SET_WEIGHT(wp[list][ref][0], false, 1 << lumaDenom, lumaDenom, 0);
            SET_WEIGHT(wp[list][ref][1], false, 1 << chromaDenom, chromaDenom, 0);
            SET_WEIGHT(wp[list][ref][2], false, 1 << chromaDenom, chromaDenom, 0);
        }
    }

    X265_FREE(mcbuf);

    memcpy(slice.m_weightPredTable, wp, sizeof(WeightParam) * 2 * MAX_NUM_REF * 3);

    if (param.logLevel >= X265_LOG_FULL)
    {
        char buf[1024];
        int p = 0;
        bool bWeighted = false;

        p = sprintf(buf, "poc: %d weights:", slice.m_poc);
        int numPredDir = slice.isInterP() ? 1 : 2;
        for (int list = 0; list < numPredDir; list++)
        {
            WeightParam* w = &wp[list][0][0];
            if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
            {
                bWeighted = true;
                p += sprintf(buf + p, " [L%d:R0 ", list);
                if (w[0].bPresentFlag)
                    p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
                if (w[1].bPresentFlag)
                    p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
                if (w[2].bPresentFlag)
                    p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
                p += sprintf(buf + p, "]");
            }
        }

        if (bWeighted)
        {
            if (p < 80) // pad with spaces to ensure progress line overwritten
                sprintf(buf + p, "%*s", 80 - p, " ");
            /* x265_log() takes a pointer to the parameter set */
            x265_log(&param, X265_LOG_FULL, "%s\n", buf);
        }
    }
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue