forked from mirror/libbpg
441 lines
14 KiB
C++
441 lines
14 KiB
C++
/*****************************************************************************
|
|
* Copyright (C) 2013 x265 project
|
|
*
|
|
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
|
*
|
|
* This program is also available under a commercial proprietary license.
|
|
* For more information, contact us at license @ x265.com.
|
|
*****************************************************************************/
|
|
|
|
#ifndef X265_COMMON_H
|
|
#define X265_COMMON_H
|
|
|
|
#include <algorithm>
|
|
#include <climits>
|
|
#include <cmath>
|
|
#include <cstdarg>
|
|
#include <cstddef>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <cctype>
|
|
#include <ctime>
|
|
|
|
#include <stdint.h>
|
|
#include <memory.h>
|
|
#include <assert.h>
|
|
|
|
#include "x265.h"
|
|
|
|
#if ENABLE_PPA && ENABLE_VTUNE
|
|
#error "PPA and VTUNE cannot both be enabled. Disable one of them."
|
|
#endif
|
|
#if ENABLE_PPA
|
|
#include "profile/PPA/ppa.h"
|
|
#define ProfileScopeEvent(x) PPAScopeEvent(x)
|
|
#define THREAD_NAME(n,i)
|
|
#define PROFILE_INIT() PPA_INIT()
|
|
#define PROFILE_PAUSE()
|
|
#define PROFILE_RESUME()
|
|
#elif ENABLE_VTUNE
|
|
#include "profile/vtune/vtune.h"
|
|
#define ProfileScopeEvent(x) VTuneScopeEvent _vtuneTask(x)
|
|
#define THREAD_NAME(n,i) vtuneSetThreadName(n, i)
|
|
#define PROFILE_INIT() vtuneInit()
|
|
#define PROFILE_PAUSE() __itt_pause()
|
|
#define PROFILE_RESUME() __itt_resume()
|
|
#else
|
|
#define ProfileScopeEvent(x)
|
|
#define THREAD_NAME(n,i)
|
|
#define PROFILE_INIT()
|
|
#define PROFILE_PAUSE()
|
|
#define PROFILE_RESUME()
|
|
#endif
|
|
|
|
#define FENC_STRIDE 64
|
|
#define NUM_INTRA_MODE 35
|
|
|
|
#if defined(__GNUC__)
|
|
#define ALIGN_VAR_8(T, var) T var __attribute__((aligned(8)))
|
|
#define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
|
|
#define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
|
|
|
|
#if defined(__MINGW32__)
|
|
#define fseeko fseeko64
|
|
#endif
|
|
|
|
#elif defined(_MSC_VER)
|
|
|
|
#define ALIGN_VAR_8(T, var) __declspec(align(8)) T var
|
|
#define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
|
|
#define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
|
|
#define fseeko _fseeki64
|
|
|
|
#endif // if defined(__GNUC__)
|
|
|
|
#if HAVE_INT_TYPES_H
|
|
#define __STDC_FORMAT_MACROS
|
|
#include <inttypes.h>
|
|
#define X265_LL "%" PRIu64
|
|
#else
|
|
#define X265_LL "%lld"
|
|
#endif
|
|
|
|
#if _DEBUG && defined(_MSC_VER)
|
|
#define DEBUG_BREAK() __debugbreak()
|
|
#elif __APPLE_CC__
|
|
#define DEBUG_BREAK() __builtin_trap()
|
|
#else
|
|
#define DEBUG_BREAK() abort()
|
|
#endif
|
|
|
|
/* If compiled with CHECKED_BUILD perform run-time checks and log any that
|
|
* fail, both to stderr and to a file */
|
|
#if CHECKED_BUILD || _DEBUG
|
|
namespace X265_NS { extern int g_checkFailures; }
|
|
#define X265_CHECK(expr, ...) if (!(expr)) { \
|
|
x265_log(NULL, X265_LOG_ERROR, __VA_ARGS__); \
|
|
FILE *fp = fopen("x265_check_failures.txt", "a"); \
|
|
if (fp) { fprintf(fp, "%s:%d\n", __FILE__, __LINE__); fprintf(fp, __VA_ARGS__); fclose(fp); } \
|
|
g_checkFailures++; DEBUG_BREAK(); \
|
|
}
|
|
#if _MSC_VER
|
|
#pragma warning(disable: 4127) // some checks have constant conditions
|
|
#endif
|
|
#else
|
|
#define X265_CHECK(expr, ...)
|
|
#endif
|
|
|
|
#if HIGH_BIT_DEPTH
|
|
typedef uint16_t pixel;
|
|
typedef uint32_t sum_t;
|
|
typedef uint64_t sum2_t;
|
|
typedef uint64_t pixel4;
|
|
typedef int64_t ssum2_t;
|
|
#else
|
|
typedef uint8_t pixel;
|
|
typedef uint16_t sum_t;
|
|
typedef uint32_t sum2_t;
|
|
typedef uint32_t pixel4;
|
|
typedef int32_t ssum2_t; // Signed sum
|
|
#endif // if HIGH_BIT_DEPTH
|
|
|
|
#if X265_DEPTH <= 10
|
|
typedef uint32_t sse_ret_t;
|
|
#else
|
|
typedef uint64_t sse_ret_t;
|
|
#endif
|
|
|
|
#ifndef NULL
|
|
#define NULL 0
|
|
#endif
|
|
|
|
#define MAX_UINT 0xFFFFFFFFU // max. value of unsigned 32-bit integer
|
|
#define MAX_INT 2147483647 // max. value of signed 32-bit integer
|
|
#define MAX_INT64 0x7FFFFFFFFFFFFFFFLL // max. value of signed 64-bit integer
|
|
#define MAX_DOUBLE 1.7e+308 // max. value of double-type value
|
|
|
|
#define QP_MIN 0
|
|
#define QP_MAX_SPEC 51 /* max allowed signaled QP in HEVC */
|
|
#define QP_MAX_MAX 69 /* max allowed QP to be output by rate control */
|
|
|
|
#define MIN_QPSCALE 0.21249999999999999
|
|
#define MAX_MAX_QPSCALE 615.46574234477100
|
|
|
|
#define BITS_FOR_POC 8
|
|
|
|
template<typename T>
|
|
inline T x265_min(T a, T b) { return a < b ? a : b; }
|
|
|
|
template<typename T>
|
|
inline T x265_max(T a, T b) { return a > b ? a : b; }
|
|
|
|
template<typename T>
|
|
inline T x265_clip3(T minVal, T maxVal, T a) { return x265_min(x265_max(minVal, a), maxVal); }
|
|
|
|
template<typename T> /* clip to pixel range, 0..255 or 0..1023 */
|
|
inline pixel x265_clip(T x) { return (pixel)x265_min<T>(T((1 << X265_DEPTH) - 1), x265_max<T>(T(0), x)); }
|
|
|
|
typedef int16_t coeff_t; // transform coefficient
|
|
|
|
#define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
#define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
|
|
#define COPY2_IF_LT(x, y, a, b) \
|
|
if ((y) < (x)) \
|
|
{ \
|
|
(x) = (y); \
|
|
(a) = (b); \
|
|
}
|
|
#define COPY3_IF_LT(x, y, a, b, c, d) \
|
|
if ((y) < (x)) \
|
|
{ \
|
|
(x) = (y); \
|
|
(a) = (b); \
|
|
(c) = (d); \
|
|
}
|
|
#define COPY4_IF_LT(x, y, a, b, c, d, e, f) \
|
|
if ((y) < (x)) \
|
|
{ \
|
|
(x) = (y); \
|
|
(a) = (b); \
|
|
(c) = (d); \
|
|
(e) = (f); \
|
|
}
|
|
#define X265_MIN3(a, b, c) X265_MIN((a), X265_MIN((b), (c)))
|
|
#define X265_MAX3(a, b, c) X265_MAX((a), X265_MAX((b), (c)))
|
|
#define X265_MIN4(a, b, c, d) X265_MIN((a), X265_MIN3((b), (c), (d)))
|
|
#define X265_MAX4(a, b, c, d) X265_MAX((a), X265_MAX3((b), (c), (d)))
|
|
#define QP_BD_OFFSET (6 * (X265_DEPTH - 8))
|
|
#define MAX_CHROMA_LAMBDA_OFFSET 36
|
|
|
|
// arbitrary, but low because SATD scores are 1/4 normal
|
|
#define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
|
|
#define X265_LOOKAHEAD_MAX 250
|
|
|
|
// Use the same size blocks as x264. Using larger blocks seems to give artificially
|
|
// high cost estimates (intra and inter both suffer)
|
|
#define X265_LOWRES_CU_SIZE 8
|
|
#define X265_LOWRES_CU_BITS 3
|
|
|
|
#define X265_MALLOC(type, count) (type*)x265_malloc(sizeof(type) * (count))
|
|
#define X265_FREE(ptr) x265_free(ptr)
|
|
#define CHECKED_MALLOC(var, type, count) \
|
|
{ \
|
|
var = (type*)x265_malloc(sizeof(type) * (count)); \
|
|
if (!var) \
|
|
{ \
|
|
x265_log(NULL, X265_LOG_ERROR, "malloc of size %d failed\n", sizeof(type) * (count)); \
|
|
goto fail; \
|
|
} \
|
|
}
|
|
#define CHECKED_MALLOC_ZERO(var, type, count) \
|
|
{ \
|
|
var = (type*)x265_malloc(sizeof(type) * (count)); \
|
|
if (var) \
|
|
memset((void*)var, 0, sizeof(type) * (count)); \
|
|
else \
|
|
{ \
|
|
x265_log(NULL, X265_LOG_ERROR, "malloc of size %d failed\n", sizeof(type) * (count)); \
|
|
goto fail; \
|
|
} \
|
|
}
|
|
|
|
#if defined(_MSC_VER)
|
|
#define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
|
|
#define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
|
|
#else
|
|
#define X265_LOG2F(x) log2f(x)
|
|
#define X265_LOG2(x) log2(x)
|
|
#endif
|
|
|
|
#define NUM_CU_DEPTH 4 // maximum number of CU depths
|
|
#define NUM_FULL_DEPTH 5 // maximum number of full depths
|
|
#define MIN_LOG2_CU_SIZE 3 // log2(minCUSize)
|
|
#define MAX_LOG2_CU_SIZE 6 // log2(maxCUSize)
|
|
#define MIN_CU_SIZE (1 << MIN_LOG2_CU_SIZE) // minimum allowable size of CU
|
|
#define MAX_CU_SIZE (1 << MAX_LOG2_CU_SIZE) // maximum allowable size of CU
|
|
|
|
#define LOG2_UNIT_SIZE 2 // log2(unitSize)
|
|
#define UNIT_SIZE (1 << LOG2_UNIT_SIZE) // unit size of CU partition
|
|
|
|
#define MAX_NUM_PARTITIONS 256
|
|
#define NUM_4x4_PARTITIONS (1U << (g_unitSizeDepth << 1)) // number of 4x4 units in max CU size
|
|
|
|
#define MIN_PU_SIZE 4
|
|
#define MIN_TU_SIZE 4
|
|
#define MAX_NUM_SPU_W (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
|
|
|
|
#define MAX_LOG2_TR_SIZE 5
|
|
#define MAX_LOG2_TS_SIZE 2 // TODO: RExt
|
|
#define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
|
|
#define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
|
|
|
|
#define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC
|
|
// transitions from Golomb-Rice to TU+EG(k)
|
|
|
|
#define SBH_THRESHOLD 4 // fixed sign bit hiding controlling threshold
|
|
|
|
#define C1FLAG_NUMBER 8 // maximum number of largerThan1 flag coded in one chunk: 16 in HM5
|
|
#define C2FLAG_NUMBER 1 // maximum number of largerThan2 flag coded in one chunk: 16 in HM5
|
|
|
|
#define SAO_ENCODING_RATE 0.75
|
|
#define SAO_ENCODING_RATE_CHROMA 0.5
|
|
|
|
#define MLS_GRP_NUM 64 // Max number of coefficient groups, max(16, 64)
|
|
#define MLS_CG_SIZE 4 // Coefficient group size of 4x4
|
|
#define MLS_CG_BLK_SIZE (MLS_CG_SIZE * MLS_CG_SIZE)
|
|
#define MLS_CG_LOG2_SIZE 2
|
|
|
|
#define QUANT_IQUANT_SHIFT 20 // Q(QP%6) * IQ(QP%6) = 2^20
|
|
#define QUANT_SHIFT 14 // Q(4) = 2^14
|
|
#define SCALE_BITS 15 // Inherited from TMuC, presumably for fractional bit estimates in RDOQ
|
|
#define MAX_TR_DYNAMIC_RANGE 15 // Maximum transform dynamic range (excluding sign bit)
|
|
|
|
#define SHIFT_INV_1ST 7 // Shift after first inverse transform stage
|
|
#define SHIFT_INV_2ND 12 // Shift after second inverse transform stage
|
|
|
|
#define AMVP_DECIMATION_FACTOR 4
|
|
|
|
#define SCAN_SET_SIZE 16
|
|
#define LOG2_SCAN_SET_SIZE 4
|
|
|
|
#define ALL_IDX -1
|
|
#define PLANAR_IDX 0
|
|
#define VER_IDX 26 // index for intra VERTICAL mode
|
|
#define HOR_IDX 10 // index for intra HORIZONTAL mode
|
|
#define DC_IDX 1 // index for intra DC mode
|
|
#define NUM_CHROMA_MODE 5 // total number of chroma modes
|
|
#define DM_CHROMA_IDX 36 // chroma mode index for derived from luma intra mode
|
|
|
|
#define MDCS_ANGLE_LIMIT 4 // distance from true angle that horiz or vertical scan is allowed
|
|
#define MDCS_LOG2_MAX_SIZE 3 // TUs with log2 of size greater than this can only use diagonal scan
|
|
|
|
#define MAX_NUM_REF_PICS 16 // max. number of pictures used for reference
|
|
#define MAX_NUM_REF 16 // max. number of entries in picture reference list
|
|
|
|
#define REF_NOT_VALID -1
|
|
|
|
#define AMVP_NUM_CANDS 2 // number of AMVP candidates
|
|
#define MRG_MAX_NUM_CANDS 5 // max number of final merge candidates
|
|
|
|
#define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
|
|
#define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
|
|
#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
|
|
|
|
#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform
|
|
#define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma
|
|
|
|
namespace X265_NS {
|
|
|
|
enum { SAO_NUM_OFFSET = 4 };
|
|
|
|
enum SaoMergeMode
|
|
{
|
|
SAO_MERGE_NONE,
|
|
SAO_MERGE_LEFT,
|
|
SAO_MERGE_UP
|
|
};
|
|
|
|
struct SaoCtuParam
|
|
{
|
|
SaoMergeMode mergeMode;
|
|
int typeIdx;
|
|
uint32_t bandPos; // BO band position
|
|
int offset[SAO_NUM_OFFSET];
|
|
|
|
void reset()
|
|
{
|
|
mergeMode = SAO_MERGE_NONE;
|
|
typeIdx = -1;
|
|
bandPos = 0;
|
|
offset[0] = 0;
|
|
offset[1] = 0;
|
|
offset[2] = 0;
|
|
offset[3] = 0;
|
|
}
|
|
};
|
|
|
|
struct SAOParam
|
|
{
|
|
SaoCtuParam* ctuParam[3];
|
|
bool bSaoFlag[2];
|
|
int numCuInWidth;
|
|
|
|
SAOParam()
|
|
{
|
|
for (int i = 0; i < 3; i++)
|
|
ctuParam[i] = NULL;
|
|
}
|
|
|
|
~SAOParam()
|
|
{
|
|
delete[] ctuParam[0];
|
|
delete[] ctuParam[1];
|
|
delete[] ctuParam[2];
|
|
}
|
|
};
|
|
|
|
/* Stores inter analysis data for a single frame */
|
|
struct analysis_inter_data
|
|
{
|
|
int32_t* ref;
|
|
uint8_t* depth;
|
|
uint8_t* modes;
|
|
uint32_t* bestMergeCand;
|
|
};
|
|
|
|
/* Stores intra analysis data for a single frame. This struct needs better packing */
|
|
struct analysis_intra_data
|
|
{
|
|
uint8_t* depth;
|
|
uint8_t* modes;
|
|
char* partSizes;
|
|
uint8_t* chromaModes;
|
|
};
|
|
|
|
enum TextType
|
|
{
|
|
TEXT_LUMA = 0, // luma
|
|
TEXT_CHROMA_U = 1, // chroma U
|
|
TEXT_CHROMA_V = 2, // chroma V
|
|
MAX_NUM_COMPONENT = 3
|
|
};
|
|
|
|
// coefficient scanning type used in ACS
|
|
enum ScanType
|
|
{
|
|
SCAN_DIAG = 0, // up-right diagonal scan
|
|
SCAN_HOR = 1, // horizontal first scan
|
|
SCAN_VER = 2, // vertical first scan
|
|
NUM_SCAN_TYPE = 3
|
|
};
|
|
|
|
enum SignificanceMapContextType
|
|
{
|
|
CONTEXT_TYPE_4x4 = 0,
|
|
CONTEXT_TYPE_8x8 = 1,
|
|
CONTEXT_TYPE_NxN = 2,
|
|
CONTEXT_NUMBER_OF_TYPES = 3
|
|
};
|
|
|
|
/* located in pixel.cpp */
|
|
void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
|
|
|
|
/* located in common.cpp */
|
|
int64_t x265_mdate(void);
|
|
#define x265_log(param, ...) general_log(param, "x265", __VA_ARGS__)
|
|
void general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...);
|
|
int x265_exp2fix8(double x);
|
|
|
|
double x265_ssim2dB(double ssim);
|
|
double x265_qScale2qp(double qScale);
|
|
double x265_qp2qScale(double qp);
|
|
uint32_t x265_picturePlaneSize(int csp, int width, int height, int plane);
|
|
|
|
void* x265_malloc(size_t size);
|
|
void x265_free(void *ptr);
|
|
char* x265_slurp_file(const char *filename);
|
|
|
|
/* located in primitives.cpp */
|
|
void x265_setup_primitives(x265_param* param);
|
|
void x265_report_simd(x265_param* param);
|
|
}
|
|
|
|
#include "constants.h"
|
|
|
|
#endif // ifndef X265_COMMON_H
|