1
0
Fork 0
mirror of https://github.com/AquariaOSE/Aquaria.git synced 2025-01-24 17:26:41 +00:00

tbsp: split into alloc, prep, generation steps

This commit is contained in:
fgenesis 2024-07-05 02:10:27 +02:00
parent fc3580ca64
commit 7d2f961573
3 changed files with 181 additions and 97 deletions

View file

@ -1006,6 +1006,7 @@ void AnimationEditor::editStripKey()
BoneKeyframe *bk = a->getKeyframe(currentKey)->getBoneKeyframe(editingBone->boneIdx);
assert(bk->controlpoints.size() == interp->bsp.ctrlX() * interp->bsp.ctrlY());
assert(!splinegrid);
splinegrid = new SplineGrid;
DynamicRenderGrid *rgrid = splinegrid->resize(interp->bsp.ctrlX(), interp->bsp.ctrlY(), grid->width(), grid->height(), interp->bsp.degX(), interp->bsp.degY());

View file

@ -119,11 +119,12 @@ void BSpline2D::resize(size_t cx, size_t cy, unsigned degx, unsigned degy)
const size_t interpStorageSizeX = tbsp__getInterpolatorStorageSize(cx, cx);
const size_t interpStorageSizeY = tbsp__getInterpolatorStorageSize(cy, cy);
const size_t interpInitTempSize = tbsp__getInterpolatorInitTempSize(maxCp, maxCp);
const size_t interpRefreshTempSize = tbsp__getInterpolatorRefreshTempSize(maxCp, maxCp);
const size_t interpStorageNeeded = interpStorageSizeX + interpStorageSizeY;
if(_ext && _ext->capacity < interpStorageNeeded)
{
_ext->~Extended();
free(_ext);
_ext = NULL;
}
@ -139,13 +140,17 @@ void BSpline2D::resize(size_t cx, size_t cy, unsigned degx, unsigned degy)
if(_ext)
{
// Some extra temp memory is required during init, but can be discarded right afterward
std::vector<float> interptmp(interpInitTempSize);
std::vector<float> interptmp(interpRefreshTempSize);
float *mx = _ext->floats();
float *my = mx + interpStorageSizeX;
_ext->interp.x = tbsp::initInterpolator(mx, &interptmp[0], degx, cx, cx, &knotsX[0]);
_ext->interp.y = tbsp::initInterpolator(my, &interptmp[0], degy, cy, cy, &knotsY[0]);
_ext->interp.x.init(mx, cx, cx);
_ext->interp.x.refresh(&interptmp[0], &knotsX[0], degx);
_ext->interp.y.init(my, cy, cy);
_ext->interp.y.refresh(&interptmp[0], &knotsY[0], degy);
_ext->tmp2d.init(cx, cy);
}
}
@ -168,7 +173,7 @@ void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *cont
for(size_t i = 0; i < _cpy; ++i, src += _cpx)
tmpin[i] = *src;
tbsp::generateControlPoints<Vector>(&tmpcp[0], NULL, _ext->interp.y, &tmpin[0]);
_ext->interp.y.generateControlPoints<Vector>(&tmpcp[0], NULL, &tmpin[0]);
for(size_t y = 0; y < _cpy; ++y)
tmp2d(x, y) = tmpcp[y];
@ -179,7 +184,7 @@ void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *cont
{
Vector *row = tmp2d.row(y);
memcpy(&tmpin[0], row, sizeof(Vector) * _cpx);
tbsp::generateControlPoints<Vector>(row, NULL, _ext->interp.x, &tmpin[0]);
_ext->interp.x.generateControlPoints<Vector>(row, NULL, &tmpin[0]);
}
controlpoints = tmp2d.data();

View file

@ -17,7 +17,7 @@ Requirements:
Design notes:
- C++98 compatible. Stand-alone.
- No memory allocation; all memory is caller-controlled.
- No memory allocation; all memory is caller-controlled and caller-owned.
In cases where this is needed, the caller needs to pass appropriately-sized working memory.
@ -298,6 +298,8 @@ namespace detail {
struct Size2d
{
size_t w, h;
inline bool operator==(const Size2d& o) const { return w == o.w && h == o.h; }
};
// Wrap any pointer to access it like a vector (with proper bounds checking)
@ -460,19 +462,28 @@ struct Cholesky
// Initializes a solver in the given memory.
// On success, bumps ptr forward by the memory it needs;
// on failure, returns NULL. Note that this may also fail if A is not well-formed.
// on failure, returns NULL.
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
T *init(T *mem, const Mat& A)
T *init(T *mem, Size2d size)
{
T * const pdiag = mem + (A.size.w * A.size.h);
T * const myend = pdiag + A.size.w;
T * const pdiag = mem + (size.w * size.h);
T * const myend = pdiag + size.w;
TBSP_ASSERT(A.size.w >= A.size.h);
const size_t n = A.size.w;
L.p = mem;
L.size = size;
idiag = pdiag; // T[n]
return myend;
}
L.p = mem;
L.size = A.size;
// Sets the matrix A used for solving later. The solver is preconditioned
// for that matrix so that in (A * x + b) b is the only variable.
// May fail if A is not well formed.
bool refresh(const Mat& A)
{
TBSP_ASSERT(A.size == L.size);
TBSP_ASSERT(A.size.w >= A.size.h);
const size_t n = L.size.w;
// Fill the lower left triangle, storing the diagonal separately
for(size_t y = 0; y < n; ++y)
@ -491,7 +502,7 @@ struct Cholesky
else
{
TBSP_ASSERT(0 && "Cholesky decomposition failed");
return NULL;
return false;
}
}
}
@ -505,7 +516,7 @@ struct Cholesky
row[x] = zero;
}
return myend;
return true;
}
// Solves x for A * x + b. Both b and x must have length n.
@ -545,21 +556,28 @@ struct LUDecomp
// Initializes a solver in the given memory.
// On success, bumps ptr forward by the memory it needs;
// on failure, returns NULL. Note that this may also fail if A is not well-formed.
// on failure, returns NULL.
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
T *init(T *mem, const Mat& A)
T *init(T *mem, Size2d size)
{
T * const pdiag = mem + (A.size.w * A.size.h);
T * const myend = pdiag + A.size.w;
T * const pdiag = mem + (size.w * size.h);
T * const myend = pdiag + size.w;
TBSP_ASSERT(A.size.w == A.size.h);
TBSP_ASSERT(size.w == size.h);
MatrixAcc<T> LU;
LU.p = mem;
LU.size = size;
idiag = pdiag;
LU.size = A.size;
return myend;
}
// Sets the matrix A used for solving later. The solver is preconditioned
// for that matrix so that in (A * x + b) b is the only variable.
void refresh(const Mat& A)
{
TBSP_ASSERT(A.size == LU.size);
const size_t n = A.size.w;
const size_t n = LU.size.w;
// Note: This doesn't do pivoting.
for (size_t y = 0; y < n; ++y)
@ -581,9 +599,6 @@ struct LUDecomp
LU(y, x) = idiagval * e;
}
}
this->LU = LU;
return myend;
}
// Solves x for A * x + b. Both b and x must have length n.
@ -670,14 +685,20 @@ void computeCoeffVector(T * TBSP_RESTRICT Nrow, size_t numcp, T t01, const T * T
}
}
template<typename T>
T* computeCoeffMatrix(MatrixAcc<T>& N, T *mem, const T *knots, size_t nump, size_t numcp, size_t degree)
T *allocateCoeffMatrix(MatrixAcc<T>& N, T *mem, size_t nump, size_t numcp)
{
N.size.w = numcp;
N.size.h = nump;
N.p = mem;
T * const pend = N.p + (nump * numcp);
return mem + (nump * numcp);
}
template<typename T>
void computeCoeffMatrix(MatrixAcc<T>& N, const T *knots, size_t degree)
{
const size_t numcp = N.size.w;
const size_t nump = N.size.h;
const T invsz = T(1) / T(nump - 1);
@ -689,34 +710,95 @@ T* computeCoeffMatrix(MatrixAcc<T>& N, T *mem, const T *knots, size_t nump, size
typename MatrixAcc<T>::RowAcc row = N.row(i);
computeCoeffVector(&row[0], numcp, t01, knots, degree); // row 0 stores all coefficients for _t[0], etc
}
}
return pend;
template<typename T>
Size2d getLeastSquaresSize(const MatrixAcc<T>& N)
{
Size2d sz;
sz.w = N.size.w - 2;
sz.h = N.size.w - 2; // (h = w) is intended, M is a square matrix!
return sz;
}
// Generate least-squares approximator for spline approximation
template<typename T>
T* generateLeastSquares(MatrixAcc<T>& M, T *mem, const MatrixAcc<T>& N)
MatrixAcc<T> generateLeastSquares(T *mem, const MatrixAcc<T>& N)
{
MatrixAcc<T> M;
M.p = mem;
M.size.w = N.size.w - 2;
M.size.h = N.size.w - 2; // (h = w) is intended, M is a square matrix!
M.size == getLeastSquaresSize(N);
T * const pend = M.p + (M.size.w * M.size.h);
matMultCenterCutTransposeWithSelf(M, N);
return pend;
return M;
}
} // end namespace detail
// --------------------------------------------------
// Calculate how many elements of type T are needed as storage for the interpolator
// These should ideally be constexpr, but we want C++98 compatibility.
// For Interpolator::init()
#define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \
( ((numControlPoints) == (numControlPoints)) \
? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \
: ( \
((numControlPoints) * (numPoints)) /* N */ \
+ (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \
) \
)
// For Interpolator::refresh()
#define tbsp__getInterpolatorRefreshTempSize(numControlPoints, numPoints) \
( (numControlPoints) == (numControlPoints) \
? ((numControlPoints) * (numPoints)) /* N */ \
: (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \
)
// For Interpolator::generateControlPoints()
#define tbsp_getInterpolatorWorkTempSize(numControlPoints, numPoints) \
( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) )
// --------------------------------------------------
// One interpolator is precomputed for a specific number of input points and output control points
template<typename T>
struct Interpolator
{
inline Interpolator() : numcp(0), nump(0) {}
// Inits an Interpolator in the passed mem[]. This memory must outlive the Interpolator
// and be at least tbsp__getInterpolatorStorageSize() elements.
// You may move or copy the memory elsewhere and then call init() again to update internal
// pointers to point to the new location.
// This does not access mem at all, it only stores pointers.
// When the number of points or control points change, the memory requirements change too;
// in this case you must re-init(), generate a new knot vector, and refresh().
T *init(T *mem, size_t nump, size_t numcp);
// When the knot vector or degree changes, call this to update the internal matrix solver.
// Must be called before generateControlPoints() can be used.
// tmp[] must have space for at least tbsp__getInterpolatorRefreshTempSize() elements
// and can be discarded after the call returns.
bool refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree);
// Given a set of points P[nump], generate set of control points cp[numcp].
// nump, numcp are defined by the numbers passed to init().
// workmem can be NULL if only interpolation is used (#controlpoints == #points).
// Approximation (#controlpoints < #points) needs extra working memory though,
// so you must pass at least tbsp_getInterpolatorWorkTempSize() elements of P.
// Returns how many control points were generated, for convenience.
// The working memory can be discarded after the call returns.
template<typename P>
size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const;
// ------------------------
protected:
size_t numcp, nump;
union
{
@ -733,120 +815,116 @@ struct Interpolator
#endif
};
// Calculate how many elements of type T are needed as storage for the interpolator
// This should ideally be constexpr, but we want C++98 compatibility.
#define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \
( ((numControlPoints) == (numControlPoints)) \
? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \
: ( \
((numControlPoints) * (numPoints)) /* N */ \
+ (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \
) \
)
#define tbsp__getInterpolatorInitTempSize(numControlPoints, numPoints) \
( (numControlPoints) == (numControlPoints) \
? ((numControlPoints) * (numPoints)) /* N */ \
: (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \
)
template<typename T>
Interpolator<T> initInterpolator(T * TBSP_RESTRICT const mem, T * TBSP_RESTRICT const tmp, size_t degree, size_t nump, size_t numcp, const T * TBSP_RESTRICT knots)
T *Interpolator<T>::init(T * TBSP_RESTRICT const mem, size_t nump, size_t numcp)
{
Interpolator<T> interp;
TBSP_ASSERT(mem);
// Calling this with 2 points is pointless, < 2 is mathematically impossible
if(nump < 2 || numcp < 2)
return interp;
if(nump < 2 || numcp < 2 || !mem)
return false;
// Can only generate less or equal control points than points
TBSP_ASSERT(numcp <= nump);
if(!(numcp <= nump))
return interp;
return false;
T *p = mem;
if(nump == numcp)
{
detail::allocateCoeffMatrix<T>(N, NULL, nump, numcp); // just to get the size
p = solver.ludecomp.init(p, N.size);
}
else
{
p = detail::allocateCoeffMatrix<T>(N, p, nump, numcp);
p = solver.cholesky.init(p, detail::getLeastSquaresSize(N));
}
if(p)
{
TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump));
this->numcp = numcp;
this->nump = nump;
}
return p;
}
// Need storage memory
TBSP_ASSERT(mem && tmp);
template<typename T>
bool Interpolator<T>::refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree)
{
// Need temporary memory
TBSP_ASSERT(tmp);
const size_t numcp = N.size.w;
const size_t nump = N.size.h;
TBSP_ASSERT(numcp && nump);
T *p = numcp == nump ? tmp : mem;
p = detail::computeCoeffMatrix(interp.N, p, knots, nump, numcp, degree);
if(!p)
return interp;
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump));
bool ret = false;
if(nump == numcp)
{
// N allocated in tmp, solver(N) in mem
T *p = detail::allocateCoeffMatrix(N, tmp, nump, numcp);
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump));
detail::computeCoeffMatrix(N, knots, degree);
// N is point-symmetric, ie. NOT diagonally symmetric.
// This means we can't use Cholesky decomposition, but LU decomposition is fine.
// Note: Cholesky decomposition will at first appear to work,
// but the solutions calculated with it are wrong. Don't use this here.
p = interp.solver.ludecomp.init(mem, interp.N);
solver.ludecomp.refresh(N);
// N is no longer needed
// solver(N) stays
interp.N.p = NULL;
interp.N.size.w = interp.N.size.h = 0;
N.p = NULL;
ret = true;
}
else
{
// N allocated in mem, solver(M) in mem, M in tmp
detail::computeCoeffMatrix(N, knots, degree);
// This calculates M = T(N') x N', where N' is N with its borders removed.
// A nice property is that M is diagonally symmetric,
// means it can be efficiently solved via cholesky decomposition.
detail::MatrixAcc<T> M;
p = detail::generateLeastSquares(M, tmp, interp.N);
if(!p)
return interp;
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump));
const detail::MatrixAcc<T> M = detail::generateLeastSquares(tmp, N);
TBSP_ASSERT(M.p + (M.size.w * M.size.h) <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump));
p = interp.solver.cholesky.init(mem, M);
ret = solver.cholesky.refresh(M);
// M is no longer needed
// solver(M) stays
// N stays
}
if(p)
{
TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump));
interp.numcp = numcp;
interp.nump = nump;
}
return interp;
return ret;
}
#define tbsp_getInterpolatorWorkSize(numControlPoints, numPoints) \
( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) )
// Pass workmem[] with at least tbsp_getInterpolatorWorkSize() elements;
// workmem can be NULL if only interpolation is used (#controlpoints == #points).
// Approximation (#controlpoints < #points) needs extra working memory though.
// Returns how many control points were generated, for convenience.
template<typename P, typename T>
size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const Interpolator<T>& interp, const P *points)
template<typename T> template<typename P>
size_t Interpolator<T>::generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const
{
const size_t numcp = interp.numcp;
if(numcp == interp.nump)
if(numcp == nump)
{
// This does not need extra working memory
interp.solver.ludecomp.solve(cp, points);
solver.ludecomp.solve(cp, points);
}
else
{
// Approximate points with less control points, this is more costly
TBSP_ASSERT(workmem); // ... and needs extra memory
const size_t h = numcp - 1;
const size_t n = interp.nump - 1;
const size_t n = nump - 1;
const P p0 = points[0];
const P pn = points[n];
// Wrap the least squares estimator somehow together with the points to approximate...
// Unfortunately I forgot how this works, so no explanation of mathemagics here, sorry :<
const detail::MatrixAcc<T> N = interp.N;
const P initial = points[1] - (p0 * N(0,1)) - (pn * N(h,1));
for(size_t i = 1; i < h; ++i)
{
@ -864,8 +942,8 @@ size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const Interpolat
cp[h] = pn;
// Solve for all points that are not endpoints
TBSP_ASSERT(interp.solver.cholesky.L.size.w == h - 1);
interp.solver.cholesky.solve(cp + 1, workmem);
TBSP_ASSERT(solver.cholesky.L.size.w == h - 1);
solver.cholesky.solve(cp + 1, workmem);
}
return numcp;
}