mirror of
https://github.com/AquariaOSE/Aquaria.git
synced 2025-01-24 17:26:41 +00:00
tbsp: split into alloc, prep, generation steps
This commit is contained in:
parent
fc3580ca64
commit
7d2f961573
3 changed files with 181 additions and 97 deletions
|
@ -1006,6 +1006,7 @@ void AnimationEditor::editStripKey()
|
||||||
|
|
||||||
BoneKeyframe *bk = a->getKeyframe(currentKey)->getBoneKeyframe(editingBone->boneIdx);
|
BoneKeyframe *bk = a->getKeyframe(currentKey)->getBoneKeyframe(editingBone->boneIdx);
|
||||||
assert(bk->controlpoints.size() == interp->bsp.ctrlX() * interp->bsp.ctrlY());
|
assert(bk->controlpoints.size() == interp->bsp.ctrlX() * interp->bsp.ctrlY());
|
||||||
|
assert(!splinegrid);
|
||||||
|
|
||||||
splinegrid = new SplineGrid;
|
splinegrid = new SplineGrid;
|
||||||
DynamicRenderGrid *rgrid = splinegrid->resize(interp->bsp.ctrlX(), interp->bsp.ctrlY(), grid->width(), grid->height(), interp->bsp.degX(), interp->bsp.degY());
|
DynamicRenderGrid *rgrid = splinegrid->resize(interp->bsp.ctrlX(), interp->bsp.ctrlY(), grid->width(), grid->height(), interp->bsp.degX(), interp->bsp.degY());
|
||||||
|
|
|
@ -119,11 +119,12 @@ void BSpline2D::resize(size_t cx, size_t cy, unsigned degx, unsigned degy)
|
||||||
|
|
||||||
const size_t interpStorageSizeX = tbsp__getInterpolatorStorageSize(cx, cx);
|
const size_t interpStorageSizeX = tbsp__getInterpolatorStorageSize(cx, cx);
|
||||||
const size_t interpStorageSizeY = tbsp__getInterpolatorStorageSize(cy, cy);
|
const size_t interpStorageSizeY = tbsp__getInterpolatorStorageSize(cy, cy);
|
||||||
const size_t interpInitTempSize = tbsp__getInterpolatorInitTempSize(maxCp, maxCp);
|
const size_t interpRefreshTempSize = tbsp__getInterpolatorRefreshTempSize(maxCp, maxCp);
|
||||||
const size_t interpStorageNeeded = interpStorageSizeX + interpStorageSizeY;
|
const size_t interpStorageNeeded = interpStorageSizeX + interpStorageSizeY;
|
||||||
|
|
||||||
if(_ext && _ext->capacity < interpStorageNeeded)
|
if(_ext && _ext->capacity < interpStorageNeeded)
|
||||||
{
|
{
|
||||||
|
_ext->~Extended();
|
||||||
free(_ext);
|
free(_ext);
|
||||||
_ext = NULL;
|
_ext = NULL;
|
||||||
}
|
}
|
||||||
|
@ -139,13 +140,17 @@ void BSpline2D::resize(size_t cx, size_t cy, unsigned degx, unsigned degy)
|
||||||
if(_ext)
|
if(_ext)
|
||||||
{
|
{
|
||||||
// Some extra temp memory is required during init, but can be discarded right afterward
|
// Some extra temp memory is required during init, but can be discarded right afterward
|
||||||
std::vector<float> interptmp(interpInitTempSize);
|
std::vector<float> interptmp(interpRefreshTempSize);
|
||||||
|
|
||||||
float *mx = _ext->floats();
|
float *mx = _ext->floats();
|
||||||
float *my = mx + interpStorageSizeX;
|
float *my = mx + interpStorageSizeX;
|
||||||
|
|
||||||
_ext->interp.x = tbsp::initInterpolator(mx, &interptmp[0], degx, cx, cx, &knotsX[0]);
|
_ext->interp.x.init(mx, cx, cx);
|
||||||
_ext->interp.y = tbsp::initInterpolator(my, &interptmp[0], degy, cy, cy, &knotsY[0]);
|
_ext->interp.x.refresh(&interptmp[0], &knotsX[0], degx);
|
||||||
|
|
||||||
|
_ext->interp.y.init(my, cy, cy);
|
||||||
|
_ext->interp.y.refresh(&interptmp[0], &knotsY[0], degy);
|
||||||
|
|
||||||
_ext->tmp2d.init(cx, cy);
|
_ext->tmp2d.init(cx, cy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -168,7 +173,7 @@ void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *cont
|
||||||
for(size_t i = 0; i < _cpy; ++i, src += _cpx)
|
for(size_t i = 0; i < _cpy; ++i, src += _cpx)
|
||||||
tmpin[i] = *src;
|
tmpin[i] = *src;
|
||||||
|
|
||||||
tbsp::generateControlPoints<Vector>(&tmpcp[0], NULL, _ext->interp.y, &tmpin[0]);
|
_ext->interp.y.generateControlPoints<Vector>(&tmpcp[0], NULL, &tmpin[0]);
|
||||||
|
|
||||||
for(size_t y = 0; y < _cpy; ++y)
|
for(size_t y = 0; y < _cpy; ++y)
|
||||||
tmp2d(x, y) = tmpcp[y];
|
tmp2d(x, y) = tmpcp[y];
|
||||||
|
@ -179,7 +184,7 @@ void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *cont
|
||||||
{
|
{
|
||||||
Vector *row = tmp2d.row(y);
|
Vector *row = tmp2d.row(y);
|
||||||
memcpy(&tmpin[0], row, sizeof(Vector) * _cpx);
|
memcpy(&tmpin[0], row, sizeof(Vector) * _cpx);
|
||||||
tbsp::generateControlPoints<Vector>(row, NULL, _ext->interp.x, &tmpin[0]);
|
_ext->interp.x.generateControlPoints<Vector>(row, NULL, &tmpin[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
controlpoints = tmp2d.data();
|
controlpoints = tmp2d.data();
|
||||||
|
|
|
@ -17,7 +17,7 @@ Requirements:
|
||||||
|
|
||||||
Design notes:
|
Design notes:
|
||||||
- C++98 compatible. Stand-alone.
|
- C++98 compatible. Stand-alone.
|
||||||
- No memory allocation; all memory is caller-controlled.
|
- No memory allocation; all memory is caller-controlled and caller-owned.
|
||||||
In cases where this is needed, the caller needs to pass appropriately-sized working memory.
|
In cases where this is needed, the caller needs to pass appropriately-sized working memory.
|
||||||
|
|
||||||
|
|
||||||
|
@ -298,6 +298,8 @@ namespace detail {
|
||||||
struct Size2d
|
struct Size2d
|
||||||
{
|
{
|
||||||
size_t w, h;
|
size_t w, h;
|
||||||
|
|
||||||
|
inline bool operator==(const Size2d& o) const { return w == o.w && h == o.h; }
|
||||||
};
|
};
|
||||||
|
|
||||||
// Wrap any pointer to access it like a vector (with proper bounds checking)
|
// Wrap any pointer to access it like a vector (with proper bounds checking)
|
||||||
|
@ -460,19 +462,28 @@ struct Cholesky
|
||||||
|
|
||||||
// Initializes a solver in the given memory.
|
// Initializes a solver in the given memory.
|
||||||
// On success, bumps ptr forward by the memory it needs;
|
// On success, bumps ptr forward by the memory it needs;
|
||||||
// on failure, returns NULL. Note that this may also fail if A is not well-formed.
|
// on failure, returns NULL.
|
||||||
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
|
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
|
||||||
T *init(T *mem, const Mat& A)
|
T *init(T *mem, Size2d size)
|
||||||
{
|
{
|
||||||
T * const pdiag = mem + (A.size.w * A.size.h);
|
T * const pdiag = mem + (size.w * size.h);
|
||||||
T * const myend = pdiag + A.size.w;
|
T * const myend = pdiag + size.w;
|
||||||
|
|
||||||
TBSP_ASSERT(A.size.w >= A.size.h);
|
L.p = mem;
|
||||||
const size_t n = A.size.w;
|
L.size = size;
|
||||||
idiag = pdiag; // T[n]
|
idiag = pdiag; // T[n]
|
||||||
|
return myend;
|
||||||
|
}
|
||||||
|
|
||||||
L.p = mem;
|
// Sets the matrix A used for solving later. The solver is preconditioned
|
||||||
L.size = A.size;
|
// for that matrix so that in (A * x + b) b is the only variable.
|
||||||
|
// May fail if A is not well formed.
|
||||||
|
bool refresh(const Mat& A)
|
||||||
|
{
|
||||||
|
TBSP_ASSERT(A.size == L.size);
|
||||||
|
TBSP_ASSERT(A.size.w >= A.size.h);
|
||||||
|
|
||||||
|
const size_t n = L.size.w;
|
||||||
|
|
||||||
// Fill the lower left triangle, storing the diagonal separately
|
// Fill the lower left triangle, storing the diagonal separately
|
||||||
for(size_t y = 0; y < n; ++y)
|
for(size_t y = 0; y < n; ++y)
|
||||||
|
@ -491,7 +502,7 @@ struct Cholesky
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
TBSP_ASSERT(0 && "Cholesky decomposition failed");
|
TBSP_ASSERT(0 && "Cholesky decomposition failed");
|
||||||
return NULL;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -505,7 +516,7 @@ struct Cholesky
|
||||||
row[x] = zero;
|
row[x] = zero;
|
||||||
}
|
}
|
||||||
|
|
||||||
return myend;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Solves x for A * x + b. Both b and x must have length n.
|
// Solves x for A * x + b. Both b and x must have length n.
|
||||||
|
@ -545,21 +556,28 @@ struct LUDecomp
|
||||||
|
|
||||||
// Initializes a solver in the given memory.
|
// Initializes a solver in the given memory.
|
||||||
// On success, bumps ptr forward by the memory it needs;
|
// On success, bumps ptr forward by the memory it needs;
|
||||||
// on failure, returns NULL. Note that this may also fail if A is not well-formed.
|
// on failure, returns NULL.
|
||||||
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
|
// Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h))
|
||||||
T *init(T *mem, const Mat& A)
|
T *init(T *mem, Size2d size)
|
||||||
{
|
{
|
||||||
T * const pdiag = mem + (A.size.w * A.size.h);
|
T * const pdiag = mem + (size.w * size.h);
|
||||||
T * const myend = pdiag + A.size.w;
|
T * const myend = pdiag + size.w;
|
||||||
|
|
||||||
TBSP_ASSERT(A.size.w == A.size.h);
|
TBSP_ASSERT(size.w == size.h);
|
||||||
|
|
||||||
MatrixAcc<T> LU;
|
|
||||||
LU.p = mem;
|
LU.p = mem;
|
||||||
|
LU.size = size;
|
||||||
idiag = pdiag;
|
idiag = pdiag;
|
||||||
LU.size = A.size;
|
return myend;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sets the matrix A used for solving later. The solver is preconditioned
|
||||||
|
// for that matrix so that in (A * x + b) b is the only variable.
|
||||||
|
void refresh(const Mat& A)
|
||||||
|
{
|
||||||
|
TBSP_ASSERT(A.size == LU.size);
|
||||||
|
|
||||||
const size_t n = A.size.w;
|
const size_t n = LU.size.w;
|
||||||
|
|
||||||
// Note: This doesn't do pivoting.
|
// Note: This doesn't do pivoting.
|
||||||
for (size_t y = 0; y < n; ++y)
|
for (size_t y = 0; y < n; ++y)
|
||||||
|
@ -581,9 +599,6 @@ struct LUDecomp
|
||||||
LU(y, x) = idiagval * e;
|
LU(y, x) = idiagval * e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->LU = LU;
|
|
||||||
|
|
||||||
return myend;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Solves x for A * x + b. Both b and x must have length n.
|
// Solves x for A * x + b. Both b and x must have length n.
|
||||||
|
@ -670,14 +685,20 @@ void computeCoeffVector(T * TBSP_RESTRICT Nrow, size_t numcp, T t01, const T * T
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T* computeCoeffMatrix(MatrixAcc<T>& N, T *mem, const T *knots, size_t nump, size_t numcp, size_t degree)
|
T *allocateCoeffMatrix(MatrixAcc<T>& N, T *mem, size_t nump, size_t numcp)
|
||||||
{
|
{
|
||||||
N.size.w = numcp;
|
N.size.w = numcp;
|
||||||
N.size.h = nump;
|
N.size.h = nump;
|
||||||
N.p = mem;
|
N.p = mem;
|
||||||
T * const pend = N.p + (nump * numcp);
|
return mem + (nump * numcp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void computeCoeffMatrix(MatrixAcc<T>& N, const T *knots, size_t degree)
|
||||||
|
{
|
||||||
|
const size_t numcp = N.size.w;
|
||||||
|
const size_t nump = N.size.h;
|
||||||
|
|
||||||
const T invsz = T(1) / T(nump - 1);
|
const T invsz = T(1) / T(nump - 1);
|
||||||
|
|
||||||
|
@ -689,34 +710,95 @@ T* computeCoeffMatrix(MatrixAcc<T>& N, T *mem, const T *knots, size_t nump, size
|
||||||
typename MatrixAcc<T>::RowAcc row = N.row(i);
|
typename MatrixAcc<T>::RowAcc row = N.row(i);
|
||||||
computeCoeffVector(&row[0], numcp, t01, knots, degree); // row 0 stores all coefficients for _t[0], etc
|
computeCoeffVector(&row[0], numcp, t01, knots, degree); // row 0 stores all coefficients for _t[0], etc
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return pend;
|
template<typename T>
|
||||||
|
Size2d getLeastSquaresSize(const MatrixAcc<T>& N)
|
||||||
|
{
|
||||||
|
Size2d sz;
|
||||||
|
sz.w = N.size.w - 2;
|
||||||
|
sz.h = N.size.w - 2; // (h = w) is intended, M is a square matrix!
|
||||||
|
return sz;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate least-squares approximator for spline approximation
|
// Generate least-squares approximator for spline approximation
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T* generateLeastSquares(MatrixAcc<T>& M, T *mem, const MatrixAcc<T>& N)
|
MatrixAcc<T> generateLeastSquares(T *mem, const MatrixAcc<T>& N)
|
||||||
{
|
{
|
||||||
|
MatrixAcc<T> M;
|
||||||
M.p = mem;
|
M.p = mem;
|
||||||
M.size.w = N.size.w - 2;
|
M.size == getLeastSquaresSize(N);
|
||||||
M.size.h = N.size.w - 2; // (h = w) is intended, M is a square matrix!
|
|
||||||
|
|
||||||
T * const pend = M.p + (M.size.w * M.size.h);
|
T * const pend = M.p + (M.size.w * M.size.h);
|
||||||
|
|
||||||
matMultCenterCutTransposeWithSelf(M, N);
|
matMultCenterCutTransposeWithSelf(M, N);
|
||||||
|
|
||||||
return pend;
|
return M;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace detail
|
} // end namespace detail
|
||||||
// --------------------------------------------------
|
// --------------------------------------------------
|
||||||
|
|
||||||
|
// Calculate how many elements of type T are needed as storage for the interpolator
|
||||||
|
// These should ideally be constexpr, but we want C++98 compatibility.
|
||||||
|
|
||||||
|
// For Interpolator::init()
|
||||||
|
#define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \
|
||||||
|
( ((numControlPoints) == (numControlPoints)) \
|
||||||
|
? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \
|
||||||
|
: ( \
|
||||||
|
((numControlPoints) * (numPoints)) /* N */ \
|
||||||
|
+ (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \
|
||||||
|
) \
|
||||||
|
)
|
||||||
|
|
||||||
|
// For Interpolator::refresh()
|
||||||
|
#define tbsp__getInterpolatorRefreshTempSize(numControlPoints, numPoints) \
|
||||||
|
( (numControlPoints) == (numControlPoints) \
|
||||||
|
? ((numControlPoints) * (numPoints)) /* N */ \
|
||||||
|
: (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \
|
||||||
|
)
|
||||||
|
|
||||||
|
// For Interpolator::generateControlPoints()
|
||||||
|
#define tbsp_getInterpolatorWorkTempSize(numControlPoints, numPoints) \
|
||||||
|
( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) )
|
||||||
|
|
||||||
|
// --------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
// One interpolator is precomputed for a specific number of input points and output control points
|
// One interpolator is precomputed for a specific number of input points and output control points
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct Interpolator
|
struct Interpolator
|
||||||
{
|
{
|
||||||
inline Interpolator() : numcp(0), nump(0) {}
|
inline Interpolator() : numcp(0), nump(0) {}
|
||||||
|
|
||||||
|
// Inits an Interpolator in the passed mem[]. This memory must outlive the Interpolator
|
||||||
|
// and be at least tbsp__getInterpolatorStorageSize() elements.
|
||||||
|
// You may move or copy the memory elsewhere and then call init() again to update internal
|
||||||
|
// pointers to point to the new location.
|
||||||
|
// This does not access mem at all, it only stores pointers.
|
||||||
|
// When the number of points or control points change, the memory requirements change too;
|
||||||
|
// in this case you must re-init(), generate a new knot vector, and refresh().
|
||||||
|
T *init(T *mem, size_t nump, size_t numcp);
|
||||||
|
|
||||||
|
// When the knot vector or degree changes, call this to update the internal matrix solver.
|
||||||
|
// Must be called before generateControlPoints() can be used.
|
||||||
|
// tmp[] must have space for at least tbsp__getInterpolatorRefreshTempSize() elements
|
||||||
|
// and can be discarded after the call returns.
|
||||||
|
bool refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree);
|
||||||
|
|
||||||
|
// Given a set of points P[nump], generate set of control points cp[numcp].
|
||||||
|
// nump, numcp are defined by the numbers passed to init().
|
||||||
|
// workmem can be NULL if only interpolation is used (#controlpoints == #points).
|
||||||
|
// Approximation (#controlpoints < #points) needs extra working memory though,
|
||||||
|
// so you must pass at least tbsp_getInterpolatorWorkTempSize() elements of P.
|
||||||
|
// Returns how many control points were generated, for convenience.
|
||||||
|
// The working memory can be discarded after the call returns.
|
||||||
|
template<typename P>
|
||||||
|
size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const;
|
||||||
|
|
||||||
|
// ------------------------
|
||||||
|
protected:
|
||||||
size_t numcp, nump;
|
size_t numcp, nump;
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
@ -733,120 +815,116 @@ struct Interpolator
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
// Calculate how many elements of type T are needed as storage for the interpolator
|
|
||||||
// This should ideally be constexpr, but we want C++98 compatibility.
|
|
||||||
#define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \
|
|
||||||
( ((numControlPoints) == (numControlPoints)) \
|
|
||||||
? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \
|
|
||||||
: ( \
|
|
||||||
((numControlPoints) * (numPoints)) /* N */ \
|
|
||||||
+ (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \
|
|
||||||
) \
|
|
||||||
)
|
|
||||||
|
|
||||||
#define tbsp__getInterpolatorInitTempSize(numControlPoints, numPoints) \
|
|
||||||
( (numControlPoints) == (numControlPoints) \
|
|
||||||
? ((numControlPoints) * (numPoints)) /* N */ \
|
|
||||||
: (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \
|
|
||||||
)
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Interpolator<T> initInterpolator(T * TBSP_RESTRICT const mem, T * TBSP_RESTRICT const tmp, size_t degree, size_t nump, size_t numcp, const T * TBSP_RESTRICT knots)
|
T *Interpolator<T>::init(T * TBSP_RESTRICT const mem, size_t nump, size_t numcp)
|
||||||
{
|
{
|
||||||
Interpolator<T> interp;
|
TBSP_ASSERT(mem);
|
||||||
|
|
||||||
// Calling this with 2 points is pointless, < 2 is mathematically impossible
|
// Calling this with 2 points is pointless, < 2 is mathematically impossible
|
||||||
if(nump < 2 || numcp < 2)
|
if(nump < 2 || numcp < 2 || !mem)
|
||||||
return interp;
|
return false;
|
||||||
|
|
||||||
// Can only generate less or equal control points than points
|
// Can only generate less or equal control points than points
|
||||||
TBSP_ASSERT(numcp <= nump);
|
TBSP_ASSERT(numcp <= nump);
|
||||||
if(!(numcp <= nump))
|
if(!(numcp <= nump))
|
||||||
return interp;
|
return false;
|
||||||
|
|
||||||
|
T *p = mem;
|
||||||
|
|
||||||
|
if(nump == numcp)
|
||||||
|
{
|
||||||
|
detail::allocateCoeffMatrix<T>(N, NULL, nump, numcp); // just to get the size
|
||||||
|
p = solver.ludecomp.init(p, N.size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p = detail::allocateCoeffMatrix<T>(N, p, nump, numcp);
|
||||||
|
p = solver.cholesky.init(p, detail::getLeastSquaresSize(N));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(p)
|
||||||
|
{
|
||||||
|
TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump));
|
||||||
|
|
||||||
|
this->numcp = numcp;
|
||||||
|
this->nump = nump;
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
// Need storage memory
|
template<typename T>
|
||||||
TBSP_ASSERT(mem && tmp);
|
bool Interpolator<T>::refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree)
|
||||||
|
{
|
||||||
|
// Need temporary memory
|
||||||
|
TBSP_ASSERT(tmp);
|
||||||
|
|
||||||
|
const size_t numcp = N.size.w;
|
||||||
|
const size_t nump = N.size.h;
|
||||||
|
TBSP_ASSERT(numcp && nump);
|
||||||
|
|
||||||
T *p = numcp == nump ? tmp : mem;
|
bool ret = false;
|
||||||
p = detail::computeCoeffMatrix(interp.N, p, knots, nump, numcp, degree);
|
|
||||||
if(!p)
|
|
||||||
return interp;
|
|
||||||
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump));
|
|
||||||
|
|
||||||
if(nump == numcp)
|
if(nump == numcp)
|
||||||
{
|
{
|
||||||
// N allocated in tmp, solver(N) in mem
|
// N allocated in tmp, solver(N) in mem
|
||||||
|
T *p = detail::allocateCoeffMatrix(N, tmp, nump, numcp);
|
||||||
|
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump));
|
||||||
|
detail::computeCoeffMatrix(N, knots, degree);
|
||||||
|
|
||||||
// N is point-symmetric, ie. NOT diagonally symmetric.
|
// N is point-symmetric, ie. NOT diagonally symmetric.
|
||||||
// This means we can't use Cholesky decomposition, but LU decomposition is fine.
|
// This means we can't use Cholesky decomposition, but LU decomposition is fine.
|
||||||
// Note: Cholesky decomposition will at first appear to work,
|
// Note: Cholesky decomposition will at first appear to work,
|
||||||
// but the solutions calculated with it are wrong. Don't use this here.
|
// but the solutions calculated with it are wrong. Don't use this here.
|
||||||
p = interp.solver.ludecomp.init(mem, interp.N);
|
solver.ludecomp.refresh(N);
|
||||||
|
|
||||||
// N is no longer needed
|
// N is no longer needed
|
||||||
// solver(N) stays
|
// solver(N) stays
|
||||||
interp.N.p = NULL;
|
N.p = NULL;
|
||||||
interp.N.size.w = interp.N.size.h = 0;
|
ret = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// N allocated in mem, solver(M) in mem, M in tmp
|
// N allocated in mem, solver(M) in mem, M in tmp
|
||||||
|
detail::computeCoeffMatrix(N, knots, degree);
|
||||||
|
|
||||||
// This calculates M = T(N') x N', where N' is N with its borders removed.
|
// This calculates M = T(N') x N', where N' is N with its borders removed.
|
||||||
// A nice property is that M is diagonally symmetric,
|
// A nice property is that M is diagonally symmetric,
|
||||||
// means it can be efficiently solved via cholesky decomposition.
|
// means it can be efficiently solved via cholesky decomposition.
|
||||||
detail::MatrixAcc<T> M;
|
const detail::MatrixAcc<T> M = detail::generateLeastSquares(tmp, N);
|
||||||
p = detail::generateLeastSquares(M, tmp, interp.N);
|
TBSP_ASSERT(M.p + (M.size.w * M.size.h) <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump));
|
||||||
if(!p)
|
|
||||||
return interp;
|
|
||||||
TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump));
|
|
||||||
|
|
||||||
p = interp.solver.cholesky.init(mem, M);
|
ret = solver.cholesky.refresh(M);
|
||||||
|
|
||||||
// M is no longer needed
|
// M is no longer needed
|
||||||
// solver(M) stays
|
// solver(M) stays
|
||||||
// N stays
|
// N stays
|
||||||
}
|
}
|
||||||
|
|
||||||
if(p)
|
return ret;
|
||||||
{
|
|
||||||
TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump));
|
|
||||||
|
|
||||||
interp.numcp = numcp;
|
|
||||||
interp.nump = nump;
|
|
||||||
}
|
|
||||||
|
|
||||||
return interp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define tbsp_getInterpolatorWorkSize(numControlPoints, numPoints) \
|
template<typename T> template<typename P>
|
||||||
( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) )
|
size_t Interpolator<T>::generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const
|
||||||
|
|
||||||
// Pass workmem[] with at least tbsp_getInterpolatorWorkSize() elements;
|
|
||||||
// workmem can be NULL if only interpolation is used (#controlpoints == #points).
|
|
||||||
// Approximation (#controlpoints < #points) needs extra working memory though.
|
|
||||||
// Returns how many control points were generated, for convenience.
|
|
||||||
template<typename P, typename T>
|
|
||||||
size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const Interpolator<T>& interp, const P *points)
|
|
||||||
{
|
{
|
||||||
const size_t numcp = interp.numcp;
|
if(numcp == nump)
|
||||||
if(numcp == interp.nump)
|
|
||||||
{
|
{
|
||||||
// This does not need extra working memory
|
// This does not need extra working memory
|
||||||
interp.solver.ludecomp.solve(cp, points);
|
solver.ludecomp.solve(cp, points);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Approximate points with less control points, this is more costly
|
// Approximate points with less control points, this is more costly
|
||||||
TBSP_ASSERT(workmem); // ... and needs extra memory
|
TBSP_ASSERT(workmem); // ... and needs extra memory
|
||||||
const size_t h = numcp - 1;
|
const size_t h = numcp - 1;
|
||||||
const size_t n = interp.nump - 1;
|
const size_t n = nump - 1;
|
||||||
const P p0 = points[0];
|
const P p0 = points[0];
|
||||||
const P pn = points[n];
|
const P pn = points[n];
|
||||||
|
|
||||||
// Wrap the least squares estimator somehow together with the points to approximate...
|
// Wrap the least squares estimator somehow together with the points to approximate...
|
||||||
// Unfortunately I forgot how this works, so no explanation of mathemagics here, sorry :<
|
// Unfortunately I forgot how this works, so no explanation of mathemagics here, sorry :<
|
||||||
const detail::MatrixAcc<T> N = interp.N;
|
|
||||||
const P initial = points[1] - (p0 * N(0,1)) - (pn * N(h,1));
|
const P initial = points[1] - (p0 * N(0,1)) - (pn * N(h,1));
|
||||||
for(size_t i = 1; i < h; ++i)
|
for(size_t i = 1; i < h; ++i)
|
||||||
{
|
{
|
||||||
|
@ -864,8 +942,8 @@ size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const Interpolat
|
||||||
cp[h] = pn;
|
cp[h] = pn;
|
||||||
|
|
||||||
// Solve for all points that are not endpoints
|
// Solve for all points that are not endpoints
|
||||||
TBSP_ASSERT(interp.solver.cholesky.L.size.w == h - 1);
|
TBSP_ASSERT(solver.cholesky.L.size.w == h - 1);
|
||||||
interp.solver.cholesky.solve(cp + 1, workmem);
|
solver.cholesky.solve(cp + 1, workmem);
|
||||||
}
|
}
|
||||||
return numcp;
|
return numcp;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue