From fc3580ca64c2bd2dfe29d278b1ad5925d1cd8be3 Mon Sep 17 00:00:00 2001 From: fgenesis Date: Thu, 4 Jul 2024 21:40:31 +0200 Subject: [PATCH] new tbsp version; working spline control point generation prototype --- BBGE/Interpolators.cpp | 96 ++++- BBGE/Interpolators.h | 28 ++ BBGE/SkeletalSprite.cpp | 21 +- BBGE/SplineGrid.h | 5 - ExternalLibs/tbsp.hh | 813 ++++++++++++++++++++++++++++++++++++---- 5 files changed, 876 insertions(+), 87 deletions(-) diff --git a/BBGE/Interpolators.cpp b/BBGE/Interpolators.cpp index 92ec687..ad58153 100644 --- a/BBGE/Interpolators.cpp +++ b/BBGE/Interpolators.cpp @@ -1,6 +1,5 @@ #include "Interpolators.h" #include -#include "tbsp.hh" // usually one would expect that a bspline goes from t=0 to t=1. // here, splines eval between these -0.5 .. +0.5. @@ -79,7 +78,24 @@ tail: } BSpline2D::BSpline2D() - : _cpx(0), _cpy(0), _degx(0), _degy(0), _tmin(0), _tmax(0) + : _cpx(0), _cpy(0), _degx(0), _degy(0), _tmin(0), _tmax(0), _ext(NULL) +{ +} + +BSpline2D::~BSpline2D() +{ + if(_ext) + { + _ext->~Extended(); + free(_ext); + } +} + +BSpline2D::BSpline2D(const BSpline2D& o) + : _cpx(o._cpx), _cpy(o._cpy), _degx(o._degx), _degy(o._degy) + , _tmin(o._tmin), _tmax(o._tmax) + , knotsX(o.knotsX), knotsY(o.knotsY) + , _ext(NULL) // VERY important { } @@ -97,13 +113,83 @@ void BSpline2D::resize(size_t cx, size_t cy, unsigned degx, unsigned degy) _degy = degy; _tmin = tmin; _tmax = tmax; + + + const size_t maxCp = std::max(cx, cy); + + const size_t interpStorageSizeX = tbsp__getInterpolatorStorageSize(cx, cx); + const size_t interpStorageSizeY = tbsp__getInterpolatorStorageSize(cy, cy); + const size_t interpInitTempSize = tbsp__getInterpolatorInitTempSize(maxCp, maxCp); + const size_t interpStorageNeeded = interpStorageSizeX + interpStorageSizeY; + + if(_ext && _ext->capacity < interpStorageNeeded) + { + free(_ext); + _ext = NULL; + } + + if(!_ext) + { + void *extmem = malloc(sizeof(Extended) + sizeof(float) * interpStorageNeeded); + Extended *ext = new (extmem) Extended; + ext->capacity = interpStorageNeeded; + _ext = ext; + } + + if(_ext) + { + // Some extra temp memory is required during init, but can be discarded right afterward + std::vector interptmp(interpInitTempSize); + + float *mx = _ext->floats(); + float *my = mx + interpStorageSizeX; + + _ext->interp.x = tbsp::initInterpolator(mx, &interptmp[0], degx, cx, cx, &knotsX[0]); + _ext->interp.y = tbsp::initInterpolator(my, &interptmp[0], degy, cy, cy, &knotsY[0]); + _ext->tmp2d.init(cx, cy); + } } void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *controlpoints) { + if(_ext) + { + const size_t maxCp = std::max(_cpx, _cpy); + + Array2d& tmp2d = _ext->tmp2d; + std::vector tmpcp(maxCp), tmpin(maxCp); + + // FIXME: should have in/out stride in the generator function + + // y direction first + for(size_t x = 0; x < _cpx; ++x) + { + const Vector *src = &controlpoints[x]; + for(size_t i = 0; i < _cpy; ++i, src += _cpx) + tmpin[i] = *src; + + tbsp::generateControlPoints(&tmpcp[0], NULL, _ext->interp.y, &tmpin[0]); + + for(size_t y = 0; y < _cpy; ++y) + tmp2d(x, y) = tmpcp[y]; + } + + // x direction + for(size_t y = 0; y < _cpy; ++y) + { + Vector *row = tmp2d.row(y); + memcpy(&tmpin[0], row, sizeof(Vector) * _cpx); + tbsp::generateControlPoints(row, NULL, _ext->interp.x, &tmpin[0]); + } + + controlpoints = tmp2d.data(); + } + + + const unsigned maxDeg = std::max(_degx, _degy); + std::vector tmpv; - size_t degn = std::max(_degx, _degy); - size_t tmpn = (yres * _cpx) + degn; + size_t tmpn = (yres * _cpx) + maxDeg; size_t tmpsz = tmpn * sizeof(Vector); Vector *tmp; if(tmpsz < 17*1024) @@ -113,7 +199,7 @@ void BSpline2D::recalc(Vector* dst, size_t xres, size_t yres, const Vector *cont tmpv.resize(tmpn); tmp = &tmpv[0]; } - Vector *work = tmp + (tmpn - degn); + Vector *work = tmp + (tmpn - maxDeg); // Each column -> Y-axis interpolation for(size_t x = 0; x < _cpx; ++x) diff --git a/BBGE/Interpolators.h b/BBGE/Interpolators.h index f118d71..b2cbc7f 100644 --- a/BBGE/Interpolators.h +++ b/BBGE/Interpolators.h @@ -4,6 +4,15 @@ #include // std::pair #include #include "Vector.h" +#include "tbsp.hh" +#include "DataStructures.h" + +enum SplineType +{ + INTERPOLATOR_BSPLINE, + INTERPOLATOR_COSINE, + INTERPOLATOR_BSPLINE_EXT +}; class CosineInterpolator { @@ -23,6 +32,9 @@ class BSpline2D { public: BSpline2D(); + BSpline2D(const BSpline2D&); + ~BSpline2D(); + // # of control points on each axis void resize(size_t cx, size_t cy, unsigned degx, unsigned degy); @@ -37,10 +49,26 @@ public: inline unsigned degY() const { return _degy; } private: + size_t _cpx, _cpy; // # of control points unsigned _degx, _degy; float _tmin, _tmax; std::vector knotsX, knotsY; + + // always allocated on heap, with extra space at the end + struct Extended + { + Array2d tmp2d; + struct + { + tbsp::Interpolator x, y; + } interp; + size_t capacity; + float *floats() { return reinterpret_cast(this + 1); } + // space for n floats follows + }; + + Extended *_ext; }; diff --git a/BBGE/SkeletalSprite.cpp b/BBGE/SkeletalSprite.cpp index d6ada54..f011f67 100644 --- a/BBGE/SkeletalSprite.cpp +++ b/BBGE/SkeletalSprite.cpp @@ -1660,7 +1660,9 @@ void SkeletalSprite::loadSkeletal(const std::string &fn) XMLElement *animation = animations->FirstChildElement("Animation"); while(animation) { - Animation newAnimation; + this->animations.push_back(Animation()); + Animation& newAnimation = this->animations.back(); + newAnimation.name = animation->Attribute("name"); if(animation->Attribute("resetOnEnd")) newAnimation.resetOnEnd = animation->BoolAttribute("resetOnEnd"); @@ -1786,8 +1788,15 @@ void SkeletalSprite::loadSkeletal(const std::string &fn) } // + size_t numInterp = 0; XMLElement *interp = animation->FirstChildElement("Interpolator"); for( ; interp; interp = interp->NextSiblingElement("Interpolator")) + ++numInterp; + + newAnimation.interpolators.resize(numInterp); + + interp = animation->FirstChildElement("Interpolator"); + for(numInterp = 0 ; interp; interp = interp->NextSiblingElement("Interpolator"), ++numInterp) { Bone *bi = NULL; const char *sbone = interp->Attribute("bone"); @@ -1817,17 +1826,16 @@ void SkeletalSprite::loadSkeletal(const std::string &fn) continue; } - SplineType spline = SPLINE_BSPLINE; + SplineType spline = INTERPOLATOR_BSPLINE; unsigned cx = 3, cy = 3, degx = 3, degy = 3; if(const char *stype = interp->Attribute("type")) { SimpleIStringStream is(stype, SimpleIStringStream::REUSE); std::string ty; is >> ty; - BoneGridInterpolator bgip; if(ty == "bspline") { - spline = SPLINE_BSPLINE; + spline = INTERPOLATOR_BSPLINE; if(!(is >> cx >> cy >> degx >> degy)) { if(!degx) @@ -1851,9 +1859,7 @@ void SkeletalSprite::loadSkeletal(const std::string &fn) // bone grid should have been created via earlier const char *idata = interp->Attribute("data"); - newAnimation.interpolators.push_back(BoneGridInterpolator()); - BoneGridInterpolator& bgip = newAnimation.interpolators.back(); - //bgip.type = spline; + BoneGridInterpolator& bgip = newAnimation.interpolators[numInterp]; bgip.idx = bi->boneIdx; bgip.storeBoneByIdx = boneByIdx; @@ -1895,7 +1901,6 @@ void SkeletalSprite::loadSkeletal(const std::string &fn) } animation = animation->NextSiblingElement("Animation"); - this->animations.push_back(newAnimation); } } } diff --git a/BBGE/SplineGrid.h b/BBGE/SplineGrid.h index 33106da..b6086bf 100644 --- a/BBGE/SplineGrid.h +++ b/BBGE/SplineGrid.h @@ -7,11 +7,6 @@ #include "Quad.h" #include "Interpolators.h" -enum SplineType -{ - SPLINE_BSPLINE, - SPLINE_COSINE, -}; class SplineGridCtrlPoint : public Quad { diff --git a/ExternalLibs/tbsp.hh b/ExternalLibs/tbsp.hh index bacec09..beb76e8 100644 --- a/ExternalLibs/tbsp.hh +++ b/ExternalLibs/tbsp.hh @@ -1,65 +1,125 @@ -/* Tiny B-spline evaluation library +/* Tiny B-spline evaluation and interpolation library License: Public domain, WTFPL, CC0 or your favorite permissive license; whatever is available in your country. -Dependencies: -- Requires C++98 without libc - Origin: https://github.com/fgenesis/tinypile +This library is split into two parts: +- (1) Bspline evaluation (given a set of control points, generate points along the spline) +- (2) Bspline interpolation (given some points, generate control points so that the resulting spline + goes through these points. Needs part (1) to function.) ---- Example usage: --- +Requirements: +- Part (1) has zero dependencies (not even libc) +- Part (2) requires sqrt() from the libc. Change TBSP_SQRT() to use your own. + +Design notes: +- C++98 compatible. Stand-alone. +- No memory allocation; all memory is caller-controlled. + In cases where this is needed, the caller needs to pass appropriately-sized working memory. + + +This is a template library and your types must fulfil certain criteria: + +- Scalar (T): Same semantics as float or double. Should be POD. Best use float. + +- Point (P): Interpolated type. Must support the following operators: + Point operator+(const Point& o) const // Element addition + Point operator-(const Point& o) const // Element subtraction + Point& operator+=(const Point& o) // Add to self + Point& operator-=(const Point& o) // Subtract from self + Point operator*(Scalar m) const // Scalar multiplication + Point& operator=(const Point& o) // Assignment + No constructors are required and the code does not need the type to be zero-inited. + + +--- Example usage, Bspline evaluation: --- enum { DEGREE = 3 }; // Cubic -// You can interpolate anything as long as it supports element addition and scalar multiplication -struct Point { ... operator+(Point) and operator*(float) overloaded ... }; -const Point ps[N] = {...}; // Control points for the spline +const Point cp[NCP] = {...}; // Control points for the spline -float knots[tbsp__getNumKnots(N, DEGREE)]; // knot vector; used for evaluating the spline +float knots[tbsp__getNumKnots(NCP, DEGREE)]; // knot vector; used for evaluating the spline Point tmp[DEGREE]; // Temporary working memory must be provided by the caller. // This is just a tiny array. Must have as many elements as the degree of the spline. // This must be done once for each B-spline; the spline is then defined by the knot vector. // In particular, this inits a knot vector with end points [L..R], // ie. the spline will interpolate values for t = [L..R]. -// (You can use any boundary values, eg. [-4..+5], but [0..1] is the most common) -tbsp::fillKnotVector(knots, N, DEGREE, L, R); +// (You can use any boundary values L < R, eg. [-4..+5], but [0..1] is the most common) +// Note that this depends only on the number of the control points, but not their values. +// This means you only need to compute this when NCP changes! +tbsp::fillKnotVector(knots, NCP, DEGREE, L, R); // Evaluate the spline at point t -// Returns ps[0] if t <= L; ps[N-1] if t >= R; otherwise an interpolated point -Point p = tbsp::evalOne(tmp, knots, ps, N, DEGREE, t); +// Returns cp[0] if t <= L; cp[NCP-1] if t >= R; otherwise an interpolated point +Point p = tbsp::evalOne(tmp, knots, cp, NCP, DEGREE, t); -// Evaluate A points between t=0.2 .. t=0.5, equidistantly spaced, and write to a[]. -// (If you have multiple points to evaluate, this is faster than multiple evalOne() calls) -Point a[A]; -tbsp::evalRange(out, A, tmp, knots, ps, N, DEGREE, 0.2f, 0.5f); +// Evaluate NP points between t=0.2 .. t=0.5, equidistantly spaced, and write to p[]. +// (If you have multiple points to evaluate, this is much faster than multiple evalOne() calls) +Point p[NP]; +tbsp::evalRange(p, NP, tmp, knots, cp, NCP, DEGREE, 0.2f, 0.5f); */ #pragma once #include // size_t +// ---- Compile-time config ------ -#ifndef TBSP_ASSERT -# include -# define TBSP_ASSERT(x) assert(x) + +#ifndef TBSP_SQRT // Needed for part (2) only +# include +# define TBSP_SQRT(x) sqrt(x) #endif -// ---- B-Spline eval part begin ---- +#if !defined(NDEBUG) || defined(_DEBUG) || defined(DEBUG) +# ifndef TBSP_ASSERT +# include +# define TBSP_ASSERT(x) assert(x) +# endif +#endif + + + +// ---- Generic defines and stuff we need ---- + + +// Should be constexpr, but we want to stay C++98-compatible +#define tbsp__getNumKnots(numControlPoints, degree) ((numControlPoints) + (degree) + 1) + +#ifndef TBSP_ASSERT +# define TBSP_ASSERT(x) +#endif + +#ifdef _MSC_VER +# define TBSP_RESTRICT __restrict +#else +# define TBSP_RESTRICT restrict +#endif + +#ifndef TBSP_HAS_CPP11 +# if (__cplusplus > 201103L) || (defined(_MSC_VER) && ((_MSC_VER+0) >= 1900)) +# define TBSP_HAS_CPP11 1 +# else +# define TBSP_HAS_CPP11 0 +# endif +#endif + +// ----------------------------------------------------------------------------------- +// ---- Part (1) begin: B-Spline eval ---- +// Given some control points, calculate points along the spline. +// ----------------------------------------------------------------------------------- + namespace tbsp { -// These should be constexpr, but we want to stay C++98-compatible -#define tbsp__getNumKnots(points, degree) ((points) + (degree) + 1) -#define tbsp__getKnotVectorAllocSize(K, points, degree) (sizeof(K) * tbsp__getNumKnots((points), (degree))) - namespace detail { -// returns index of first element strictly less than t -template -static size_t findKnotIndexOffs(K val, const K *p, size_t n) +// returns index of first element strictly less than val +template +static size_t findKnotIndexOffs(T val, const T *p, size_t n) { // Binary search to find leftmost element that is < val size_t L = 0; @@ -73,17 +133,21 @@ static size_t findKnotIndexOffs(K val, const K *p, size_t n) else R = m; } + // FIXME: can we just return m at this point? + if(L && !(p[L] < val)) + --L; + TBSP_ASSERT(!L || p[L] < val); return L; } -template -static inline size_t findKnotIndex(K val, const K *knots, size_t n, size_t degree) +template +static inline size_t findKnotIndex(T val, const T *knots, size_t numknots, size_t degree) { - TBSP_ASSERT(n > degree); - TBSP_ASSERT(val < knots[n - degree - 1]); // beyond right end? should have been caught by caller + TBSP_ASSERT(numknots > degree); + TBSP_ASSERT(val < knots[numknots - degree - 1]); // beyond right end? should have been caught by caller // skip endpoints - return degree + findKnotIndexOffs(val, knots + degree, n - degree); + return degree + findKnotIndexOffs(val, knots + degree, numknots - (degree * 2u)); } template @@ -94,10 +158,10 @@ static void genKnotsUniform(K *knots, size_t nn, K mink, K maxk) knots[i] = mink + K(i+1) * m; } -template -static T deBoor(T *work, const T *src, const K *knots, const size_t r, const size_t k, const K t, size_t inputStride) +template +static P deBoor(P * TBSP_RESTRICT work, const P * TBSP_RESTRICT src, const T * TBSP_RESTRICT knots, const size_t r, const size_t k, const T t, size_t inputStride) { - T last = src[0]; // init so that it works correctly even with degree == 0 + P last = src[0]; // init so that it works correctly even with degree == 0 for(size_t worksize = k; worksize > 1; --worksize) { const size_t j = k - worksize + 1; // iteration number, starting with 1, going up to k @@ -105,12 +169,12 @@ static T deBoor(T *work, const T *src, const K *knots, const size_t r, const siz for(size_t w = 0, wr = 0; w < worksize - 1; ++w, wr += inputStride) { const size_t i = w + tmp; - const K ki = knots[i]; + const T ki = knots[i]; TBSP_ASSERT(ki <= t); - const K div = knots[i+k-j] - ki; + const T div = knots[i+k-j] - ki; TBSP_ASSERT(div > 0); - const K a = (t - ki) / div; - const K a1 = K(1) - a; + const T a = (t - ki) / div; + const T a1 = T(1) - a; work[w] = last = (src[wr] * a1) + (src[wr + inputStride] * a); // lerp } src = work; // done writing the initial data to work, now use that as input for further iterations @@ -122,11 +186,13 @@ static T deBoor(T *work, const T *src, const K *knots, const size_t r, const siz } // end namespace detail //-------------------------------------- -template -static size_t fillKnotVector(K *knots, size_t points, size_t degree, K mink, K maxk) +template +static size_t fillKnotVector(T *knots, size_t numcp, size_t degree, T mink, T maxk) { - const size_t n = points - 1; - if(n < degree) // lower degree if not enough points + TBSP_ASSERT(mink < maxk); + + const size_t n = numcp - 1; + if(n < degree) // lower degree if not enough control points degree = n; TBSP_ASSERT(n >= degree); @@ -149,17 +215,17 @@ static size_t fillKnotVector(K *knots, size_t points, size_t degree, K mink, K m } // evaluate single point at t -template -static T evalOne(T *work, const K *knots, const T *points, size_t numpoints, size_t degree, K t) +template +static P evalOne(P * TBSP_RESTRICT work, const T * TBSP_RESTRICT knots, const P * TBSP_RESTRICT controlpoints, size_t numcp, size_t degree, T t, size_t inputStride = 1) { if(t < knots[0]) - return points[0]; // left out-of-bounds + return controlpoints[0]; // left out-of-bounds - if(numpoints - 1 < degree) - degree = numpoints - 1; + if(numcp - 1 < degree) + degree = numcp - 1; - const size_t numknots = tbsp__getNumKnots(numpoints, degree); - const K maxknot = knots[numknots - 1]; + const size_t numknots = tbsp__getNumKnots(numcp, degree); + const T maxknot = knots[numknots - 1]; if(t < maxknot) { const size_t r = detail::findKnotIndex(t, knots, numknots, degree); @@ -167,29 +233,29 @@ static T evalOne(T *work, const K *knots, const T *points, size_t numpoints, siz const size_t k = degree + 1; TBSP_ASSERT(r + k < numknots); // check that the copy below stays in bounds - const T* const src = &points[r - degree]; - return detail::deBoor(work, src, knots, r, k, t); + const P* const src = &controlpoints[r - degree]; + return detail::deBoor(work, src, knots, r, k, t, inputStride); } - return points[numpoints - 1]; // right out-of-bounds + return controlpoints[numcp - 1]; // right out-of-bounds } // evaluate numdst points in range [tmin..tmax], equally spaced -template -static void evalRange(T *dst, size_t numdst, T *work, const K *knots, const T *points, size_t numpoints, size_t degree, K tmin, K tmax, size_t inputStride = 1, size_t outputStride = 1) +template +static void evalRange(P * TBSP_RESTRICT dst, size_t numdst, P * TBSP_RESTRICT work, const T * TBSP_RESTRICT knots, const P * TBSP_RESTRICT controlpoints, size_t numcp, size_t degree, T tmin, T tmax, size_t inputStride = 1, size_t outputStride = 1) { TBSP_ASSERT(tmin <= tmax); - if(numpoints - 1 < degree) - degree = numpoints - 1; + if(numcp - 1 < degree) + degree = numcp - 1; - const size_t numknots = tbsp__getNumKnots(numpoints, degree); + const size_t numknots = tbsp__getNumKnots(numcp, degree); size_t r = detail::findKnotIndex(tmin, knots, numknots, degree); TBSP_ASSERT(r >= degree); const size_t k = degree + 1; TBSP_ASSERT(r + k < numknots); // check that the copy below stays in bounds - const K step = (tmax - tmin) / K(numdst - 1); - K t = tmin; + const T step = (tmax - tmin) / T(numdst - 1); + T t = tmin; const size_t maxidx = numknots - k; @@ -198,33 +264,642 @@ static void evalRange(T *dst, size_t numdst, T *work, const K *knots, const T *p // left out-of-bounds for( ; i < numdst && t < knots[0]; ++i, t += step) { - *dst = points[0]; + *dst = controlpoints[0]; dst += outputStride; } // actually interpolated points - const K maxknot = knots[numknots - 1]; + const T maxknot = knots[numknots - 1]; for( ; i < numdst && t < maxknot; ++i, t += step) { while(r < maxidx && knots[r+1] < t) // find new index; don't need to do binary search again ++r; - const T* const src = &points[(r - degree) * inputStride]; + const P * const src = &controlpoints[(r - degree) * inputStride]; *dst = detail::deBoor(work, src, knots, r, k, t, inputStride); dst += outputStride; } // right out-of-bounds - if(i < numdst) + for( ; i < numdst; ++i) { - T last = points[(numpoints - 1) * inputStride]; - for( ; i < numdst; ++i) + *dst = controlpoints[numcp - 1]; + dst += outputStride; + } +} + +// ----------------------------------------------------------------------------------- +// --- Part (2) begin: B-spline interpolation ---- +// Given some points that should lie on a spline, calculate its control points +// ----------------------------------------------------------------------------------- + +namespace detail { + +struct Size2d +{ + size_t w, h; +}; + +// Wrap any pointer to access it like a vector (with proper bounds checking) +template +class VecAcc +{ +public: + typedef T value_type; + VecAcc(T *p, const size_t n) + : p(p), n(n) {} + + inline T& operator[](size_t i) + { + TBSP_ASSERT(i < n); + return p[i]; + } + inline const T& operator[](size_t i) const + { + TBSP_ASSERT(i < n); + return p[i]; + } + template + VecAcc& operator=(const V& o) + { + TBSP_ASSERT(n == o.size()); + for(size_t i = 0; i < n; ++i) + p[i] = o[i]; + return *this; + } + VecAcc& operator=(const VecAcc& o) // Required for C++11 + { + TBSP_ASSERT(n == o.size()); + for(size_t i = 0; i < n; ++i) + p[i] = o[i]; + return *this; + } + template + VecAcc& operator+=(const V& o) + { + TBSP_ASSERT(n == o.size()); + for(size_t i = 0; i < n; ++i) + p[i] += o[i]; + return *this; + } + template + VecAcc& operator-=(const V& o) + { + TBSP_ASSERT(n == o.size()); + for(size_t i = 0; i < n; ++i) + p[i] -= o[i]; + return *this; + } + + T dot(VecAcc& o) const // vector dot product + { + T s = 0; + const size_t n = this->n; + tbsp__ASSERT(n == o.n); + for(size_t i = 0; i < n; ++i) + s += a.p[i] * b.p[i]; + return s; + } + + inline size_t size() const { return n; } + inline T *data() { return p; } + inline const T *data() const { return p; } + + T *p; + size_t n; +}; + +// Wraps any pointer to access it like a row matrix. +// Intentionally a dumb POD struct, do NOT put ctors! +template +struct MatrixAcc +{ + typedef T value_type; + typedef VecAcc RowAcc; + + inline T& operator()(size_t x, size_t y) + { + TBSP_ASSERT(x < size.w); + TBSP_ASSERT(y < size.h); + return p[y * size.w + x]; + } + inline const T& operator()(size_t x, size_t y) const + { + TBSP_ASSERT(x < size.w); + TBSP_ASSERT(y < size.h); + return p[y * size.w + x]; + } + + inline T *rowPtr(size_t y) const + { + return p + (y * size.w); + } + + // w,h of (this * o) + inline Size2d multSize(const MatrixAcc& o) const + { + // in math notation: (n x m) * (m x p) -> (n x p) + // and because math is backwards: + // (h x w) * (H x W) -> (h x W) + TBSP_ASSERT(size.w == o.size.h); + Size2d wh { o.size.w, size.h }; + return wh; + } + + inline RowAcc row(size_t y) const + { + TBSP_ASSERT(y < size.h); + return RowAcc(p + (y * size.w), size.w); + } + + T *p; + Size2d size; +}; + +// Cut away the borders from A, so that the new matrix A' size is (A.size.w - 2, A.size.h - 2): +// ( xxxxx ) (where x means cut and any other letter means keep) +// ( xabcx ) ( abc ) +// A = ( xdefx ), A' = ( def ) +// ( xxxxx ) +// then compute T(A') x A', ie. +// ( ad ) ( abc ) +// R = ( be ) X ( def ) +// ( cf ) +// R must be already the correct size, ie. (A.size.w - 2, A.size.w - 2)! +template +static void matMultCenterCutTransposeWithSelf(MatrixAcc& TBSP_RESTRICT R, const MatrixAcc& TBSP_RESTRICT A) +{ + TBSP_ASSERT(A.size.w > 2 && A.size.h > 2); + + const size_t w = A.size.w - 2; // also the final size of R: (w x w); + const size_t h = A.size.h - 2; // when iterating over w/h it stops before the last element in that row/col + TBSP_ASSERT(R.size.w == w && R.size.h == w); + + T *out = R.p; + for (size_t y = 0; y < w; ++y) + { + for (size_t x = 0; x < w; ++x) { - *dst = last; - dst += outputStride; + const T *row = A.p + y + 1; // skip first elem (+1) + const T *col = A.p + x + w; // skip first row (+w) + T temp = 0; + for (size_t k = 0; k < h; ++k, row += w, col += w) + temp += *row * *col; + *out++ = temp; } } } +// Linear system solver via Cholesky decomposition +template +struct Cholesky +{ + typedef T value_type; + typedef MatrixAcc Mat; + + // Initializes a solver in the given memory. + // On success, bumps ptr forward by the memory it needs; + // on failure, returns NULL. Note that this may also fail if A is not well-formed. + // Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h)) + T *init(T *mem, const Mat& A) + { + T * const pdiag = mem + (A.size.w * A.size.h); + T * const myend = pdiag + A.size.w; + + TBSP_ASSERT(A.size.w >= A.size.h); + const size_t n = A.size.w; + idiag = pdiag; // T[n] + + L.p = mem; + L.size = A.size; + + // Fill the lower left triangle, storing the diagonal separately + for(size_t y = 0; y < n; ++y) + { + const typename Mat::RowAcc rrow = A.row(y); + typename Mat::RowAcc wrow = L.row(y); + for(size_t x = y; x < n; ++x) + { + T s = rrow[x]; + for(size_t k = 0; k < y; ++k) + s -= wrow[k] * L(k,x); + if(x != y) + L(y,x) = s * idiag[y]; + else if(s > 0) + idiag[y] = T(1) / (wrow[y] = TBSP_SQRT(s)); + else + { + TBSP_ASSERT(0 && "Cholesky decomposition failed"); + return NULL; + } + } + } + + // Fill the upper right triangle with zeros + const T zero = T(0); + for(size_t y = 0; y < n; ++y) + { + typename Mat::RowAcc row = L.row(y); + for(size_t x = y+1; x < n; ++x) + row[x] = zero; + } + + return myend; + } + + // Solves x for A * x + b. Both b and x must have length n. + template + void solve(P * TBSP_RESTRICT const xv, const P * TBSP_RESTRICT const bv) const + { + const size_t n = L.size.w; + + for(size_t y = 0; y < n; ++y) + { + const typename Mat::RowAcc Lrow = L.row(y); + P p = bv[y]; + for(size_t x = 0; x < y; ++x) + p -= xv[x] * Lrow[x]; + xv[y] = p * idiag[y]; + } + for(size_t y = n; y--; ) + { + P p = xv[y]; + for(size_t x = y+1; x < n; ++x) + p -= xv[x] * L(y,x); + xv[y] = p * idiag[y]; + } + } + + Mat L; // lower left triangle matrix + T *idiag; // 1 / diag; length is L.size.w +}; + +// Linear system solver via LU decomposition +// (Slower than Cholesky, but a bit more general ie. applicable to more cases) +template +struct LUDecomp +{ + typedef T value_type; + typedef MatrixAcc Mat; + + // Initializes a solver in the given memory. + // On success, bumps ptr forward by the memory it needs; + // on failure, returns NULL. Note that this may also fail if A is not well-formed. + // Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h)) + T *init(T *mem, const Mat& A) + { + T * const pdiag = mem + (A.size.w * A.size.h); + T * const myend = pdiag + A.size.w; + + TBSP_ASSERT(A.size.w == A.size.h); + + MatrixAcc LU; + LU.p = mem; + idiag = pdiag; + LU.size = A.size; + + const size_t n = A.size.w; + + // Note: This doesn't do pivoting. + for (size_t y = 0; y < n; ++y) + { + for (size_t x = y; x < n; ++x) + { + T e = A(x, y); + for (size_t k = 0; k < y; ++k) + e -= LU(k, y) * LU(x, k); + LU(x, y) = e; + } + const T idiagval = T(1) / LU(y,y); + idiag[y] = idiagval; + for (size_t x = y + 1; x < n; ++x) + { + T e = A(y,x); + for (size_t k = 0; k < y; ++k) + e -= LU(k, x) * LU(y, k); + LU(y, x) = idiagval * e; + } + } + this->LU = LU; + + return myend; + } + + // Solves x for A * x + b. Both b and x must have length n. + template + void solve(P * TBSP_RESTRICT const xv, const P * TBSP_RESTRICT const bv) const + { + const size_t n = LU.size.w; + + for (size_t y = 0; y < n; ++y) + { + P p = bv[y]; + const typename Mat::RowAcc LUrow = LU.row(y); + for (size_t x = 0; x < y; ++x) + p -= xv[x] * LUrow[x]; + xv[y] = p; + } + for (size_t y = n; y--; ) + { + P p = xv[y]; + const typename Mat::RowAcc LUrow = LU.row(y); + for (size_t x = y + 1; x < n; ++x) + p -= xv[x] * LUrow[x]; + xv[y] = p * idiag[y]; + } + } + + Mat LU; // combined both lower right & upper left triangles matrix + T *idiag; // 1 / diag; length is L.size.w +}; + +// The coeff vector for a given position u on the spline describes the influence of each control point +// towards the final result, ie.: +// resultPoint(u) = SUM(Nrow[i] * controlpoint[i]) for i in [0..Nrow), +// where Nrow[] are the coefficients for u. +// And to keep the parametrization simple, u is computed from t=0..1 +template +void computeCoeffVector(T * TBSP_RESTRICT Nrow, size_t numcp, T t01, const T * TBSP_RESTRICT knots, size_t degree) +{ + for(size_t i = 0; i < numcp; ++i) + Nrow[i] = T(0); + + const size_t n = numcp - 1; + + // special cases + if(t01 <= T(0)) + { + Nrow[0] = T(1); + return; + } + else if(t01 >= T(1)) + { + Nrow[n] = T(1); + return; + } + + const size_t numknots = tbsp__getNumKnots(numcp, degree); + const size_t m = numknots - 1; + const T mink = knots[0]; + const T maxk = knots[m]; + + // Position on the knot vector aka transform t=0..1 into knot vector space + const T u = mink + t01 * (maxk - mink); + + // find index k, so that u is in [knots[k], knots[k+1]) + const size_t k = detail::findKnotIndex(u, knots, numknots, degree); + Nrow[k] = T(1); + + // Coefficient computation + // See also: https://pages.mtu.edu/~shene/COURSES/cs3621/NOTES/spline/B-spline/bspline-curve-coef.html + for(size_t d = 1; d <= degree; ++d) + { + TBSP_ASSERT(d <= k); + const T q = (knots[k+1] - u) / (knots[k+1] - knots[k-d+1]); + Nrow[k-d] = q * Nrow[k-d+1]; + + for(size_t i = k-d+1; i < k; ++i) + { + const T a = (u - knots[i]) / (knots[i+d] - knots[i]); + const T b = (knots[i+d+1] - u) / (knots[i+d+1] - knots[i+1]); + Nrow[i] = a * Nrow[i] + b * Nrow[i+1]; + } + + Nrow[k] *= ((u - knots[k]) / (knots[k+d] - knots[k])); + } +} + + +template +T* computeCoeffMatrix(MatrixAcc& N, T *mem, const T *knots, size_t nump, size_t numcp, size_t degree) +{ + N.size.w = numcp; + N.size.h = nump; + N.p = mem; + T * const pend = N.p + (nump * numcp); + + const T invsz = T(1) / T(nump - 1); + + for(size_t i = 0; i < nump; ++i) // rows + { + // TODO: this is the parametrization. currently, this is equidistant. allow more. + const T t01 = float(i) * invsz; // position of point assuming uniform parametrization + + typename MatrixAcc::RowAcc row = N.row(i); + computeCoeffVector(&row[0], numcp, t01, knots, degree); // row 0 stores all coefficients for _t[0], etc + } + + return pend; +} + +// Generate least-squares approximator for spline approximation +template +T* generateLeastSquares(MatrixAcc& M, T *mem, const MatrixAcc& N) +{ + M.p = mem; + M.size.w = N.size.w - 2; + M.size.h = N.size.w - 2; // (h = w) is intended, M is a square matrix! + + T * const pend = M.p + (M.size.w * M.size.h); + + matMultCenterCutTransposeWithSelf(M, N); + + return pend; +} + +} // end namespace detail +// -------------------------------------------------- + +// One interpolator is precomputed for a specific number of input points and output control points +template +struct Interpolator +{ + inline Interpolator() : numcp(0), nump(0) {} + + size_t numcp, nump; + union + { + detail::Cholesky cholesky; + detail::LUDecomp ludecomp; + } solver; + detail::MatrixAcc N; + + // To make it checkable in if() +#if TBSP_HAS_CPP11 + explicit inline operator bool() const { return !!numcp; } +#else // Explicit conversion operator is not supported in C++98 + inline operator const void*() const { return numcp ? this : NULL; } +#endif +}; + +// Calculate how many elements of type T are needed as storage for the interpolator +// This should ideally be constexpr, but we want C++98 compatibility. +#define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \ + ( ((numControlPoints) == (numControlPoints)) \ + ? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \ + : ( \ + ((numControlPoints) * (numPoints)) /* N */ \ + + (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \ + ) \ + ) + +#define tbsp__getInterpolatorInitTempSize(numControlPoints, numPoints) \ + ( (numControlPoints) == (numControlPoints) \ + ? ((numControlPoints) * (numPoints)) /* N */ \ + : (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \ + ) + +template +Interpolator initInterpolator(T * TBSP_RESTRICT const mem, T * TBSP_RESTRICT const tmp, size_t degree, size_t nump, size_t numcp, const T * TBSP_RESTRICT knots) +{ + Interpolator interp; + + // Calling this with 2 points is pointless, < 2 is mathematically impossible + if(nump < 2 || numcp < 2) + return interp; + + // Can only generate less or equal control points than points + TBSP_ASSERT(numcp <= nump); + if(!(numcp <= nump)) + return interp; + + // Need storage memory + TBSP_ASSERT(mem && tmp); + + T *p = numcp == nump ? tmp : mem; + p = detail::computeCoeffMatrix(interp.N, p, knots, nump, numcp, degree); + if(!p) + return interp; + TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump)); + + if(nump == numcp) + { + // N allocated in tmp, solver(N) in mem + + // N is point-symmetric, ie. NOT diagonally symmetric. + // This means we can't use Cholesky decomposition, but LU decomposition is fine. + // Note: Cholesky decomposition will at first appear to work, + // but the solutions calculated with it are wrong. Don't use this here. + p = interp.solver.ludecomp.init(mem, interp.N); + + // N is no longer needed + // solver(N) stays + interp.N.p = NULL; + interp.N.size.w = interp.N.size.h = 0; + } + else + { + // N allocated in mem, solver(M) in mem, M in tmp + + // This calculates M = T(N') x N', where N' is N with its borders removed. + // A nice property is that M is diagonally symmetric, + // means it can be efficiently solved via cholesky decomposition. + detail::MatrixAcc M; + p = detail::generateLeastSquares(M, tmp, interp.N); + if(!p) + return interp; + TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorInitTempSize(numcp, nump)); + + p = interp.solver.cholesky.init(mem, M); + + // M is no longer needed + // solver(M) stays + // N stays + } + + if(p) + { + TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump)); + + interp.numcp = numcp; + interp.nump = nump; + } + + return interp; +} + +#define tbsp_getInterpolatorWorkSize(numControlPoints, numPoints) \ + ( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) ) + +// Pass workmem[] with at least tbsp_getInterpolatorWorkSize() elements; +// workmem can be NULL if only interpolation is used (#controlpoints == #points). +// Approximation (#controlpoints < #points) needs extra working memory though. +// Returns how many control points were generated, for convenience. +template +size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const Interpolator& interp, const P *points) +{ + const size_t numcp = interp.numcp; + if(numcp == interp.nump) + { + // This does not need extra working memory + interp.solver.ludecomp.solve(cp, points); + } + else + { + // Approximate points with less control points, this is more costly + TBSP_ASSERT(workmem); // ... and needs extra memory + const size_t h = numcp - 1; + const size_t n = interp.nump - 1; + const P p0 = points[0]; + const P pn = points[n]; + + // Wrap the least squares estimator somehow together with the points to approximate... + // Unfortunately I forgot how this works, so no explanation of mathemagics here, sorry :< + const detail::MatrixAcc N = interp.N; + const P initial = points[1] - (p0 * N(0,1)) - (pn * N(h,1)); + for(size_t i = 1; i < h; ++i) + { + P tmp = initial * N(i,1); + for(size_t k = 2; k < n; ++k) + { + const typename detail::MatrixAcc::RowAcc Nrow = N.row(k); + tmp += (points[k] - (p0 * Nrow[0]) - (pn * Nrow[h])) * Nrow[i]; + } + workmem[i-1] = tmp; + } + + // End points are not interpolated + cp[0] = p0; + cp[h] = pn; + + // Solve for all points that are not endpoints + TBSP_ASSERT(interp.solver.cholesky.L.size.w == h - 1); + interp.solver.cholesky.solve(cp + 1, workmem); + } + return numcp; +} + + +// Helper functions for parametrization + +template +void chordal(T *parm, const P *points, size_t n) +{ + // Start and end points are always 0 and 1, respectively + parm[0] = T(0); + parm[--n] = T(1); + + T totaldist = 0; + for(size_t i = 1; i < n; ++i) + { + const T dist = distance(points[i-1], points[i]); + totaldist += dist; + parm[i] = totaldist + } + + // Normalize to 0..1 + const T m = T(1) / totaldist; + for(size_t i = 1; i < n; ++i) + parm[i] *= m; +} + +template +void uniform(T *parm, const P *points, size_t n) +{ + const T m = T(1) / T(n - 1); + for(size_t i = 0; i < n; ++i) + parm[i] = T(i) * m; +} + } // end namespace tbsp