/* Tiny B-spline evaluation and interpolation library License: Public domain, WTFPL, CC0 or your favorite permissive license; whatever is available in your country. Origin: https://github.com/fgenesis/tinypile This library is split into two parts: - (1) Bspline evaluation (given a set of control points, generate points along the spline) - (2) Bspline interpolation (given some points, generate control points so that the resulting spline goes through these points. Needs part (1) to function.) Requirements: - Part (1) has zero dependencies (not even libc) - Part (2) requires sqrt() from the libc. Change TBSP_SQRT() to use your own. Design notes: - C++98 compatible. Stand-alone. - No memory allocation; all memory is caller-controlled and caller-owned. In cases where this is needed, the caller needs to pass appropriately-sized working memory. This is a template library and your types must fulfil certain criteria: - Scalar (T): Same semantics as float or double. Should be POD. Best use float. - Point (P): Interpolated type. Must support the following operators: Point operator+(const Point& o) const // Element addition Point operator-(const Point& o) const // Element subtraction Point& operator+=(const Point& o) // Add to self Point& operator-=(const Point& o) // Subtract from self Point operator*(Scalar m) const // Scalar multiplication Point& operator=(const Point& o) // Assignment No constructors are required and the code does not need the type to be zero-inited. --- Example usage, Bspline evaluation: --- enum { DEGREE = 3 }; // Cubic const Point cp[NCP] = {...}; // Control points for the spline float knots[tbsp__getNumKnots(NCP, DEGREE)]; // knot vector; used for evaluating the spline Point tmp[DEGREE]; // Temporary working memory must be provided by the caller. // This is just a tiny array. Must have as many elements as the degree of the spline. // This must be done once for each B-spline; the spline is then defined by the knot vector. // In particular, this inits a knot vector with end points [L..R], // ie. the spline will interpolate values for t = [L..R]. // (You can use any boundary values L < R, eg. [-4..+5], but [0..1] is the most common) // Note that this depends only on the number of the control points, but not their values. // This means you only need to compute this when NCP changes! tbsp::fillKnotVector(knots, NCP, DEGREE, L, R); // Evaluate the spline at point t // Returns cp[0] if t <= L; cp[NCP-1] if t >= R; otherwise an interpolated point Point p = tbsp::evalOne(tmp, knots, cp, NCP, DEGREE, t); // Evaluate NP points between t=0.2 .. t=0.5, equidistantly spaced, and write to p[]. // (If you have multiple points to evaluate, this is much faster than multiple evalOne() calls) Point p[NP]; tbsp::evalRange(p, NP, tmp, knots, cp, NCP, DEGREE, 0.2f, 0.5f); */ #pragma once #include // size_t // ---- Compile-time config ------ #ifndef TBSP_SQRT // Needed for part (2) only # include # define TBSP_SQRT(x) sqrt(x) #endif #if !defined(NDEBUG) || defined(_DEBUG) || defined(DEBUG) # ifndef TBSP_ASSERT # include # define TBSP_ASSERT(x) assert(x) # endif #endif // ---- Generic defines and stuff we need ---- // Should be constexpr, but we want to stay C++98-compatible #define tbsp__getNumKnots(numControlPoints, degree) ((numControlPoints) + (degree) + 1) #ifndef TBSP_ASSERT # define TBSP_ASSERT(x) #endif #ifdef _MSC_VER # define TBSP_RESTRICT __restrict #else # define TBSP_RESTRICT restrict #endif #ifndef TBSP_HAS_CPP11 # if (__cplusplus > 201103L) || (defined(_MSC_VER) && ((_MSC_VER+0) >= 1900)) # define TBSP_HAS_CPP11 1 # else # define TBSP_HAS_CPP11 0 # endif #endif // ----------------------------------------------------------------------------------- // ---- Part (1) begin: B-Spline eval ---- // Given some control points, calculate points along the spline. // ----------------------------------------------------------------------------------- namespace tbsp { namespace detail { // returns index of first element strictly less than val template static size_t findKnotIndexOffs(T val, const T *p, size_t n) { // Binary search to find leftmost element that is < val size_t L = 0; size_t R = n; size_t m; while(L < R) { m = (L + R) / 2u; if(p[m] < val) L = m + 1; else R = m; } // FIXME: can we just return m at this point? if(L && !(p[L] < val)) --L; TBSP_ASSERT(!L || p[L] < val); return L; } template static inline size_t findKnotIndex(T val, const T *knots, size_t numknots, size_t degree) { TBSP_ASSERT(numknots > degree); TBSP_ASSERT(val < knots[numknots - degree - 1]); // beyond right end? should have been caught by caller // skip endpoints return degree + findKnotIndexOffs(val, knots + degree, numknots - (degree * 2u)); } template static void genKnotsUniform(K *knots, size_t nn, K mink, K maxk) { const K m = (maxk - mink) / K(nn + 1); for(size_t i = 0; i < nn; ++i) knots[i] = mink + K(i+1) * m; } template static P deBoor(P * TBSP_RESTRICT work, const P * TBSP_RESTRICT src, const T * TBSP_RESTRICT knots, const size_t r, const size_t k, const T t, size_t inputStride) { P last = src[0]; // init so that it works correctly even with degree == 0 for(size_t worksize = k; worksize > 1; --worksize) { const size_t j = k - worksize + 1; // iteration number, starting with 1, going up to k const size_t tmp = r - k + 1 + j; for(size_t w = 0, wr = 0; w < worksize - 1; ++w, wr += inputStride) { const size_t i = w + tmp; const T ki = knots[i]; TBSP_ASSERT(ki <= t); const T div = knots[i+k-j] - ki; TBSP_ASSERT(div > 0); const T a = (t - ki) / div; const T a1 = T(1) - a; work[w] = last = (src[wr] * a1) + (src[wr + inputStride] * a); // lerp } src = work; // done writing the initial data to work, now use that as input for further iterations inputStride = 1; } return last; } } // end namespace detail //-------------------------------------- template static size_t fillKnotVector(T *knots, size_t numcp, size_t degree, T mink, T maxk) { TBSP_ASSERT(mink < maxk); const size_t n = numcp - 1; if(n < degree) // lower degree if not enough control points degree = n; TBSP_ASSERT(n >= degree); const size_t ep = degree + 1; // ep knots on each end const size_t ne = n - degree; // non-endpoint knots in the middle // endpoint interpolation, beginning for(size_t i = 0; i < ep; ++i) *knots++ = mink; // TODO: allow more parametrizations detail::genKnotsUniform(knots, ne, mink, maxk); knots += ne; // endpoint interpolation, end for(size_t i = 0; i < ep; ++i) *knots++ = maxk; return degree; } // evaluate single point at t template static P evalOne(P * TBSP_RESTRICT work, const T * TBSP_RESTRICT knots, const P * TBSP_RESTRICT controlpoints, size_t numcp, size_t degree, T t, size_t inputStride = 1) { if(t < knots[0]) return controlpoints[0]; // left out-of-bounds if(numcp - 1 < degree) degree = numcp - 1; const size_t numknots = tbsp__getNumKnots(numcp, degree); const T maxknot = knots[numknots - 1]; if(t < maxknot) { const size_t r = detail::findKnotIndex(t, knots, numknots, degree); TBSP_ASSERT(r >= degree); const size_t k = degree + 1; TBSP_ASSERT(r + k < numknots); // check that the copy below stays in bounds const P* const src = &controlpoints[r - degree]; return detail::deBoor(work, src, knots, r, k, t, inputStride); } return controlpoints[numcp - 1]; // right out-of-bounds } // evaluate numdst points in range [tmin..tmax], equally spaced template static void evalRange(P * TBSP_RESTRICT dst, size_t numdst, P * TBSP_RESTRICT work, const T * TBSP_RESTRICT knots, const P * TBSP_RESTRICT controlpoints, size_t numcp, size_t degree, T tmin, T tmax, size_t inputStride = 1, size_t outputStride = 1) { TBSP_ASSERT(tmin <= tmax); if(numcp - 1 < degree) degree = numcp - 1; const size_t numknots = tbsp__getNumKnots(numcp, degree); size_t r = detail::findKnotIndex(tmin, knots, numknots, degree); TBSP_ASSERT(r >= degree); const size_t k = degree + 1; TBSP_ASSERT(r + k < numknots); // check that the copy below stays in bounds const T step = (tmax - tmin) / T(numdst - 1); T t = tmin; const size_t maxidx = numknots - k; size_t i = 0; // left out-of-bounds for( ; i < numdst && t < knots[0]; ++i, t += step) { *dst = controlpoints[0]; dst += outputStride; } // actually interpolated points const T maxknot = knots[numknots - 1]; for( ; i < numdst && t < maxknot; ++i, t += step) { while(r < maxidx && knots[r+1] < t) // find new index; don't need to do binary search again ++r; const P * const src = &controlpoints[(r - degree) * inputStride]; *dst = detail::deBoor(work, src, knots, r, k, t, inputStride); dst += outputStride; } // right out-of-bounds if(i < numdst) { const P p = controlpoints[(numcp - 1) * inputStride]; do { *dst = p; dst += outputStride; ++i; } while(i < numdst); } } // ----------------------------------------------------------------------------------- // --- Part (2) begin: B-spline interpolation ---- // Given some points that should lie on a spline, calculate its control points // ----------------------------------------------------------------------------------- namespace detail { struct Size2d { size_t w, h; inline bool operator==(const Size2d& o) const { return w == o.w && h == o.h; } }; // Wrap any pointer to access it like a vector (with proper bounds checking) template class VecAcc { public: typedef T value_type; VecAcc(T *p, const size_t n) : p(p), n(n) {} inline T& operator[](size_t i) { TBSP_ASSERT(i < n); return p[i]; } inline const T& operator[](size_t i) const { TBSP_ASSERT(i < n); return p[i]; } template VecAcc& operator=(const V& o) { TBSP_ASSERT(n == o.size()); for(size_t i = 0; i < n; ++i) p[i] = o[i]; return *this; } VecAcc& operator=(const VecAcc& o) // Required for C++11 { TBSP_ASSERT(n == o.size()); for(size_t i = 0; i < n; ++i) p[i] = o[i]; return *this; } template VecAcc& operator+=(const V& o) { TBSP_ASSERT(n == o.size()); for(size_t i = 0; i < n; ++i) p[i] += o[i]; return *this; } template VecAcc& operator-=(const V& o) { TBSP_ASSERT(n == o.size()); for(size_t i = 0; i < n; ++i) p[i] -= o[i]; return *this; } T dot(VecAcc& o) const // vector dot product { T s = 0; const size_t n = this->n; tbsp__ASSERT(n == o.n); for(size_t i = 0; i < n; ++i) s += a.p[i] * b.p[i]; return s; } inline size_t size() const { return n; } inline T *data() { return p; } inline const T *data() const { return p; } T *p; size_t n; }; // Wraps any pointer to access it like a row matrix. // Intentionally a dumb POD struct, do NOT put ctors! template struct MatrixAcc { typedef T value_type; typedef VecAcc RowAcc; inline T& operator()(size_t x, size_t y) { TBSP_ASSERT(x < size.w); TBSP_ASSERT(y < size.h); return p[y * size.w + x]; } inline const T& operator()(size_t x, size_t y) const { TBSP_ASSERT(x < size.w); TBSP_ASSERT(y < size.h); return p[y * size.w + x]; } inline T *rowPtr(size_t y) const { return p + (y * size.w); } // w,h of (this * o) inline Size2d multSize(const MatrixAcc& o) const { // in math notation: (n x m) * (m x p) -> (n x p) // and because math is backwards: // (h x w) * (H x W) -> (h x W) TBSP_ASSERT(size.w == o.size.h); Size2d wh { o.size.w, size.h }; return wh; } inline RowAcc row(size_t y) const { TBSP_ASSERT(y < size.h); return RowAcc(p + (y * size.w), size.w); } T *p; Size2d size; }; // Cut away the borders from A, so that the new matrix A' size is (A.size.w - 2, A.size.h - 2): // ( xxxxx ) (where x means cut and any other letter means keep) // ( xabcx ) ( abc ) // A = ( xdefx ), A' = ( def ) // ( xxxxx ) // then compute T(A') x A', ie. // ( ad ) ( abc ) // R = ( be ) X ( def ) // ( cf ) // R must be already the correct size, ie. (A.size.w - 2, A.size.w - 2)! template static void matMultCenterCutTransposeWithSelf(MatrixAcc& TBSP_RESTRICT R, const MatrixAcc& TBSP_RESTRICT A) { TBSP_ASSERT(A.size.w > 2 && A.size.h > 2); const size_t w = A.size.w - 2; // also the final size of R: (w x w); const size_t h = A.size.h - 2; // when iterating over w/h it stops before the last element in that row/col TBSP_ASSERT(R.size.w == w && R.size.h == w); T *out = R.p; for (size_t y = 0; y < w; ++y) { for (size_t x = 0; x < w; ++x) { const T *row = A.p + y + 1; // skip first elem (+1) const T *col = A.p + x + w; // skip first row (+w) T temp = 0; for (size_t k = 0; k < h; ++k, row += w, col += w) temp += *row * *col; *out++ = temp; } } } // Linear system solver via Cholesky decomposition template struct Cholesky { typedef T value_type; typedef MatrixAcc Mat; // Initializes a solver in the given memory. // On success, bumps ptr forward by the memory it needs; // on failure, returns NULL. // Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h)) T *init(T *mem, Size2d size) { T * const pdiag = mem + (size.w * size.h); T * const myend = pdiag + size.w; L.p = mem; L.size = size; idiag = pdiag; // T[n] return myend; } // Sets the matrix A used for solving later. The solver is preconditioned // for that matrix so that in (A * x + b) b is the only variable. // May fail if A is not well formed. bool refresh(const Mat& A) { TBSP_ASSERT(A.size == L.size); TBSP_ASSERT(A.size.w >= A.size.h); const size_t n = L.size.w; // Fill the lower left triangle, storing the diagonal separately for(size_t y = 0; y < n; ++y) { const typename Mat::RowAcc rrow = A.row(y); typename Mat::RowAcc wrow = L.row(y); for(size_t x = y; x < n; ++x) { T s = rrow[x]; for(size_t k = 0; k < y; ++k) s -= wrow[k] * L(k,x); if(x != y) L(y,x) = s * idiag[y]; else if(s > 0) idiag[y] = T(1) / (wrow[y] = TBSP_SQRT(s)); else { TBSP_ASSERT(0 && "Cholesky decomposition failed"); return false; } } } // Fill the upper right triangle with zeros const T zero = T(0); for(size_t y = 0; y < n; ++y) { typename Mat::RowAcc row = L.row(y); for(size_t x = y+1; x < n; ++x) row[x] = zero; } return true; } // Solves x for A * x + b. Both b and x must have length n. // Can solve in-place, ie. you may pass xv == bv. template void solve(P * const xv, const P * const bv) const { const size_t n = L.size.w; for(size_t y = 0; y < n; ++y) { const typename Mat::RowAcc Lrow = L.row(y); P p = bv[y]; for(size_t x = 0; x < y; ++x) p -= xv[x] * Lrow[x]; xv[y] = p * idiag[y]; } for(size_t y = n; y--; ) { P p = xv[y]; for(size_t x = y+1; x < n; ++x) p -= xv[x] * L(y,x); xv[y] = p * idiag[y]; } } Mat L; // lower left triangle matrix T *idiag; // 1 / diag; length is L.size.w }; // Linear system solver via LU decomposition // (Slower than Cholesky, but a bit more general ie. applicable to more cases) template struct LUDecomp { typedef T value_type; typedef MatrixAcc Mat; // Initializes a solver in the given memory. // On success, bumps ptr forward by the memory it needs; // on failure, returns NULL. // Memory needed: (sizeof(T) * ((A.size.w + 1) * A.size.h)) T *init(T *mem, Size2d size) { T * const pdiag = mem + (size.w * size.h); T * const myend = pdiag + size.w; TBSP_ASSERT(size.w == size.h); LU.p = mem; LU.size = size; idiag = pdiag; return myend; } // Sets the matrix A used for solving later. The solver is preconditioned // for that matrix so that in (A * x + b) b is the only variable. void refresh(const Mat& A) { TBSP_ASSERT(A.size == LU.size); const size_t n = LU.size.w; // Note: This doesn't do pivoting. for (size_t y = 0; y < n; ++y) { for (size_t x = y; x < n; ++x) { T e = A(x, y); for (size_t k = 0; k < y; ++k) e -= LU(k, y) * LU(x, k); LU(x, y) = e; } const T idiagval = T(1) / LU(y,y); idiag[y] = idiagval; for (size_t x = y + 1; x < n; ++x) { T e = A(y,x); for (size_t k = 0; k < y; ++k) e -= LU(k, x) * LU(y, k); LU(y, x) = idiagval * e; } } } // Solves x for A * x + b. Both b and x must have length n. // Can solve in-place, ie. you may pass xv == bv. template void solve(P * const xv, const P * const bv) const { const size_t n = LU.size.w; for (size_t y = 0; y < n; ++y) { P p = bv[y]; const typename Mat::RowAcc LUrow = LU.row(y); for (size_t x = 0; x < y; ++x) p -= xv[x] * LUrow[x]; xv[y] = p; } for (size_t y = n; y--; ) { P p = xv[y]; const typename Mat::RowAcc LUrow = LU.row(y); for (size_t x = y + 1; x < n; ++x) p -= xv[x] * LUrow[x]; xv[y] = p * idiag[y]; } } Mat LU; // combined both lower right & upper left triangles matrix T *idiag; // 1 / diag; length is L.size.w }; // The coeff vector for a given position u on the spline describes the influence of each control point // towards the final result, ie.: // resultPoint(u) = SUM(Nrow[i] * controlpoint[i]) for i in [0..Nrow), // where Nrow[] are the coefficients for u. // And to keep the parametrization simple, u is computed from t=0..1 template void computeCoeffVector(T * TBSP_RESTRICT Nrow, size_t numcp, T t01, const T * TBSP_RESTRICT knots, size_t degree) { for(size_t i = 0; i < numcp; ++i) Nrow[i] = T(0); const size_t n = numcp - 1; // special cases if(t01 <= T(0)) { Nrow[0] = T(1); return; } else if(t01 >= T(1)) { Nrow[n] = T(1); return; } const size_t numknots = tbsp__getNumKnots(numcp, degree); const size_t m = numknots - 1; const T mink = knots[0]; const T maxk = knots[m]; // Position on the knot vector aka transform t=0..1 into knot vector space const T u = mink + t01 * (maxk - mink); // find index k, so that u is in [knots[k], knots[k+1]) const size_t k = detail::findKnotIndex(u, knots, numknots, degree); Nrow[k] = T(1); // Coefficient computation // See also: https://pages.mtu.edu/~shene/COURSES/cs3621/NOTES/spline/B-spline/bspline-curve-coef.html for(size_t d = 1; d <= degree; ++d) { TBSP_ASSERT(d <= k); const T q = (knots[k+1] - u) / (knots[k+1] - knots[k-d+1]); Nrow[k-d] = q * Nrow[k-d+1]; for(size_t i = k-d+1; i < k; ++i) { const T a = (u - knots[i]) / (knots[i+d] - knots[i]); const T b = (knots[i+d+1] - u) / (knots[i+d+1] - knots[i+1]); Nrow[i] = a * Nrow[i] + b * Nrow[i+1]; } Nrow[k] *= ((u - knots[k]) / (knots[k+d] - knots[k])); } } template T *allocateCoeffMatrix(MatrixAcc& N, T *mem, size_t nump, size_t numcp) { N.size.w = numcp; N.size.h = nump; N.p = mem; return mem + (nump * numcp); } template void computeCoeffMatrix(MatrixAcc& N, const T *knots, size_t degree) { const size_t numcp = N.size.w; const size_t nump = N.size.h; const T invsz = T(1) / T(nump - 1); for(size_t i = 0; i < nump; ++i) // rows { // TODO: this is the parametrization. currently, this is equidistant. allow more. const T t01 = float(i) * invsz; // position of point assuming uniform parametrization typename MatrixAcc::RowAcc row = N.row(i); computeCoeffVector(&row[0], numcp, t01, knots, degree); // row 0 stores all coefficients for _t[0], etc } } template Size2d getLeastSquaresSize(const MatrixAcc& N) { Size2d sz; sz.w = N.size.w - 2; sz.h = N.size.w - 2; // (h = w) is intended, M is a square matrix! return sz; } // Generate least-squares approximator for spline approximation template MatrixAcc generateLeastSquares(T *mem, const MatrixAcc& N) { MatrixAcc M; M.p = mem; M.size == getLeastSquaresSize(N); T * const pend = M.p + (M.size.w * M.size.h); matMultCenterCutTransposeWithSelf(M, N); return M; } } // end namespace detail // -------------------------------------------------- // Calculate how many elements of type T are needed as storage for the interpolator // These should ideally be constexpr, but we want C++98 compatibility. // For Interpolator::init() #define tbsp__getInterpolatorStorageSize(numControlPoints, numPoints) \ ( ((numControlPoints) == (numControlPoints)) \ ? (((numControlPoints)+1) * (numPoints)) /* solver(N) */ \ : ( \ ((numControlPoints) * (numPoints)) /* N */ \ + (((numControlPoints)-1) * ((numControlPoints)-2)) /* solver(M) */ \ ) \ ) // For Interpolator::refresh() #define tbsp__getInterpolatorRefreshTempSize(numControlPoints, numPoints) \ ( (numControlPoints) == (numControlPoints) \ ? ((numControlPoints) * (numPoints)) /* N */ \ : (((numControlPoints)-2) * ((numControlPoints)-2)) /* M */ \ ) // For Interpolator::generateControlPoints() #define tbsp_getInterpolatorWorkTempSize(numControlPoints, numPoints) \ ( ((numControlPoints) == (numPoints)) ? 0 : ((numControlPoints) - 2) ) // -------------------------------------------------- // One interpolator is precomputed for a specific number of input points and output control points template struct Interpolator { inline Interpolator() : numcp(0), nump(0) {} // Inits an Interpolator in the passed mem[]. This memory must outlive the Interpolator // and be at least tbsp__getInterpolatorStorageSize() elements. // You may move or copy the memory elsewhere and then call init() again to update internal // pointers to point to the new location. // This does not access mem at all, it only stores pointers. // When the number of points or control points change, the memory requirements change too; // in this case you must re-init(), generate a new knot vector, and refresh(). T *init(T *mem, size_t nump, size_t numcp); // When the knot vector or degree changes, call this to update the internal matrix solver. // Must be called before generateControlPoints() can be used. // tmp[] must have space for at least tbsp__getInterpolatorRefreshTempSize() elements // and can be discarded after the call returns. bool refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree); // Given a set of points P[nump], generate set of control points cp[numcp]. // nump, numcp are defined by the numbers passed to init(). // workmem can be NULL if only interpolation is used (#controlpoints == #points). // Approximation (#controlpoints < #points) needs extra working memory though, // so you must pass at least tbsp_getInterpolatorWorkTempSize() elements of P. // Returns how many control points were generated, for convenience. // The working memory can be discarded after the call returns. template size_t generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const; inline size_t getNumGeneratedControlPoints() { return numcp; } inline size_t getNumInputPoints() { return nump; } // ------------------------ protected: size_t numcp, nump; union { detail::Cholesky cholesky; detail::LUDecomp ludecomp; } solver; detail::MatrixAcc N; // To make it checkable in if() #if TBSP_HAS_CPP11 explicit inline operator bool() const { return !!numcp; } #else // Explicit conversion operator is not supported in C++98 inline operator const void*() const { return numcp ? this : NULL; } #endif }; template T *Interpolator::init(T * TBSP_RESTRICT const mem, size_t nump, size_t numcp) { TBSP_ASSERT(mem); // Calling this with 2 points is pointless, < 2 is mathematically impossible if(nump < 2 || numcp < 2 || !mem) return NULL; // Can only generate less or equal control points than points TBSP_ASSERT(numcp <= nump); if(!(numcp <= nump)) return NULL; T *p = mem; if(nump == numcp) { detail::allocateCoeffMatrix(N, NULL, nump, numcp); // just to get the size p = solver.ludecomp.init(p, N.size); } else { p = detail::allocateCoeffMatrix(N, p, nump, numcp); p = solver.cholesky.init(p, detail::getLeastSquaresSize(N)); } if(p) { TBSP_ASSERT(p <= mem + tbsp__getInterpolatorStorageSize(numcp, nump)); this->numcp = numcp; this->nump = nump; } return p; } template bool Interpolator::refresh(T * TBSP_RESTRICT const tmp, const T * TBSP_RESTRICT knots, unsigned degree) { // Need temporary memory TBSP_ASSERT(tmp); const size_t numcp = N.size.w; const size_t nump = N.size.h; TBSP_ASSERT(numcp && nump); bool ret = false; if(nump == numcp) { // N allocated in tmp, solver(N) in mem T *p = detail::allocateCoeffMatrix(N, tmp, nump, numcp); TBSP_ASSERT(p <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump)); detail::computeCoeffMatrix(N, knots, degree); // N is point-symmetric, ie. NOT diagonally symmetric. // This means we can't use Cholesky decomposition, but LU decomposition is fine. // Note: Cholesky decomposition will at first appear to work, // but the solutions calculated with it are wrong. Don't use this here. solver.ludecomp.refresh(N); // N is no longer needed // solver(N) stays N.p = NULL; ret = true; } else { // N allocated in mem, solver(M) in mem, M in tmp detail::computeCoeffMatrix(N, knots, degree); // This calculates M = T(N') x N', where N' is N with its borders removed. // A nice property is that M is diagonally symmetric, // means it can be efficiently solved via cholesky decomposition. const detail::MatrixAcc M = detail::generateLeastSquares(tmp, N); TBSP_ASSERT(M.p + (M.size.w * M.size.h) <= tmp + tbsp__getInterpolatorRefreshTempSize(numcp, nump)); ret = solver.cholesky.refresh(M); // M is no longer needed // solver(M) stays // N stays } return ret; } template template size_t Interpolator::generateControlPoints(P * cp, P * TBSP_RESTRICT workmem, const P *points) const { if(numcp == nump) { // This does not need extra working memory solver.ludecomp.solve(cp, points); } else { // Approximate points with less control points, this is more costly TBSP_ASSERT(workmem); // ... and needs extra memory const size_t h = numcp - 1; const size_t n = nump - 1; const P p0 = points[0]; const P pn = points[n]; // Wrap the least squares estimator somehow together with the points to approximate... // Unfortunately I forgot how this works, so no explanation of mathemagics here, sorry :< const P initial = points[1] - (p0 * N(0,1)) - (pn * N(h,1)); for(size_t i = 1; i < h; ++i) { P tmp = initial * N(i,1); for(size_t k = 2; k < n; ++k) { const typename detail::MatrixAcc::RowAcc Nrow = N.row(k); tmp += (points[k] - (p0 * Nrow[0]) - (pn * Nrow[h])) * Nrow[i]; } workmem[i-1] = tmp; } // End points are not interpolated cp[0] = p0; cp[h] = pn; // Solve for all points that are not endpoints TBSP_ASSERT(solver.cholesky.L.size.w == h - 1); solver.cholesky.solve(cp + 1, workmem); } return numcp; } // Helper functions for parametrization template void chordal(T *parm, const P *points, size_t n) { // Start and end points are always 0 and 1, respectively parm[0] = T(0); parm[--n] = T(1); T totaldist = 0; for(size_t i = 1; i < n; ++i) { const T dist = distance(points[i-1], points[i]); totaldist += dist; parm[i] = totaldist } // Normalize to 0..1 const T m = T(1) / totaldist; for(size_t i = 1; i < n; ++i) parm[i] *= m; } template void uniform(T *parm, const P *points, size_t n) { const T m = T(1) / T(n - 1); for(size_t i = 0; i < n; ++i) parm[i] = T(i) * m; } } // end namespace tbsp