diff --git a/Aquaria/ScriptInterface.cpp b/Aquaria/ScriptInterface.cpp index 361a01e..5c0424a 100644 --- a/Aquaria/ScriptInterface.cpp +++ b/Aquaria/ScriptInterface.cpp @@ -19,15 +19,17 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "SDL.h" -#include "ScriptInterface.h" -#include "../BBGE/ScriptObject.h" extern "C" { - #include "lua.h" - #include "lauxlib.h" - #include "lualib.h" +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" } +#include "luaalloc.h" + +#include "SDL.h" +#include "ScriptInterface.h" +#include "../BBGE/ScriptObject.h" #include "ReadXML.h" @@ -11498,7 +11500,7 @@ static const struct { //============================================================================================ ScriptInterface::ScriptInterface() -: baseState(NULL), _sballoc(8, 128) +: baseState(NULL), _LA(NULL) { } @@ -11513,6 +11515,8 @@ void ScriptInterface::init() allowUnsafeFunctions = dsq->user.system.allowDangerousScriptFunctions; + if(!_LA) + _LA = luaalloc_create(NULL, NULL); if (!baseState) baseState = createLuaVM(); } @@ -11523,15 +11527,9 @@ void ScriptInterface::reset() init(); } -void *ScriptInterface::the_alloc(void *ud, void *ptr, size_t osize, size_t nsize) -{ - ScriptInterface *this_ = (ScriptInterface*)ud; - return this_->_sballoc.Alloc(ptr, nsize, osize); -} - lua_State *ScriptInterface::createLuaVM() { - lua_State *state = lua_newstate(the_alloc, this); /* opens Lua */ + lua_State *state = _LA ? lua_newstate(luaalloc, _LA) : luaL_newstate(); /* opens Lua; lua_newstate() must never get a NULL allocator, so fall back to Lua's default one if luaalloc_create() failed */ luaL_openlibs(state); #ifdef LUAAPI_HAS_CLIPBOARD @@ -11612,12 +11610,6 @@ lua_State *ScriptInterface::createLuaVM() return state; } -void ScriptInterface::destroyLuaVM(lua_State *state) -{ - if (state) - lua_close(state); -} - // Initial value for the instance-local table should be on the stack of // the base Lua state; it will be popped when this function returns.
lua_State *ScriptInterface::createLuaThread(const std::string &file) @@ -11721,8 +11713,16 @@ int ScriptInterface::gcGetStats() void ScriptInterface::shutdown() { - destroyLuaVM(baseState); - baseState = NULL; + if (baseState) + { + lua_close(baseState); + baseState = NULL; + } + if(_LA) + { + luaalloc_delete(_LA); + _LA = NULL; + } } Script *ScriptInterface::openScript(const std::string &file, bool ignoremissing /* = false */) diff --git a/Aquaria/ScriptInterface.h b/Aquaria/ScriptInterface.h index c20a496..ec37452 100644 --- a/Aquaria/ScriptInterface.h +++ b/Aquaria/ScriptInterface.h @@ -22,9 +22,9 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #define SCRIPTINTERFACE_H #include "../BBGE/Base.h" -#include "../BBGE/MemoryAllocatorSmallBlock.h" struct lua_State; +struct LuaAlloc; class Entity; class CollideEntity; @@ -115,13 +115,12 @@ public: protected: lua_State *createLuaVM(); - void destroyLuaVM(lua_State *state); lua_State *createLuaThread(const std::string &file); int destroyLuaThread(const std::string &file, lua_State *thread); static void *the_alloc(void *ud, void *ptr, size_t osize, size_t nsize); lua_State *baseState; - SmallBlockAllocator _sballoc; + LuaAlloc *_LA; }; #endif diff --git a/BBGE/MemoryAllocatorSmallBlock.cpp b/BBGE/MemoryAllocatorSmallBlock.cpp deleted file mode 100644 index 854d1e8..0000000 --- a/BBGE/MemoryAllocatorSmallBlock.cpp +++ /dev/null @@ -1,292 +0,0 @@ -// Public domain - -// Aquaria specific... -#include "Base.h" - -#include "algorithmx.h" -#include "MemoryAllocatorSmallBlock.h" -#include "bithacks.h" - -#include - - -#define DD(...) -#define logdev(...) -#define logerror(...) 
- -#ifdef NDEBUG -# define ASSERT(x) -#else -# define ASSERT(x) assert(x) -#endif - - -SmallBlockAllocator::SmallBlockAllocator(unsigned int blockSizeMin, - unsigned int blockSizeMax, - unsigned int blockSizeIncr /* = 8 */, - unsigned int elemsPerBlockMin /* = 64 */, - unsigned int elemsPerBlockMax /* = 2048 */) - : _blockSizeMin(blockSizeMin) - , _blockSizeMax(blockSizeMax) - , _blockSizeIncr(blockSizeIncr) - , _elemsPerBlockMin(elemsPerBlockMin) - , _elemsPerBlockMax(elemsPerBlockMax) -{ - ASSERT(_blockSizeIncr % 4 == 0); // less than 4 bytes makes no sense - ASSERT(_blockSizeMin % _blockSizeIncr == 0); - ASSERT(_blockSizeMax % _blockSizeIncr == 0); - ASSERT((_blockSizeMax - _blockSizeMin) % _blockSizeIncr == 0); - unsigned int c = ((_blockSizeMax - _blockSizeMin) / _blockSizeIncr) + 1; - logdev("SBA: Using %u distinct block sizes from %u - %u bytes", c, _blockSizeMin, _blockSizeMax); - _blocks = new Block*[c]; // TODO: Do we really want to use dynamic allocation here? - memset(_blocks, 0, c * sizeof(Block*)); -} - -SmallBlockAllocator::~SmallBlockAllocator() -{ - while(_allblocks.size()) - { - Block *blk = _allblocks.back(); - logerror("~SmallBlockAllocator(): Warning: Leftover block with %u/%u elements, %uB each", - blk->maxElems, blk->maxElems - blk->freeElems, blk->elemSize); - _FreeBlock(blk); - } - delete [] _blocks; -} - -void *SmallBlockAllocator::Alloc(void *ptr, size_t newsize, size_t oldsize) -{ - DD("SBA::Alloc() ptr = %p; newsize = %u, oldsize = %u", ptr, newsize, oldsize); - - if(ptr) - { - if(!newsize) - { - _Free(ptr, oldsize); - return NULL; - } - else if(newsize == oldsize) - return ptr; - else - return _Realloc(ptr, newsize, oldsize); - } - else - { - if(newsize) - return _Alloc(newsize); - } - return NULL; -} - -SmallBlockAllocator::Block *SmallBlockAllocator::_AllocBlock(unsigned int elemCount, unsigned int elemSize) -{ - DD("SBA: _AllocBlock: elemCount = %u, elemSize = %u", elemCount, elemSize); - - const unsigned int bitsPerInt = 
(sizeof(unsigned int) * 8); // 32 - unsigned int bitmapInts = (elemCount + (bitsPerInt - 1)) / bitsPerInt; - void *ptr = malloc( - (sizeof(Block) - sizeof(unsigned int)) // block header without bitmap[1] - + (bitmapInts * sizeof(unsigned int)) // actual bitmap size - + (elemCount * elemSize) // data size - ); - - if(!ptr) - return NULL; - Block *blk = (Block*)ptr; - memset(&blk->bitmap[0], 0xff, bitmapInts * sizeof(unsigned int)); // all free - blk->elemSize = elemSize; - blk->maxElems = elemCount; - blk->freeElems = elemCount; - blk->bitmapInts = bitmapInts; - blk->next = NULL; - blk->prev = NULL; - - // using insertion sort - std::vector::iterator insertit = std::lower_bound(_allblocks.begin(), _allblocks.end(), blk); - _allblocks.insert(insertit, blk); - - return blk; -} - -void SmallBlockAllocator::_FreeBlock(Block *blk) -{ - DD("SBA: _FreeBlock: elemCount = %u, elemSize = %u", blk->maxElems, blk->elemSize); - - if(blk->prev) - blk->prev->next = blk->next; - else - _blocks[GetIndexForElemSize(blk->elemSize)] = blk->next; - - if(blk->next) - blk->next->prev = blk->prev; - - free(blk); - - // keeps the vector sorted - std::vector::iterator where = std::remove(_allblocks.begin(), _allblocks.end(), blk); - _allblocks.erase(where, _allblocks.end()); -} - - -SmallBlockAllocator::Block *SmallBlockAllocator::_AppendBlock(unsigned int elemSize) -{ - unsigned int idx = GetIndexForElemSize(elemSize); - Block *blk = _blocks[idx]; - unsigned int elemsPerBlock = _elemsPerBlockMin; - if(blk) - { - while(blk->next) - blk = blk->next; - elemsPerBlock = blk->maxElems * 2; // new block is double the size - if(elemsPerBlock > _elemsPerBlockMax) - elemsPerBlock = _elemsPerBlockMax; - } - - unsigned int blockElemSize = ((elemSize + (_blockSizeIncr - 1)) / _blockSizeIncr) * _blockSizeIncr; - ASSERT(blockElemSize >= elemSize); - - Block *newblk = _AllocBlock(elemsPerBlock, blockElemSize); - if(!newblk) - return NULL; - - if(blk) - { - blk->next = newblk; // append to list - 
newblk->prev = blk; - } - else - _blocks[idx] = newblk; // list head - - return newblk; -} - -SmallBlockAllocator::Block *SmallBlockAllocator::_GetFreeBlock(unsigned int elemSize) -{ - unsigned int idx = GetIndexForElemSize(elemSize); - Block *blk = _blocks[idx]; - while(blk && !blk->freeElems) - blk = blk->next; - return blk; -} - -void *SmallBlockAllocator::Block::allocElem() -{ - ASSERT(freeElems); - unsigned int i = 0; - for( ; !bitmap[i]; ++i) // as soon as one isn't all zero, there's a free slot - ASSERT(i < bitmapInts); - ASSERT(i < bitmapInts); - int freeidx = bithacks::ctz(bitmap[i]); - ASSERT(bitmap[i] & (1 << freeidx)); // make sure this is '1' (= free) - bitmap[i] &= ~(1 << freeidx); // put '0' where '1' was (-> mark as non-free) - --freeElems; - const unsigned int offs = (i * sizeof(unsigned int) * 8 * elemSize); // skip forward i bitmaps (32 elems each) - unsigned char *ret = getPtr() + offs + (elemSize * freeidx); - ASSERT(contains(ret)); - return ret; -} - -bool SmallBlockAllocator::Block::contains(unsigned char *ptr) const -{ - const unsigned char *pp = getPtr(); - - if(ptr < pp) - return false; // pointer is out of range (1) - if(ptr >= pp + (maxElems * elemSize)) - return false; // pointer is out of range (2) - - return true; -} - -void SmallBlockAllocator::Block::freeElem(unsigned char *ptr) -{ - ASSERT(contains(ptr)); - ASSERT(freeElems < maxElems); // make sure the block is not all free - - const ptrdiff_t p = ptr - getPtr(); - ASSERT((p % elemSize) == 0); // make sure alignment is right - const unsigned int idx = p / elemSize; - const unsigned int bitsPerInt = sizeof(unsigned int) * 8; // 32 - const unsigned int bitmapIdx = idx / bitsPerInt; - const unsigned int bitIdx = idx % bitsPerInt; - ASSERT(bitmapIdx < bitmapInts); - ASSERT(!(bitmap[bitmapIdx] & (1 << bitIdx))); // make sure this is '0' (= used) - - bitmap[bitmapIdx] |= (1 << bitIdx); // put '1' where '0' was (-> mark as free) - ++freeElems; - -#ifdef _DEBUG - memset(ptr, 0xfa, 
elemSize); -#endif -} - -void *SmallBlockAllocator::_FallbackAlloc(unsigned int size) -{ - return malloc(size); -} - -void SmallBlockAllocator::_FallbackFree(void *ptr) -{ - free(ptr); -} - -void *SmallBlockAllocator::_Alloc(unsigned int size) -{ - if(size > _blockSizeMax) - return _FallbackAlloc(size); - - Block *blk = _GetFreeBlock(size); - ASSERT(!blk || blk->freeElems); - if(!blk) - { - blk = _AppendBlock(size); - if(!blk) - return _FallbackAlloc(size); - } - return blk->allocElem(); -} - -bool SmallBlockAllocator::Block_ptr_cmp(const Block *blk, const void *ptr) -{ - return blk->getEndPtr() < ((unsigned char*)ptr); -} - -SmallBlockAllocator::Block *SmallBlockAllocator::_FindBlockContainingPtr(void *ptr) -{ - // MSVC's std::lower_bound uses iterator debug checks in debug mode, - // which breaks Block_ptr_cmp() because the left and right types are different. - std::vector::iterator it = stdx_fg::lower_bound(_allblocks.begin(), _allblocks.end(), ptr, Block_ptr_cmp); - return (it != _allblocks.end() && (*it)->contains((unsigned char*)ptr)) ? *it : NULL; -} - -void SmallBlockAllocator::_Free(void *ptr, unsigned int size) -{ - if(size <= _blockSizeMax) - { - Block *blk = _FindBlockContainingPtr(ptr); - if(blk) - { - ASSERT(blk->elemSize >= size); // ptr might be from a larger block in case _Realloc() failed to shrink - blk->freeElem((unsigned char*)ptr); - if(blk->freeElems == blk->maxElems) - _FreeBlock(blk); // remove if completely unused - return; - } - } - _FallbackFree(ptr); -} - -void *SmallBlockAllocator::_Realloc(void *ptr, unsigned int newsize, unsigned int oldsize) -{ - void *newptr = _Alloc(newsize); - - // If the new allocation failed, just re-use the old pointer if it was a shrink request - // This also satisfies Lua, which assumes that realloc() shrink requests cannot fail - if(!newptr) - return newsize <= oldsize ? 
ptr : NULL; - - memcpy(newptr, ptr, std::min(oldsize, newsize)); - _Free(ptr, oldsize); - return newptr; -} - diff --git a/BBGE/MemoryAllocatorSmallBlock.h b/BBGE/MemoryAllocatorSmallBlock.h deleted file mode 100644 index c0fe1a0..0000000 --- a/BBGE/MemoryAllocatorSmallBlock.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef MEMORY_ALLOCATOR_SMALL_BLOCK_H -#define MEMORY_ALLOCATOR_SMALL_BLOCK_H - -/* Optimized memory allocator for small & frequent (de-)allocations. - * Low memory overhead. Used for Lua. - * Inspired by http://dns.achurch.org/cgi-bin/hg/aquaria-psp/file/tip/PSP/src/lalloc.c -*/ - -// Originally made for LV3proj_ng (https://github.com/fgenesis/lv3proj_ng) -// Hacked in shape for use in Aquaria -// Public domain - - -#include - -class SmallBlockAllocator -{ -public: - - SmallBlockAllocator(unsigned int blockSizeMin, unsigned int blockSizeMax, unsigned int blockSizeIncr = 8, - unsigned int elemsPerBlockMin = 64, unsigned int elemsPerBlockMax = 2048); - - ~SmallBlockAllocator(); - - void *Alloc(void *ptr, size_t newsize, size_t oldsize); - - -private: - - void *_Alloc(unsigned int size); - void *_Realloc(void *ptr, unsigned int newsize, unsigned int oldsize); - void _Free(void* ptr, unsigned int size); - - void *_FallbackAlloc(unsigned int size); - void _FallbackFree(void *ptr); - - struct Block - { - // block header start - Block *next; - Block *prev; - unsigned short maxElems; - unsigned short freeElems; - unsigned short elemSize; - unsigned short bitmapInts; - // block header end - - unsigned int bitmap[1]; // variable sized - // actual storage memory starts after bitmap[bitmapInts] - - inline unsigned char *getPtr() - { - return reinterpret_cast(&bitmap[bitmapInts]); - } - inline const unsigned char *getPtr() const - { - return reinterpret_cast(&bitmap[bitmapInts]); - } - inline unsigned char *getEndPtr() - { - return getPtr() + (maxElems * elemSize); - } - inline const unsigned char *getEndPtr() const - { - return getPtr() + (maxElems * elemSize); - } - - 
void *allocElem(); - void freeElem(unsigned char *ptr); - bool contains(unsigned char *ptr) const; - }; - - Block *_AllocBlock(unsigned int elemCount, unsigned int elemSize); - void _FreeBlock(Block *blk); - Block *_AppendBlock(unsigned int elemSize); - Block *_GetFreeBlock(unsigned int elemSize); // NULL if none free - - Block *_FindBlockContainingPtr(void *ptr); - - inline unsigned int GetIndexForElemSize(unsigned int elemSize) - { - - - return ((elemSize + (_blockSizeIncr - 1)) / _blockSizeIncr) - 1; - } - - static bool Block_ptr_cmp(const Block *blk, const void *ptr); - - Block **_blocks; - std::vector _allblocks; // always sorted by pointer address - - unsigned int _blockSizeMin; - unsigned int _blockSizeMax; - unsigned int _blockSizeIncr; - - unsigned int _elemsPerBlockMin; - unsigned int _elemsPerBlockMax; -}; - -#endif diff --git a/CMakeLists.txt b/CMakeLists.txt index 63359c2..e3c3f2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -510,7 +510,6 @@ SET(BBGE_SRCS ${BBGEDIR}/Joystick.cpp ${BBGEDIR}/LensFlare.cpp ${BBGEDIR}/Localization.cpp - ${BBGEDIR}/MemoryAllocatorSmallBlock.cpp ${BBGEDIR}/MT.cpp ${BBGEDIR}/OSFunctions.cpp ${BBGEDIR}/ParticleEffect.cpp @@ -544,6 +543,7 @@ SET(BBGE_SRCS ${BBGEDIR}/FmodOpenALBridge.cpp ${COCOA_SRCS} ${EXTLIBDIR}/DeflateCompressor.cpp + ${EXTLIBDIR}/luaalloc.cpp ${EXTLIBDIR}/glfont2/glfont2.cpp ${EXTLIBDIR}/minihttp.cpp ${EXTLIBDIR}/JPS.h diff --git a/ExternalLibs/luaalloc.cpp b/ExternalLibs/luaalloc.cpp new file mode 100644 index 0000000..c0d9339 --- /dev/null +++ b/ExternalLibs/luaalloc.cpp @@ -0,0 +1,675 @@ +/* Small and fast memory allocator tailored for Lua. + +License: + Public domain, WTFPL, CC0 or your favorite permissive license; whatever is available in your country. + +Dependencies: + libc by default, change defines below to use your own functions + Compiles as C99 or C++ code. + +Thread safety: + No global state. LuaAlloc instances are not thread-safe (same as Lua).
+ +Background: + Lua tends to make tiny allocations (4, 8, 16, generally less than 100 bytes) most of the time. + malloc() & friends tend to be rather slow and also add some bytes of overhead for bookkeeping (typically 8 or 16 bytes), + so a large percentage of the actually allocated memory is wasted. + This allocator groups allocations of the same (small) size into blocks and passes through larger allocations. + Small allocations have an overhead of 1 bit plus some bookkeeping information for each block. + This allocator is also rather fast; in the typical case a block known to contain free slots is cached, + and inside of this block, finding a free slot is a tiny loop checking 32 slots at once, + followed by a CTZ (count trailing zeros) to locate the exact slot out of the 32. + Freeing is similar, first do a binary search to locate the block containing the pointer to be freed, + then flip the bit for that slot to mark it as unused. (Bitmap position and bit index is computed from the address, no loop there.) + Once a block for a given size bin is full, other blocks in this bin are filled. A new block is allocated from the system if there is no free block. + Unused blocks are free()d as soon as they are completely empty. + +Origin: + https://github.com/fgenesis/tinypile/blob/master/luaalloc.c + +Inspired by: + http://dns.achurch.org/cgi-bin/hg/aquaria-psp/file/tip/PSP/src/lalloc.c + http://wiki.luajit.org/New-Garbage-Collector#arenas (--> LuaJIT has its own allocator. Don't use this one for LuaJIT.) + +*/ + +/* ---- Configuration begin ---- */ + +/* Track allocation stats to get an overview of your memory usage. By default disabled in release mode. */ +#ifndef NDEBUG +# define LA_TRACK_STATS +#endif + +/* Internal consistency checks. By default disabled in release mode. */ +#ifdef NDEBUG +# define LA_ASSERT(x) +#else +# include <assert.h> +# define LA_ASSERT(x) assert(x) +#endif + +/* Required libc functions.
Use your own if needed */ +#include <string.h> /* for memcpy, memmove, memset */ +#define LA_MEMCPY(dst, src, n) memcpy((dst), (src), (n)) +#define LA_MEMMOVE(dst, src, n) memmove((dst), (src), (n)) +#define LA_MEMSET(dst, val, n) memset((dst), (val), (n)) + +/* If you want to turn off the internal default system allocator, comment out the next line. + If the default sysalloc is disabled, symbols for realloc()/free() won't be pulled in. */ +#define LA_ENABLE_DEFAULT_ALLOC + +/* Maximum size of allocations to handle. Any size beyond that will be redirected to the system allocator. + Must be a multiple of LA_ALLOC_STEP */ +#define LA_MAX_ALLOC 128 + +/* Provide pools in increments of this size, up to LA_MAX_ALLOC. 4 or 8 are good values. */ +/* E.g. A value of 4 will create pools for size 4, 8, 12, ... 128; which is 32 distinct sizes. */ +#define LA_ALLOC_STEP 4 + +/* Initial/Max. # of elements per block. Default growing behavior is to double the size for each full block until hitting LA_ELEMS_MAX. + Note that each element requires 1 bit in the bitmap, the number of elements is rounded up so that no bit is unused, + and the bitmap array is sized accordingly. Best is to use powers of 2. */ +#define LA_ELEMS_MIN 64 +#define LA_ELEMS_MAX 2048 /* Stored in u16, don't go higher than 0x8000 */ +#define LA_GROW_BLOCK_SIZE(n) (n * 2) + +typedef unsigned int u32; +typedef unsigned short u16; + +/* Bitmap type. Default u32. If you want to use another unsigned type (e.g. uint64_t) + you must provide a count-trailing-zeroes function. + Note that the bitmap implicitly controls the data alignment -- the data area starts directly after the bitmap array, + there is no explicit padding in between. */ +typedef u32 ubitmap; + +/* CTZ for your bitmap type.
*/ +#define bitmap_CTZ(x) ctz32(x) + +/* ---- Configuration end ---- */ + + +#include "luaalloc.h" + +#include <stddef.h> /* for size_t, ptrdiff_t */ +#include <limits.h> /* for CHAR_BIT */ + +#ifdef LA_ENABLE_DEFAULT_ALLOC +#include <stdlib.h> /* for realloc, free */ +#endif + +/* ---- Intrinsics ---- */ + +#define LA_RESTRICT __restrict + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM)) +# include <intrin.h> +# define HAS_BITSCANFORWARD +#elif defined(__clang__) +# if __has_builtin(__builtin_ctz) +# define HAS_BUILTIN_CTZ +# endif +#elif defined(__GNUC__) +# define HAS_BUILTIN_CTZ +#endif + +inline static unsigned ctz32(u32 x) +{ +#if defined(HAS_BUILTIN_CTZ) + return __builtin_ctz(x); +#elif defined(HAS_BITSCANFORWARD) + unsigned long r = 0; + _BitScanForward(&r, x); + return r; +#else /* bit magic */ + x = (x & -x) - 1; + /* begin popcount32 */ + x -= ((x >> 1) & 0x55555555); + x = (((x >> 2) & 0x33333333) + (x & 0x33333333)); + x = (((x >> 4) + x) & 0x0f0f0f0f); + x += (x >> 8); + x += (x >> 16); + x &= 0x0000003f; + /* end popcount32 */ + return x; +#endif +} + +/* ---- Structs for internal book-keeping ---- */ + +#define BLOCK_ARRAY_SIZE (LA_MAX_ALLOC / LA_ALLOC_STEP) + +typedef struct Block Block; + +struct Block +{ + u16 elemsfree; /* dynamic */ + u16 elemstotal; /* const */ + u16 elemSize; /* const */ + u16 bitmapInts; /* const */ + Block *next; /* dynamic */ + Block *prev; /* dynamic */ + + ubitmap bitmap[1]; + /* bitmap area */ + /* data area */ +}; + +typedef struct LuaAlloc +{ + Block *active[BLOCK_ARRAY_SIZE]; /* current work block for each size, that serves allocations until full */ + Block *chain[BLOCK_ARRAY_SIZE]; /* newest allocated block for each size (follow ->prev to get older block) */ + Block **all; /* All blocks in use, sorted by address */ + size_t allnum; /* number of blocks in use */ + size_t allcap; /* capacity of array */ + LuaSysAlloc sysalloc; + void *user; +#ifdef LA_TRACK_STATS + struct + { + /* Extra entry is for large allocations outside
of this allocator */ + size_t alive[BLOCK_ARRAY_SIZE + 1]; /* How many allocations of each size bin are currently in use */ + size_t total[BLOCK_ARRAY_SIZE + 1]; /* How many allocations of each size bin were done in total */ + size_t blocks_alive[BLOCK_ARRAY_SIZE + 1]; /* How many blocks for each size bin do currently exist */ + } stats; +#endif +} LuaAlloc; + +/* ---- Helper functions ---- */ + +static const u16 BITMAP_ELEM_SIZE = sizeof(ubitmap) * CHAR_BIT; + +inline static ubitmap *getbitmap(Block *b) +{ + return &b->bitmap[0]; +} + +inline static void *getdata(Block *b) +{ + return ((char*)getbitmap(b)) + (b->bitmapInts * sizeof(ubitmap)); +} + +inline static void *getdataend(Block *b) +{ + return ((char*)getdata(b)) + ((size_t)b->elemSize * b->elemstotal); +} + +inline static unsigned sizeindex(u16 elemSize) +{ + LA_ASSERT(elemSize && elemSize <= LA_MAX_ALLOC); + return (elemSize - 1) / LA_ALLOC_STEP; +} + +inline static unsigned bsizeindex(const Block *b) +{ + return sizeindex(b->elemSize); +} + +static int contains(Block * b, const void *p) +{ + return getdata(b) <= p && p < getdataend(b); +} + +inline static u16 roundToFullBitmap(u16 n) +{ +#if CHAR_BIT == 8 + return (n + BITMAP_ELEM_SIZE - 1) & -BITMAP_ELEM_SIZE; /* Fast round if BITMAP_ELEM_SIZE is a power of 2 */ +#else +# error Weird hardware detected! CHAR_BIT != 8, does this mean BITMAP_ELEM_SIZE is not a power of 2? Check this, and CTZ function. 
+#endif +} + +inline static void checkblock(Block *b) +{ + LA_ASSERT(b->elemSize && (b->elemSize % LA_ALLOC_STEP) == 0); + LA_ASSERT(b->bitmapInts * BITMAP_ELEM_SIZE == b->elemstotal); + LA_ASSERT(b->elemsfree <= b->elemstotal); + LA_ASSERT(b->elemstotal >= LA_ELEMS_MIN); + LA_ASSERT(b->elemstotal <= LA_ELEMS_MAX); +} + +inline static size_t blocksize(Block *b) +{ + return (char*)getdataend(b) - (char*)b; +} + +inline static u16 nextblockelems(Block *b) +{ + if(!b) + return LA_ELEMS_MIN; + u32 n = LA_GROW_BLOCK_SIZE(b->elemstotal); + return (u16)(n < LA_ELEMS_MAX ? n : LA_ELEMS_MAX); +} + +/* ---- System allocator interface ---- */ + +inline static void *sysmalloc(LuaAlloc *LA, size_t osize, size_t nsize) +{ + LA_ASSERT(nsize); + return LA->sysalloc(LA->user, NULL, osize, nsize); +} + +inline static void sysfree(LuaAlloc * LA, void * p, size_t osize) +{ + LA_ASSERT(p && osize); + LA->sysalloc(LA->user, p, osize, 0); /* ignore return value */ +} + +inline static void *sysrealloc(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p, size_t osize, size_t nsize) +{ + LA_ASSERT(osize && nsize); /* This assert is correct even if an AllocType enum value is passed as osize. 
*/ + return LA->sysalloc(LA->user, p, osize, nsize); +} + +/* ---- Allocator internals ---- */ + +static Block *_allocblock(LuaAlloc *LA, u16 nelems, u16 elemsz) +{ + elemsz = ((elemsz + LA_ALLOC_STEP-1) / LA_ALLOC_STEP) * LA_ALLOC_STEP; /* round up */ + nelems = roundToFullBitmap(nelems); /* The bitmap array must not have any unused bits */ + const u16 nbitmap = nelems / BITMAP_ELEM_SIZE; + + void *ptr = sysmalloc(LA, LUAALLOC_TYPE_BLOCK, + (sizeof(Block) - sizeof(ubitmap)) /* block header without bitmap[1] */ + + (nbitmap * sizeof(ubitmap)) /* actual bitmap size */ + + (nelems * (size_t)elemsz) /* data size */ + ); + + if(!ptr) + return NULL; + + Block *b = (Block*)ptr; + b->elemsfree = nelems; + b->elemstotal = nelems; + b->elemSize = elemsz; + b->bitmapInts = nbitmap; + b->next = NULL; + b->prev = NULL; + LA_MEMSET(b->bitmap, -1, nbitmap * sizeof(ubitmap)); /* mark all as free */ + + return b; +} + +/* Given the sorting order of LA->all, find the right spot to insert p that preserves the sorting order. + Returns the address of the block that is <= p, or one past the end if no such block was found. + Use cases: + 1) Pass a block to get the address where this block is stored + 2) Pass any other pointer to get ONE BLOCK PAST the address of the block that would contain it (this is not checked) +*/ +static Block **findspot(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p) +{ + Block **all = LA->all; + + /* Binary search to find leftmost element */ + size_t L = 0; + size_t R = LA->allnum; + while(L < R) + { + size_t m = (L + R) / 2u; + if((void*)all[m] < p) + L = m + 1; + else + R = m; + } + return all + L; +} + +static size_t enlarge(LuaAlloc *LA) +{ + const size_t incr = (LA->allcap / 2) + 16; + const size_t newcap = LA->allcap + incr; /* Rough guess */ + Block **newall = (Block**)sysrealloc(LA, LA->all, LA->all ? 
LA->allcap * sizeof(Block*) : LUAALLOC_TYPE_INTERNAL, sizeof(Block*) * newcap); /* old size is passed in bytes, same unit luaalloc_delete() uses for this array */ + if(newall) + { + LA->all = newall; + LA->allcap = newcap; + return newcap; + } + return 0; +} + +static Block *insertblock(LuaAlloc * LA_RESTRICT LA, Block * LA_RESTRICT b) +{ + /* Enlarge central block storage if necessary */ + if(LA->allcap == LA->allnum && !enlarge(LA)) + { + sysfree(LA, b, blocksize(b)); /* Can't fit block, kill it and fail */ + return NULL; + } + + /* Find correct spot to insert */ + /* Invariant: Array is already sorted */ + Block **spot = findspot(LA, b); + Block **end = LA->all + LA->allnum; + + /* inserting in the middle? Must preserve sort order */ + if(spot < end) + { + /* move other pointers up */ + LA_MEMMOVE(spot+1, spot, (end - spot) * sizeof(Block*)); + } + + *spot = b; + ++LA->allnum; + + /* Link in chain */ + const unsigned si = bsizeindex(b); + Block *top = LA->chain[si]; + LA->chain[si] = b; + if(top) + { + LA_ASSERT(!top->next); + top->next = b; + } + b->prev = top; + +#ifdef LA_TRACK_STATS + LA->stats.blocks_alive[si]++; +#endif + + checkblock(b); + + return b; +} + +static void freeblock(LuaAlloc * LA_RESTRICT LA, Block ** LA_RESTRICT spot) +{ + LA_ASSERT(LA->allnum); + Block *b = *spot; + checkblock(b); + + /* Remove from central list */ + Block **end = LA->all + LA->allnum; + if(spot+1 < end) + { + /* Move other pointers down */ + LA_MEMMOVE(spot, spot+1, (end - (spot+1)) * sizeof(Block*)); + } + --LA->allnum; + /* Invariant: Array is still sorted after removing an element */ + + /* Remove from chain */ + unsigned si = bsizeindex(b); + if(LA->chain[si] == b) + { + LA_ASSERT(!b->next); + LA->chain[si] = b->prev; + } + + if(LA->active[si] == b) + LA->active[si] = NULL; + + /* Unlink from linked list */ + if(b->next) + { + LA_ASSERT(b->next->prev == b); + b->next->prev = b->prev; + } + if(b->prev) + { + LA_ASSERT(b->prev->next == b); + b->prev->next = b->next; + } + +#ifdef LA_TRACK_STATS + LA->stats.blocks_alive[si]--; +#endif + + sysfree(LA, b, blocksize(b));
/* free it */ +} + +static Block *newblock(LuaAlloc *LA, u16 nelems, u16 elemsz) +{ + Block *b = _allocblock(LA, nelems, elemsz); + return b ? insertblock(LA, b) : NULL; +} + +static void *_Balloc(Block *b) +{ + LA_ASSERT(b->elemsfree); + ubitmap *bitmap = b->bitmap; + unsigned i = 0, bm; + for( ; !((bm = bitmap[i])); ++i) {} /* as soon as one isn't all zero, there's a free slot */ + LA_ASSERT(i < b->bitmapInts); /* And there must've been a free slot because b->elemsfree != 0 */ + ubitmap bitIdx = bitmap_CTZ(bm); /* Get exact location of free slot */ + LA_ASSERT(bm & ((ubitmap)1 << bitIdx)); /* make sure this is '1' (= free) */ + bm &= ~((ubitmap)1 << bitIdx); /* put '0' where '1' was (-> mark as non-free) */ + bitmap[i] = bm; + --b->elemsfree; + const size_t where = (i * (size_t)BITMAP_ELEM_SIZE) + bitIdx; + void *ret = ((char*)getdata(b)) + (where * b->elemSize); + LA_ASSERT(contains(b, ret)); + return ret; +} + +static void _Bfree(Block * LA_RESTRICT b, void * LA_RESTRICT p) +{ + LA_ASSERT(b->elemsfree < b->elemstotal); + LA_ASSERT(contains(b, p)); + const ptrdiff_t offs = (char*)p - (char*)getdata(b); + LA_ASSERT(offs % b->elemSize == 0); + const unsigned idx = (unsigned)(offs / b->elemSize); + const unsigned bitmapIdx = idx / BITMAP_ELEM_SIZE; + const ubitmap bitIdx = idx % BITMAP_ELEM_SIZE; + LA_ASSERT(bitmapIdx < b->bitmapInts); + LA_ASSERT(!(b->bitmap[bitmapIdx] & ((ubitmap)1 << bitIdx))); /* make sure this is '0' (= used) */ + b->bitmap[bitmapIdx] |= ((ubitmap)1 << bitIdx); /* put '1' where '0' was (-> mark as free) */ + ++b->elemsfree; +} + +/* returns block with at least 1 free slot, NULL only in case of allocation fail */ +static Block *getfreeblock(LuaAlloc *LA, u16 size) +{ + unsigned si = sizeindex(size); + Block *b = LA->active[si]; + if(b && b->elemsfree) /* Good case: Currently active block is free, use that */ + return b; + + /* Not-so-good case: Active block is full or doesn't exist, try an older block in the chain */ + b = LA->chain[si]; + 
while(b && !b->elemsfree) + b = b->prev; + + /* Still no good? Allocate new block */ + if(!b || !b->elemsfree) + b = newblock(LA, nextblockelems(LA->chain[si]), size); /* Use newest block in chain to compute size */ + + /* Use this block for further allocation requests */ + LA->active[si] = b; + + return b; +} + +static void *_Alloc(LuaAlloc *LA, size_t size) +{ + LA_ASSERT(size); + + if(size <= LA_MAX_ALLOC) + { + Block *b = getfreeblock(LA, (u16)size); + if(b) + { + checkblock(b); + void *p = _Balloc(b); + LA_ASSERT(p); /* Can't fail -- block was known to be free */ + +#ifdef LA_TRACK_STATS + unsigned si = bsizeindex(b); + LA->stats.alive[si]++; + LA->stats.total[si]++; +#endif + return p; + } + /* else try the alloc below */ + } + + void *p = sysmalloc(LA, LUAALLOC_TYPE_LARGELUA, size); /* large Lua allocation */ + +#ifdef LA_TRACK_STATS + if(p) + { + LA->stats.alive[BLOCK_ARRAY_SIZE]++; + LA->stats.total[BLOCK_ARRAY_SIZE]++; + } +#endif + return p; +} + +static void freefromspot(LuaAlloc * LA_RESTRICT LA, Block ** LA_RESTRICT spot, void *p) +{ + Block *b = *spot; +#ifdef LA_TRACK_STATS + unsigned si = bsizeindex(b); + LA->stats.alive[si]--; +#endif + if(b->elemsfree + 1 == b->elemstotal) + freeblock(LA, spot); /* Freeing last element in the block -> just free the whole thing */ + else + _Bfree(b, p); +} + +static void _Free(LuaAlloc * LA_RESTRICT LA , void * LA_RESTRICT p, size_t oldsize) +{ + LA_ASSERT(p); + + if(oldsize <= LA_MAX_ALLOC && LA->allnum) /* with no blocks alive, p can only be a system allocation and the block array must not be dereferenced */ + { + Block **spot = findspot(LA, p); /* Here, spot might point one past the end */ + spot -= (spot > LA->all); /* One back unless we're already at the front -- now spot is always valid */ + Block *b = *spot; + checkblock(b); + if(contains(b, p)) + { + freefromspot(LA, spot, p); + return; + } + /* else p is outside of any block area. This case is unlikely but possible: + - alloc large size (falling through to system alloc), + - then, try to shrink it to fit inside LA_MAX_ALLOC, + - ...
but there is no block free for that size... + - try to alloc new block and fail (out of memory) + - then _Realloc() uses the original, still valid pointer since by spec shrink requests must not fail + - Lua sees the "reallocated" (actually the old) pointer and records the new, smaller size; + - when this pointer is freed, we're here in this situation. + Therefore fall through to free a large allocation. */ + } + +#ifdef LA_TRACK_STATS + LA->stats.alive[BLOCK_ARRAY_SIZE]--; +#endif + + sysfree(LA, p, oldsize); /* large Lua free */ +} + +static void *_Realloc(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p, size_t newsize, size_t oldsize) +{ + LA_ASSERT(p); + void *newptr = _Alloc(LA, newsize); + + /* If the new allocation failed, just re-use the old pointer if it was a shrink request. + This also satisfies Lua, which assumes that shrink requests cannot fail */ + if(!newptr) + return newsize <= oldsize ? p : NULL; + + const size_t minsize = oldsize < newsize ? oldsize : newsize; + LA_MEMCPY(newptr, p, minsize); + _Free(LA, p, oldsize); + return newptr; +} + +/* ---- Default system allocator ---- */ + +#ifdef LA_ENABLE_DEFAULT_ALLOC +static void *defaultalloc(void *user, void *ptr, size_t osize, size_t nsize) +{ + (void)user; + (void)osize; + if(nsize) + return realloc(ptr, nsize); + free(ptr); + return NULL; +} +#endif + +/* ---- Public API ---- */ + +#ifdef __cplusplus +extern "C" { +#endif + +LUAALLOC_EXPORT void *luaalloc(void * ud, void *ptr, size_t oldsize, size_t newsize) +{ + LuaAlloc *LA = (LuaAlloc*)ud; + if(ptr) + { + if(!newsize) + _Free(LA, ptr, oldsize); + else if(newsize != oldsize) + return _Realloc(LA, ptr, newsize, oldsize); + else + return ptr; + } + else if(newsize) + return _Alloc(LA, newsize); + + return NULL; +} + +LUAALLOC_EXPORT LuaAlloc * luaalloc_create(LuaSysAlloc sysalloc, void *user) +{ + if(!sysalloc) + { +#ifdef LA_ENABLE_DEFAULT_ALLOC + sysalloc = defaultalloc; +#else + LA_ASSERT(sysalloc); + return NULL; +#endif + } + + LuaAlloc *LA = 
(LuaAlloc*)sysalloc(user, NULL, LUAALLOC_TYPE_INTERNAL, sizeof(LuaAlloc)); + if(LA) + { + LA_MEMSET(LA, 0, sizeof(LuaAlloc)); + LA->sysalloc = sysalloc; + LA->user = user; + } + return LA; +} + +LUAALLOC_EXPORT void luaalloc_delete(LuaAlloc *LA) +{ + LA_ASSERT(LA->allnum == 0); /* If this fails the Lua state didn't GC everything, which is a bug */ + if(LA->all) + sysfree(LA, LA->all, LA->allcap * sizeof(Block*)); + sysfree(LA, LA, sizeof(LuaAlloc)); /* free self */ +} + +/* ---- Optional stats tracking ---- */ + +LUAALLOC_EXPORT unsigned luaalloc_getstats(const LuaAlloc *LA, const size_t ** alive, const size_t ** total, const size_t ** blocks, unsigned *pbinstep) +{ + if(pbinstep) + *pbinstep = LA_ALLOC_STEP; + +#ifdef LA_TRACK_STATS + if(alive) + *alive = LA->stats.alive; + if(total) + *total = LA->stats.total; + if(blocks) + *blocks = LA->stats.blocks_alive; + return BLOCK_ARRAY_SIZE + 1; +#else + if(alive) + *alive = NULL; + if(total) + *total = NULL; + if(blocks) + *blocks = NULL; + return 0; +#endif +} + +#ifdef __cplusplus +} +#endif diff --git a/ExternalLibs/luaalloc.h b/ExternalLibs/luaalloc.h new file mode 100644 index 0000000..eef9dee --- /dev/null +++ b/ExternalLibs/luaalloc.h @@ -0,0 +1,120 @@ +/* +Small and fast Lua allocator, compatible with Lua 5.1 and up. +For more info and compile-time config, see luaalloc.c + +Usage: + LuaAlloc *LA = luaalloc_create(NULL, NULL); + lua_State *L = lua_newstate(luaalloc, LA); + ... use L ... + lua_close(L); + luaalloc_delete(LA); +*/ + +#pragma once + +/* Every public API function is annotated with this */ +#ifndef LUAALLOC_EXPORT +#define LUAALLOC_EXPORT +#endif + +#include <stddef.h> /* for size_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Opaque allocator type */ +typedef struct LuaAlloc LuaAlloc; + +/* Main allocation callback. Lua will call this when it needs memory. + 'ud' must be a valid LuaAlloc context passed as user pointer to lua_newstate().
*/ +LUAALLOC_EXPORT void *luaalloc(void *ud, void *ptr, size_t osize, size_t nsize); + +/* Block requests and large allocations will be forwarded to the system allocator. + If you don't provide one, a suitable one based on realloc()/free() will be used. + Details below. */ +typedef void *(*LuaSysAlloc)(void *ud, void *ptr, size_t osize, size_t nsize); + +/* Create allocator context. Pass custom system allocator if needed or NULL for the built-in default. + Multiple Lua states can share a single LuaAlloc as long as they run on the same thread. */ +LUAALLOC_EXPORT LuaAlloc *luaalloc_create(LuaSysAlloc sysalloc, void *ud); + +/* Destroy allocator. Call after lua_close()ing each Lua state using the allocator. */ +LUAALLOC_EXPORT void luaalloc_delete(LuaAlloc*); + +/* Statistics tracking. Define LA_TRACK_STATS in luaalloc.c to use this. [Enabled by default in debug mode]. + Provides pointers to internal stats area. Each element corresponds to an internal allocation bin. + - alive: How many allocations of a bin size are currently in use. + - total: How many allocations of a bin size were ever made. + - blocks: How many blocks currently exist for a bin. + With the default config, index 0 corresponds to all allocations of 1-4 bytes, index 1 to those of 5-8 bytes, and so on. + The bin size increment is returned in pbinstep (default: 4). + All output pointers can be NULL if you're not interested in the thing. + Returns the total number of bins. 0 when stats tracking is disabled. + The last valid index is not an actual bin -- instead, large allocations that bypass the allocator are collected there. + The returned pointers are owned by the LuaAlloc instance and stay valid throughout its lifetime. 
+ To iterate over the size bins, you can do: + + const size_t *alive, *total, *blocks; + unsigned step, n = luaalloc_getstats(LA, &alive, &total, &blocks, &step); + if(n) + { + for(unsigned i = 0, a = 1, b = step; i < n-1; ++i, a = b+1, b += step) + printf("%zu blocks of %u..%u bytes: %zu allocations alive, %zu done all-time\n", + blocks[i], a, b, alive[i], total[i]); + printf("large allocations: %zu alive, %zu done all-time\n", alive[n-1], total[n-1]); + } +*/ +LUAALLOC_EXPORT unsigned luaalloc_getstats(const LuaAlloc*, const size_t **alive, const size_t **total, const size_t **blocks, unsigned *pbinstep); + + + +typedef enum +{ + LUAALLOC_TYPE_LARGELUA = 1, + LUAALLOC_TYPE_BLOCK = 2, + LUAALLOC_TYPE_INTERNAL = 3 +} AllocType; + +#ifdef __cplusplus +} +#endif + + +/* +Details about the system allocator: + + typedef void *(*LuaSysAlloc)(void *ud, void *ptr, size_t osize, size_t nsize); + +Block requests and large Lua allocations will be forwarded to the system allocator. +The function signature is (intentionally) the same as luaalloc() and the semantics are very similar. +The caller knows the size of each allocation so you do not have to track this yourself. +The system allocator must not fail shrink requests (same requirement as Lua). + +You must handle the following cases: + if(!ptr && nsize) + return malloc(nsize); (osize encodes the type of allocation, see below) + else if(ptr && !nsize) + free(ptr); (osize is the previously allocated size; the return value is ignored) + else if(ptr && nsize) + return realloc(ptr, nsize); (must not fail shrink requests. osize is the previously allocated size; osize != nsize guaranteed) + // never called with (!ptr && !nsize), can ignore this case + +Types of allocations, in case (!ptr && nsize): +switch(osize) +{ + case LUAALLOC_TYPE_LARGELUA: + passthrough/large Lua allocation (alloc'd/free'd/realloc'd incl. 
shrink requests) + case LUAALLOC_TYPE_BLOCK: + block allocation (alloc'd/free'd, but never realloc'd) + case LUAALLOC_TYPE_INTERNAL: + allocation of LuaAlloc-internal data (usually long-lived. alloc'd, realloc'd to enlarge, but never shrunk. free'd only in luaalloc_delete()) + case 0: default: + some other allocation (not used by LuaAlloc. Maybe some other code uses this allocator as well?) +} + +Lua allocations may fail and Lua usually handles this gracefully by running an emergency GC; +5.2 and up do this out of the box and there is a patch for 5.1 as well. +This block allocator is built to properly handle system allocator failures, +and return a failed allocation back to Lua as appropriate. +*/ diff --git a/win/vc90/BBGE.vcproj b/win/vc90/BBGE.vcproj index 13f67cc..c88dfaf 100644 --- a/win/vc90/BBGE.vcproj +++ b/win/vc90/BBGE.vcproj @@ -489,14 +489,6 @@ RelativePath="..\..\BBGE\MathFunctions.h" > - - - - diff --git a/win/vc90/external.vcproj b/win/vc90/external.vcproj index 193bd1a..b304964 100644 --- a/win/vc90/external.vcproj +++ b/win/vc90/external.vcproj @@ -321,6 +321,14 @@ RelativePath="..\..\ExternalLibs\DeflateCompressor.h" > + + + +