1
0
Fork 0
mirror of https://github.com/AquariaOSE/Aquaria.git synced 2025-01-24 17:26:41 +00:00

Replace the old Lua small block allocator with a new one (for #74)

Renamed original .c file to .cpp to make VS2008 happy
This commit is contained in:
fgenesis 2021-01-23 14:07:39 +01:00
parent e4b2ee6e70
commit db079a55ef
9 changed files with 828 additions and 426 deletions

View file

@ -19,15 +19,17 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "SDL.h"
#include "ScriptInterface.h"
#include "../BBGE/ScriptObject.h"
extern "C"
{
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"
}
#include "luaalloc.h"
#include "SDL.h"
#include "ScriptInterface.h"
#include "../BBGE/ScriptObject.h"
#include "ReadXML.h"
@ -11498,7 +11500,7 @@ static const struct {
//============================================================================================
ScriptInterface::ScriptInterface()
: baseState(NULL), _sballoc(8, 128)
: baseState(NULL), _LA(NULL)
{
}
@ -11513,6 +11515,8 @@ void ScriptInterface::init()
allowUnsafeFunctions = dsq->user.system.allowDangerousScriptFunctions;
if(!_LA)
_LA = luaalloc_create(NULL, NULL);
if (!baseState)
baseState = createLuaVM();
}
@ -11523,15 +11527,9 @@ void ScriptInterface::reset()
init();
}
void *ScriptInterface::the_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
{
ScriptInterface *this_ = (ScriptInterface*)ud;
return this_->_sballoc.Alloc(ptr, nsize, osize);
}
lua_State *ScriptInterface::createLuaVM()
{
lua_State *state = lua_newstate(the_alloc, this); /* opens Lua */
lua_State *state = lua_newstate(_LA ? luaalloc : NULL, _LA); /* opens Lua */
luaL_openlibs(state);
#ifdef LUAAPI_HAS_CLIPBOARD
@ -11612,12 +11610,6 @@ lua_State *ScriptInterface::createLuaVM()
return state;
}
void ScriptInterface::destroyLuaVM(lua_State *state)
{
if (state)
lua_close(state);
}
// Initial value for the instance-local table should be on the stack of
// the base Lua state; it will be popped when this function returns.
lua_State *ScriptInterface::createLuaThread(const std::string &file)
@ -11721,8 +11713,16 @@ int ScriptInterface::gcGetStats()
void ScriptInterface::shutdown()
{
destroyLuaVM(baseState);
baseState = NULL;
if (baseState)
{
lua_close(baseState);
baseState = NULL;
}
if(_LA)
{
luaalloc_delete(_LA);
_LA = NULL;
}
}
Script *ScriptInterface::openScript(const std::string &file, bool ignoremissing /* = false */)

View file

@ -22,9 +22,9 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#define SCRIPTINTERFACE_H
#include "../BBGE/Base.h"
#include "../BBGE/MemoryAllocatorSmallBlock.h"
struct lua_State;
struct LuaAlloc;
class Entity;
class CollideEntity;
@ -115,13 +115,12 @@ public:
protected:
lua_State *createLuaVM();
void destroyLuaVM(lua_State *state);
lua_State *createLuaThread(const std::string &file);
int destroyLuaThread(const std::string &file, lua_State *thread);
static void *the_alloc(void *ud, void *ptr, size_t osize, size_t nsize);
lua_State *baseState;
SmallBlockAllocator _sballoc;
LuaAlloc *_LA;
};
#endif

View file

@ -1,292 +0,0 @@
// Public domain
// Aquaria specific...
#include "Base.h"
#include "algorithmx.h"
#include "MemoryAllocatorSmallBlock.h"
#include "bithacks.h"
#include <assert.h>
#define DD(...)
#define logdev(...)
#define logerror(...)
#ifdef NDEBUG
# define ASSERT(x)
#else
# define ASSERT(x) assert(x)
#endif
// Builds an allocator that keeps one block list ("bin") per element size.
// Sizes run from blockSizeMin to blockSizeMax in steps of blockSizeIncr; every bin starts empty.
// elemsPerBlockMin / elemsPerBlockMax bound how many elements a single block may hold.
SmallBlockAllocator::SmallBlockAllocator(unsigned int blockSizeMin,
                                         unsigned int blockSizeMax,
                                         unsigned int blockSizeIncr /* = 8 */,
                                         unsigned int elemsPerBlockMin /* = 64 */,
                                         unsigned int elemsPerBlockMax /* = 2048 */)
    : _blockSizeMin(blockSizeMin)
    , _blockSizeMax(blockSizeMax)
    , _blockSizeIncr(blockSizeIncr)
    , _elemsPerBlockMin(elemsPerBlockMin)
    , _elemsPerBlockMax(elemsPerBlockMax)
{
    ASSERT(_blockSizeIncr % 4 == 0); // less than 4 bytes makes no sense
    ASSERT(_blockSizeMin % _blockSizeIncr == 0);
    ASSERT(_blockSizeMax % _blockSizeIncr == 0);
    ASSERT((_blockSizeMax - _blockSizeMin) % _blockSizeIncr == 0);

    // One list head per distinct element size.
    const unsigned int binCount = ((_blockSizeMax - _blockSizeMin) / _blockSizeIncr) + 1;
    logdev("SBA: Using %u distinct block sizes from %u - %u bytes", binCount, _blockSizeMin, _blockSizeMax);

    _blocks = new Block*[binCount]; // TODO: Do we really want to use dynamic allocation here?
    for(unsigned int bin = 0; bin < binCount; ++bin)
        _blocks[bin] = NULL; // no block allocated for this size yet
}
// Releases every block that is still registered, then the bin table itself.
SmallBlockAllocator::~SmallBlockAllocator()
{
    // _FreeBlock() erases the block from _allblocks, so this loop shrinks the vector to empty.
    while(!_allblocks.empty())
    {
        Block *leftover = _allblocks.back();
        logerror("~SmallBlockAllocator(): Warning: Leftover block with %u/%u elements, %uB each",
            leftover->maxElems, leftover->maxElems - leftover->freeElems, leftover->elemSize);
        _FreeBlock(leftover);
    }
    delete [] _blocks;
}
// realloc-style entry point:
//   ptr == NULL, newsize != 0  -> allocate
//   ptr != NULL, newsize == 0  -> free, returns NULL
//   ptr != NULL, sizes equal   -> returns ptr unchanged
//   ptr != NULL, sizes differ  -> reallocate
//   ptr == NULL, newsize == 0  -> nothing to do, returns NULL
void *SmallBlockAllocator::Alloc(void *ptr, size_t newsize, size_t oldsize)
{
    DD("SBA::Alloc() ptr = %p; newsize = %u, oldsize = %u", ptr, newsize, oldsize);

    if(!ptr)
        return newsize ? _Alloc(newsize) : NULL;

    if(!newsize)
    {
        _Free(ptr, oldsize);
        return NULL;
    }

    if(newsize == oldsize)
        return ptr;

    return _Realloc(ptr, newsize, oldsize);
}
// Allocates one block (header + bitmap + element storage) in a single malloc(),
// marks every slot as free and registers the block in the address-sorted _allblocks vector.
// Returns NULL if malloc() fails. The block is NOT linked into any bin list here.
SmallBlockAllocator::Block *SmallBlockAllocator::_AllocBlock(unsigned int elemCount, unsigned int elemSize)
{
    DD("SBA: _AllocBlock: elemCount = %u, elemSize = %u", elemCount, elemSize);

    const unsigned int bitsPerInt = (sizeof(unsigned int) * 8); // 32
    const unsigned int bitmapInts = (elemCount + (bitsPerInt - 1)) / bitsPerInt;

    const size_t headerBytes = sizeof(Block) - sizeof(unsigned int); // block header without bitmap[1]
    const size_t bitmapBytes = bitmapInts * sizeof(unsigned int);    // actual bitmap size
    const size_t dataBytes   = elemCount * elemSize;                  // data size

    Block *blk = (Block*)malloc(headerBytes + bitmapBytes + dataBytes);
    if(!blk)
        return NULL;

    blk->next = NULL;
    blk->prev = NULL;
    blk->maxElems = elemCount;
    blk->freeElems = elemCount;
    blk->elemSize = elemSize;
    blk->bitmapInts = bitmapInts;
    memset(&blk->bitmap[0], 0xff, bitmapBytes); // a set bit means "slot is free"

    // keep _allblocks sorted by address (insertion sort step)
    _allblocks.insert(std::lower_bound(_allblocks.begin(), _allblocks.end(), blk), blk);
    return blk;
}
// Unlinks blk from its size bin's list, drops it from the address-sorted registry
// and finally returns its memory to the system.
void SmallBlockAllocator::_FreeBlock(Block *blk)
{
    DD("SBA: _FreeBlock: elemCount = %u, elemSize = %u", blk->maxElems, blk->elemSize);

    // Unlink from the doubly linked list of its bin; fix up the list head if blk was first.
    if(blk->prev)
        blk->prev->next = blk->next;
    else
        _blocks[GetIndexForElemSize(blk->elemSize)] = blk->next;

    if(blk->next)
        blk->next->prev = blk->prev;

    // Remove from the registry BEFORE freeing, so the pointer value is never
    // inspected after the memory has been released. std::remove keeps the vector sorted.
    std::vector<Block*>::iterator where = std::remove(_allblocks.begin(), _allblocks.end(), blk);
    _allblocks.erase(where, _allblocks.end());

    free(blk);
}
// Creates a new block for the bin serving elemSize and appends it to the end of that bin's list.
// The first block of a bin holds _elemsPerBlockMin elements; each further block holds twice as
// many as the current last block, capped at _elemsPerBlockMax. Returns NULL on allocation failure.
SmallBlockAllocator::Block *SmallBlockAllocator::_AppendBlock(unsigned int elemSize)
{
    const unsigned int idx = GetIndexForElemSize(elemSize);

    Block *tail = _blocks[idx];
    unsigned int elemsPerBlock = _elemsPerBlockMin;
    if(tail)
    {
        for( ; tail->next; tail = tail->next) {} // walk to the last block of the list
        elemsPerBlock = tail->maxElems * 2; // new block is double the size
        if(elemsPerBlock > _elemsPerBlockMax)
            elemsPerBlock = _elemsPerBlockMax;
    }

    // round the element size up to the bin's granularity
    unsigned int blockElemSize = ((elemSize + (_blockSizeIncr - 1)) / _blockSizeIncr) * _blockSizeIncr;
    ASSERT(blockElemSize >= elemSize);

    Block *newblk = _AllocBlock(elemsPerBlock, blockElemSize);
    if(!newblk)
        return NULL;

    if(!tail)
    {
        _blocks[idx] = newblk; // list head
        return newblk;
    }

    tail->next = newblk; // append to list
    newblk->prev = tail;
    return newblk;
}
// Returns the first block in elemSize's bin that still has a free slot, or NULL if there is none.
SmallBlockAllocator::Block *SmallBlockAllocator::_GetFreeBlock(unsigned int elemSize)
{
    Block *candidate = _blocks[GetIndexForElemSize(elemSize)];
    for( ; candidate; candidate = candidate->next)
    {
        if(candidate->freeElems)
            break;
    }
    return candidate;
}
// Claims one free slot of this block and returns its address.
// Precondition (asserted): freeElems != 0, i.e. at least one bitmap bit is still set.
void *SmallBlockAllocator::Block::allocElem()
{
    ASSERT(freeElems);
    unsigned int i = 0;
    for( ; !bitmap[i]; ++i) // as soon as one isn't all zero, there's a free slot
        ASSERT(i < bitmapInts);
    ASSERT(i < bitmapInts);
    int freeidx = bithacks::ctz(bitmap[i]);
    // Use an unsigned constant for the mask: with a signed 1, shifting into bit 31
    // (the last slot of a bitmap word) is undefined behavior.
    const unsigned int mask = 1u << freeidx;
    ASSERT(bitmap[i] & mask); // make sure this is '1' (= free)
    bitmap[i] &= ~mask; // put '0' where '1' was (-> mark as non-free)
    --freeElems;
    const unsigned int offs = (i * sizeof(unsigned int) * 8 * elemSize); // skip forward i bitmaps (32 elems each)
    unsigned char *ret = getPtr() + offs + (elemSize * freeidx);
    ASSERT(contains(ret));
    return ret;
}
// True if ptr lies inside this block's element storage, i.e. in [getPtr(), getPtr() + maxElems * elemSize).
bool SmallBlockAllocator::Block::contains(unsigned char *ptr) const
{
    const unsigned char *first = getPtr();
    const unsigned char *pastLast = first + (maxElems * elemSize);
    return ptr >= first && ptr < pastLast;
}
// Marks the slot at ptr as free again. ptr must point at the start of a slot
// inside this block that is currently in use (all of this is asserted in debug builds).
void SmallBlockAllocator::Block::freeElem(unsigned char *ptr)
{
    ASSERT(contains(ptr));
    ASSERT(freeElems < maxElems); // make sure the block is not all free
    const ptrdiff_t p = ptr - getPtr();
    ASSERT((p % elemSize) == 0); // make sure alignment is right
    const unsigned int idx = p / elemSize;
    const unsigned int bitsPerInt = sizeof(unsigned int) * 8; // 32
    const unsigned int bitmapIdx = idx / bitsPerInt;
    const unsigned int bitIdx = idx % bitsPerInt;
    ASSERT(bitmapIdx < bitmapInts);
    // Unsigned constant: a signed '1 << 31' (last slot of a bitmap word) is undefined behavior.
    const unsigned int mask = 1u << bitIdx;
    ASSERT(!(bitmap[bitmapIdx] & mask)); // make sure this is '0' (= used)
    bitmap[bitmapIdx] |= mask; // put '1' where '0' was (-> mark as free)
    ++freeElems;
#ifdef _DEBUG
    memset(ptr, 0xfa, elemSize); // poison freed memory in debug builds
#endif
}
// Plain malloc() path, used for requests that are not served from a block.
void *SmallBlockAllocator::_FallbackAlloc(unsigned int size)
{
    void *mem = malloc(size);
    return mem;
}
// Counterpart of _FallbackAlloc(): hands ptr back to free().
void SmallBlockAllocator::_FallbackFree(void *ptr)
{
    ::free(ptr);
}
// Allocates size bytes. Requests up to _blockSizeMax are served from a block of the matching bin
// (creating a new block if all are full); larger requests, or a failed block allocation,
// go to the fallback allocator.
void *SmallBlockAllocator::_Alloc(unsigned int size)
{
    if(size <= _blockSizeMax)
    {
        Block *blk = _GetFreeBlock(size);
        ASSERT(!blk || blk->freeElems);
        if(!blk)
            blk = _AppendBlock(size);
        if(blk)
            return blk->allocElem();
        // could not get a block -> fall through to the fallback allocator
    }
    return _FallbackAlloc(size);
}
// Ordering predicate for the binary search in _FindBlockContainingPtr():
// a block sorts before ptr if the end of its element storage lies below ptr.
bool SmallBlockAllocator::Block_ptr_cmp(const Block *blk, const void *ptr)
{
    const unsigned char *target = (unsigned char*)ptr;
    const unsigned char *blockEnd = blk->getEndPtr();
    return blockEnd < target;
}
// Binary-searches the address-sorted _allblocks for the block whose element storage contains ptr.
// Returns NULL if ptr does not belong to any block.
SmallBlockAllocator::Block *SmallBlockAllocator::_FindBlockContainingPtr(void *ptr)
{
    // MSVC's std::lower_bound uses iterator debug checks in debug mode,
    // which breaks Block_ptr_cmp() because the left and right types are different.
    std::vector<Block*>::iterator it = stdx_fg::lower_bound(_allblocks.begin(), _allblocks.end(), ptr, Block_ptr_cmp);
    if(it == _allblocks.end())
        return NULL;

    Block *candidate = *it;
    return candidate->contains((unsigned char*)ptr) ? candidate : NULL;
}
void SmallBlockAllocator::_Free(void *ptr, unsigned int size)
{
if(size <= _blockSizeMax)
{
Block *blk = _FindBlockContainingPtr(ptr);
if(blk)
{
ASSERT(blk->elemSize >= size); // ptr might be from a larger block in case _Realloc() failed to shrink
blk->freeElem((unsigned char*)ptr);
if(blk->freeElems == blk->maxElems)
_FreeBlock(blk); // remove if completely unused
return;
}
}
_FallbackFree(ptr);
}
// Moves an allocation to a new slot of newsize bytes: allocate, copy min(oldsize, newsize) bytes,
// free the old slot. If the new allocation fails, a shrink request keeps the old pointer
// (Lua assumes realloc() shrink requests cannot fail); a grow request returns NULL.
void *SmallBlockAllocator::_Realloc(void *ptr, unsigned int newsize, unsigned int oldsize)
{
    void *newptr = _Alloc(newsize);
    if(newptr)
    {
        const unsigned int keep = (oldsize < newsize) ? oldsize : newsize;
        memcpy(newptr, ptr, keep);
        _Free(ptr, oldsize);
        return newptr;
    }
    return newsize <= oldsize ? ptr : NULL;
}

View file

@ -1,100 +0,0 @@
#ifndef MEMORY_ALLOCATOR_SMALL_BLOCK_H
#define MEMORY_ALLOCATOR_SMALL_BLOCK_H
/* Optimized memory allocator for small & frequent (de-)allocations.
* Low memory overhead. Used for Lua.
* Inspired by http://dns.achurch.org/cgi-bin/hg/aquaria-psp/file/tip/PSP/src/lalloc.c
*/
// Originally made for LV3proj_ng (https://github.com/fgenesis/lv3proj_ng)
// Hacked in shape for use in Aquaria
// Public domain
#include <vector>
// Allocator that groups small allocations of similar size into blocks.
// Alloc() is the single realloc-style entry point; all other members are implementation details.
class SmallBlockAllocator
{
public:
    // blockSizeMin/blockSizeMax: range of element sizes served from blocks.
    // blockSizeIncr: step between two adjacent size bins.
    // elemsPerBlockMin/elemsPerBlockMax: bounds for the number of elements per block.
    SmallBlockAllocator(unsigned int blockSizeMin, unsigned int blockSizeMax, unsigned int blockSizeIncr = 8,
    unsigned int elemsPerBlockMin = 64, unsigned int elemsPerBlockMax = 2048);
    ~SmallBlockAllocator();
    // Combined alloc / realloc / free, selected by ptr and newsize (see the implementation).
    void *Alloc(void *ptr, size_t newsize, size_t oldsize);
private:
    void *_Alloc(unsigned int size);
    void *_Realloc(void *ptr, unsigned int newsize, unsigned int oldsize);
    void _Free(void* ptr, unsigned int size);
    // Plain malloc()/free() path.
    void *_FallbackAlloc(unsigned int size);
    void _FallbackFree(void *ptr);
    // One chunk of memory holding a header, a free-slot bitmap and the element storage.
    // The member order matters: the bitmap must be last, the data follows it directly.
    struct Block
    {
        // block header start
        Block *next; // next/previous block of the same size bin
        Block *prev;
        unsigned short maxElems;   // total number of slots
        unsigned short freeElems;  // number of currently free slots
        unsigned short elemSize;   // size of one slot in bytes
        unsigned short bitmapInts; // number of entries in bitmap[]
        // block header end
        unsigned int bitmap[1]; // variable sized
        // actual storage memory starts after bitmap[bitmapInts]
        // Start of the element storage.
        inline unsigned char *getPtr()
        {
            return reinterpret_cast<unsigned char *>(&bitmap[bitmapInts]);
        }
        inline const unsigned char *getPtr() const
        {
            return reinterpret_cast<const unsigned char *>(&bitmap[bitmapInts]);
        }
        // One past the end of the element storage.
        inline unsigned char *getEndPtr()
        {
            return getPtr() + (maxElems * elemSize);
        }
        inline const unsigned char *getEndPtr() const
        {
            return getPtr() + (maxElems * elemSize);
        }
        void *allocElem();                        // claim one free slot
        void freeElem(unsigned char *ptr);        // release the slot at ptr
        bool contains(unsigned char *ptr) const;  // is ptr inside the element storage?
    };
    Block *_AllocBlock(unsigned int elemCount, unsigned int elemSize);
    void _FreeBlock(Block *blk);
    Block *_AppendBlock(unsigned int elemSize);
    Block *_GetFreeBlock(unsigned int elemSize); // NULL if none free
    Block *_FindBlockContainingPtr(void *ptr);
    // Maps an element size to its bin index in _blocks (size rounded up to a multiple of _blockSizeIncr).
    // NOTE(review): _blockSizeMin is not subtracted here, while the constructor sizes _blocks as
    // (max - min) / incr + 1 entries; the two only agree when _blockSizeMin == _blockSizeIncr -- confirm.
    inline unsigned int GetIndexForElemSize(unsigned int elemSize)
    {
        return ((elemSize + (_blockSizeIncr - 1)) / _blockSizeIncr) - 1;
    }
    static bool Block_ptr_cmp(const Block *blk, const void *ptr);
    Block **_blocks; // per-bin list heads
    std::vector<Block*> _allblocks; // always sorted by pointer address
    unsigned int _blockSizeMin;
    unsigned int _blockSizeMax;
    unsigned int _blockSizeIncr;
    unsigned int _elemsPerBlockMin;
    unsigned int _elemsPerBlockMax;
};
#endif

View file

@ -510,7 +510,6 @@ SET(BBGE_SRCS
${BBGEDIR}/Joystick.cpp
${BBGEDIR}/LensFlare.cpp
${BBGEDIR}/Localization.cpp
${BBGEDIR}/MemoryAllocatorSmallBlock.cpp
${BBGEDIR}/MT.cpp
${BBGEDIR}/OSFunctions.cpp
${BBGEDIR}/ParticleEffect.cpp
@ -544,6 +543,7 @@ SET(BBGE_SRCS
${BBGEDIR}/FmodOpenALBridge.cpp
${COCOA_SRCS}
${EXTLIBDIR}/DeflateCompressor.cpp
${EXTLIBDIR}/luaalloc.cpp
${EXTLIBDIR}/glfont2/glfont2.cpp
${EXTLIBDIR}/minihttp.cpp
${EXTLIBDIR}/JPS.h

675
ExternalLibs/luaalloc.cpp Normal file
View file

@ -0,0 +1,675 @@
/* Small and fast memory allocator tailored for Lua.
License:
Public domain, WTFPL, CC0 or your favorite permissive license; whatever is available in your country.
Dependencies:
libc by default, change defines below to use your own functions
Compiles as C99 or C++ code.
Thread safety:
No global state. LuaAlloc instances are not thread-safe (same as Lua).
Background:
Lua tends to make tiny allocations (4, 8, 16, generally less than 100 bytes) most of the time.
malloc() & friends tend to be rather slow and also add some bytes of overhead for bookkeeping (typically 8 or 16 bytes),
so a large percentage of the actually allocated memory is wasted.
This allocator groups allocations of the same (small) size into blocks and passes through larger allocations.
Small allocations have an overhead of 1 bit plus some bookkeeping information for each block.
This allocator is also rather fast; in the typical case a block known to contain free slots is cached,
and inside of this block, finding a free slot is a tiny loop checking 32 slots at once,
followed by a CTZ (count trailing zeros) to locate the exact slot out of the 32.
Freeing is similar, first do a binary search to locate the block containing the pointer to be freed,
then flip the bit for that slot to mark it as unused. (Bitmap position and bit index is computed from the address, no loop there.)
Once a block for a given size bin is full, other blocks in this bin are filled. A new block is allocated from the system if there is no free block.
Unused blocks are free()d as soon as they are completely empty.
Origin:
https://github.com/fgenesis/tinypile/blob/master/luaalloc.c
Inspired by:
http://dns.achurch.org/cgi-bin/hg/aquaria-psp/file/tip/PSP/src/lalloc.c
http://wiki.luajit.org/New-Garbage-Collector#arenas (--> LuaJIT has its own allocator. Don't use this one for LuaJIT.)
*/
/* ---- Configuration begin ---- */
/* Track allocation stats to get an overview of your memory usage. By default disabled in release mode. */
#ifndef NDEBUG
# define LA_TRACK_STATS
#endif
/* Internal consistency checks. By default disabled in release mode. */
#ifdef NDEBUG
# define LA_ASSERT(x)
#else
# include <assert.h>
# define LA_ASSERT(x) assert(x)
#endif
/* Required libc functions. Use your own if needed */
#include <string.h> /* for memcpy, memmove, memset */
#define LA_MEMCPY(dst, src, n) memcpy((dst), (src), (n))
#define LA_MEMMOVE(dst, src, n) memmove((dst), (src), (n))
#define LA_MEMSET(dst, val, n) memset((dst), (val), (n))
/* If you want to turn off the internal default system allocator, comment out the next line.
If the default sysalloc is disabled, symbols for realloc()/free() won't be pulled in. */
#define LA_ENABLE_DEFAULT_ALLOC
/* Maximum size of allocations to handle. Any size beyond that will be redirected to the system allocator.
Must be a multiple of LA_ALLOC_STEP */
#define LA_MAX_ALLOC 128
/* Provide pools in increments of this size, up to LA_MAX_ALLOC. 4 or 8 are good values. */
/* E.g. A value of 4 will create pools for size 4, 8, 12, ... 128; which is 32 distinct sizes. */
#define LA_ALLOC_STEP 4
/* Initial/Max. # of elements per block. Default growing behavior is to double the size for each full block until hitting LA_ELEMS_MAX.
Note that each element requires 1 bit in the bitmap, the number of elements is rounded up so that no bit is unused,
and the bitmap array is sized accordingly. Best is to use powers of 2. */
#define LA_ELEMS_MIN 64
#define LA_ELEMS_MAX 2048 /* Stored in u16, don't go higher than 0x8000 */
#define LA_GROW_BLOCK_SIZE(n) ((n) * 2) /* argument parenthesized so that expression arguments grow as a whole */
typedef unsigned int u32;
typedef unsigned short u16;
/* Bitmap type. Default u32. If you want to use another unsigned type (e.g. uint64_t)
you must provide a count-trailing-zeroes function.
Note that the bitmap implicitly controls the data alignment -- the data area starts directly after the bitmap array,
there is no explicit padding in between. */
typedef u32 ubitmap;
/* CTZ for your bitmap type. */
#define bitmap_CTZ(x) ctz32(x)
/* ---- Configuration end ---- */
#include "luaalloc.h"
#include <stddef.h> /* for size_t, ptrdiff_t */
#include <limits.h> /* for CHAR_BIT */
#ifdef LA_ENABLE_DEFAULT_ALLOC
#include <stdlib.h> /* for realloc, free */
#endif
/* ---- Intrinsics ---- */
#define LA_RESTRICT __restrict
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM))
# include <intrin.h>
# define HAS_BITSCANFORWARD
#elif defined(__clang__)
# if __has_builtin(__builtin_ctz)
# define HAS_BUILTIN_CTZ
# endif
#elif defined(__GNUC__)
# define HAS_BUILTIN_CTZ
#endif
/* Count trailing zeros: index of the lowest set bit of x.
   Uses a compiler intrinsic where one was detected above, otherwise a portable bit trick.
   The only caller in this file passes a non-zero bitmap word. */
inline static unsigned ctz32(u32 x)
{
#if defined(HAS_BUILTIN_CTZ)
    return __builtin_ctz(x);
#elif defined(HAS_BITSCANFORWARD)
    unsigned long bitpos = 0;
    _BitScanForward(&bitpos, x);
    return bitpos;
#else /* bit magic */
    /* Isolate the lowest set bit, then subtract 1 to get a mask of all bits below it;
       the number of bits in that mask is the answer. */
    u32 v = (x & -x) - 1;
    /* begin popcount32 */
    v = v - ((v >> 1) & 0x55555555);
    v = ((v >> 2) & 0x33333333) + (v & 0x33333333);
    v = ((v >> 4) + v) & 0x0f0f0f0f;
    v = v + (v >> 8);
    v = v + (v >> 16);
    /* end popcount32 */
    return v & 0x0000003f;
#endif
}
/* ---- Structs for internal book-keeping ---- */
#define BLOCK_ARRAY_SIZE (LA_MAX_ALLOC / LA_ALLOC_STEP)
/* Header of one memory block. A block is a single system allocation laid out as
   [header][bitmap words][element storage]; 'bitmap' must stay the last member because
   the remaining bitmap words and the data follow it directly in memory. */
typedef struct Block Block;
struct Block
{
u16 elemsfree; /* dynamic */  /* number of currently free slots */
u16 elemstotal; /* const */   /* number of slots in this block */
u16 elemSize; /* const */     /* size of one slot in bytes */
u16 bitmapInts; /* const */   /* number of ubitmap words in the bitmap */
Block *next; /* dynamic */    /* newer block of the same size bin */
Block *prev; /* dynamic */    /* older block of the same size bin */
ubitmap bitmap[1];            /* first bitmap word; a set bit marks a free slot */
/* bitmap area */
/* data area */
};
/* Allocator context. luaalloc_create() zero-fills the whole struct and then sets
   sysalloc and user, so every array and counter below starts out as 0 / NULL. */
typedef struct LuaAlloc
{
Block *active[BLOCK_ARRAY_SIZE]; /* current work block for each size, that serves allocations until full */
Block *chain[BLOCK_ARRAY_SIZE]; /* newest allocated block for each size (follow ->prev to get older block) */
Block **all; /* All blocks in use, sorted by address */
size_t allnum; /* number of blocks in use */
size_t allcap; /* capacity of array */
LuaSysAlloc sysalloc; /* system allocator used for blocks, large allocations and internal storage */
void *user; /* opaque pointer passed as first argument to sysalloc */
#ifdef LA_TRACK_STATS
struct
{
/* Extra entry is for large allocations outside of this allocator */
size_t alive[BLOCK_ARRAY_SIZE + 1]; /* How many allocations of each size bin are currently in use */
size_t total[BLOCK_ARRAY_SIZE + 1]; /* How many allocations of each size bin were done in total */
size_t blocks_alive[BLOCK_ARRAY_SIZE + 1]; /* How many blocks for each size bin do currently exist */
} stats;
#endif
} LuaAlloc;
/* ---- Helper functions ---- */
/* Number of bits, and therefore slots, tracked by one bitmap word. */
static const u16 BITMAP_ELEM_SIZE = sizeof(ubitmap) * CHAR_BIT;
/* First word of the block's free-slot bitmap. */
inline static ubitmap *getbitmap(Block *b)
{
    return b->bitmap;
}
/* Start of the element storage: it begins directly after the last bitmap word. */
inline static void *getdata(Block *b)
{
    ubitmap *firstword = getbitmap(b);
    return firstword + b->bitmapInts;
}
/* One past the last byte of the element storage. */
inline static void *getdataend(Block *b)
{
    char *first = (char*)getdata(b);
    const size_t databytes = (size_t)b->elemSize * b->elemstotal;
    return first + databytes;
}
/* Maps an allocation size (1..LA_MAX_ALLOC) to the index of its size bin. */
inline static unsigned sizeindex(u16 elemSize)
{
    LA_ASSERT(elemSize && elemSize <= LA_MAX_ALLOC);
    const int zerobased = elemSize - 1; /* so that exact multiples of the step stay in their own bin */
    return zerobased / LA_ALLOC_STEP;
}
inline static unsigned bsizeindex(const Block *b)
{
return sizeindex(b->elemSize);
}
/* Non-zero if p points into the element storage of b, i.e. into [getdata(b), getdataend(b)). */
static int contains(Block * b, const void *p)
{
    const void *lo = getdata(b);
    const void *hi = getdataend(b);
    return lo <= p && p < hi;
}
/* Rounds an element count up to the next multiple of BITMAP_ELEM_SIZE,
   so that every bit of every bitmap word corresponds to a slot. */
inline static u16 roundToFullBitmap(u16 n)
{
#if CHAR_BIT == 8
    /* Fast round if BITMAP_ELEM_SIZE is a power of 2: add (size - 1), then clear the low bits */
    return (n + BITMAP_ELEM_SIZE - 1) & ~(BITMAP_ELEM_SIZE - 1);
#else
# error Weird hardware detected! CHAR_BIT != 8, does this mean BITMAP_ELEM_SIZE is not a power of 2? Check this, and CTZ function.
#endif
}
/* Consistency check of a block header. Consists only of LA_ASSERT()s,
   so it does nothing when NDEBUG is defined (LA_ASSERT is then empty). */
inline static void checkblock(Block *b)
{
LA_ASSERT(b->elemSize && (b->elemSize % LA_ALLOC_STEP) == 0); /* element size is a non-zero multiple of the step */
LA_ASSERT(b->bitmapInts * BITMAP_ELEM_SIZE == b->elemstotal); /* bitmap has no unused bits */
LA_ASSERT(b->elemsfree <= b->elemstotal);
LA_ASSERT(b->elemstotal >= LA_ELEMS_MIN);
LA_ASSERT(b->elemstotal <= LA_ELEMS_MAX);
}
/* Total size of the block in bytes: header, bitmap and element storage. */
inline static size_t blocksize(Block *b)
{
    const char *begin = (char*)b;
    const char *end = (char*)getdataend(b);
    return end - begin;
}
/* Number of elements the next block of a size bin should get:
   LA_ELEMS_MIN if the bin has no block yet (b == NULL), otherwise the grown
   element count of b, capped at LA_ELEMS_MAX. */
inline static u16 nextblockelems(Block *b)
{
    u32 grown;
    if(!b)
        return LA_ELEMS_MIN;
    grown = LA_GROW_BLOCK_SIZE(b->elemstotal);
    if(grown >= LA_ELEMS_MAX)
        grown = LA_ELEMS_MAX;
    return (u16)grown;
}
/* ---- System allocator interface ---- */
/* Requests nsize fresh bytes from the system allocator. As there is no old pointer,
   osize is passed through unchanged (callers in this file pass a LUAALLOC_TYPE_* value). */
inline static void *sysmalloc(LuaAlloc *LA, size_t osize, size_t nsize)
{
    LA_ASSERT(nsize);
    LuaSysAlloc fn = LA->sysalloc;
    return fn(LA->user, NULL, osize, nsize);
}
/* Returns p (osize bytes) to the system allocator by requesting a new size of 0. */
inline static void sysfree(LuaAlloc * LA, void * p, size_t osize)
{
    LA_ASSERT(p && osize);
    LuaSysAlloc fn = LA->sysalloc;
    (void)fn(LA->user, p, osize, 0); /* ignore return value */
}
/* Resizes p from osize to nsize bytes via the system allocator; returns its result. */
inline static void *sysrealloc(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p, size_t osize, size_t nsize)
{
    LA_ASSERT(osize && nsize); /* This assert is correct even if an AllocType enum value is passed as osize. */
    LuaSysAlloc fn = LA->sysalloc;
    return fn(LA->user, p, osize, nsize);
}
/* ---- Allocator internals ---- */
/* Allocates and initializes one block holding at least nelems slots of at least elemsz bytes.
   elemsz is rounded up to a multiple of LA_ALLOC_STEP, nelems to a full bitmap word.
   All slots start out free; the block is not yet registered anywhere. NULL if out of memory. */
static Block *_allocblock(LuaAlloc *LA, u16 nelems, u16 elemsz)
{
    elemsz = ((elemsz + LA_ALLOC_STEP-1) / LA_ALLOC_STEP) * LA_ALLOC_STEP; /* round up */
    nelems = roundToFullBitmap(nelems); /* The bitmap array must not have any unused bits */
    const u16 nbitmap = nelems / BITMAP_ELEM_SIZE;

    const size_t headerbytes = sizeof(Block) - sizeof(ubitmap); /* block header without bitmap[1] */
    const size_t bitmapbytes = nbitmap * sizeof(ubitmap);       /* actual bitmap size */
    const size_t databytes   = nelems * (size_t)elemsz;         /* data size */

    Block *b = (Block*)sysmalloc(LA, LUAALLOC_TYPE_BLOCK, headerbytes + bitmapbytes + databytes);
    if(b)
    {
        b->next = NULL;
        b->prev = NULL;
        b->elemSize = elemsz;
        b->bitmapInts = nbitmap;
        b->elemstotal = nelems;
        b->elemsfree = nelems;
        LA_MEMSET(b->bitmap, -1, bitmapbytes); /* mark all as free */
    }
    return b;
}
/* Lower-bound search in LA->all, which is sorted by block address.
   Returns the position of the first entry whose block address is not below p,
   or one past the last entry if every block address is below p.
   Use cases:
   1) Pass a block that is stored in the array to get the position where it is stored
      (or, for a new block, the position where it must be inserted to keep the order).
   2) Pass any other pointer: a block containing it starts below p, so if such a block
      exists it is the entry directly BEFORE the returned position. This is not checked here. */
static Block **findspot(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p)
{
    Block **base = LA->all;
    size_t lo = 0;
    size_t hi = LA->allnum;
    /* Invariant: entries before lo are < p, entries from hi on are >= p */
    while(lo < hi)
    {
        const size_t mid = (lo + hi) / 2u;
        if((void*)base[mid] < p)
            lo = mid + 1;
        else
            hi = mid;
    }
    return base + lo;
}
/* Grows the central block pointer array (LA->all) by roughly 50% plus 16 entries.
   Returns the new capacity, or 0 if the system allocator failed (the old array stays valid). */
static size_t enlarge(LuaAlloc *LA)
{
    const size_t incr = (LA->allcap / 2) + 16;
    const size_t newcap = LA->allcap + incr; /* Rough guess */
    /* The old size handed to the system allocator is in BYTES, the same unit luaalloc_delete()
       uses when it frees this array. For the very first allocation there is no old pointer
       and the allocation type is passed instead. */
    const size_t oldsize = LA->all ? LA->allcap * sizeof(Block*) : LUAALLOC_TYPE_INTERNAL;
    Block **newall = (Block**)sysrealloc(LA, LA->all, oldsize, sizeof(Block*) * newcap);
    if(newall)
    {
        LA->all = newall;
        LA->allcap = newcap;
        return newcap;
    }
    return 0;
}
/* Registers a freshly allocated block b: inserts it into the address-sorted LA->all array
   and makes it the newest block in the chain of its size bin.
   Returns b, or NULL if the array could not be enlarged -- in that case b has been freed. */
static Block *insertblock(LuaAlloc * LA_RESTRICT LA, Block * LA_RESTRICT b)
{
/* Enlarge central block storage if necessary */
if(LA->allcap == LA->allnum && !enlarge(LA))
{
sysfree(LA, b, blocksize(b)); /* Can't fit block, kill it and fail */
return NULL;
}
/* Find correct spot to insert */
/* Invariant: Array is already sorted */
Block **spot = findspot(LA, b);
Block **end = LA->all + LA->allnum;
/* inserting in the middle? Must preserve sort order */
if(spot < end)
{
/* move other pointers up */
LA_MEMMOVE(spot+1, spot, (end - spot) * sizeof(Block*));
}
*spot = b;
++LA->allnum;
/* Link in chain */
const unsigned si = bsizeindex(b);
Block *top = LA->chain[si]; /* previous newest block of this bin, may be NULL */
LA->chain[si] = b;
if(top)
{
LA_ASSERT(!top->next);
top->next = b;
}
b->prev = top;
#ifdef LA_TRACK_STATS
LA->stats.blocks_alive[si]++;
#endif
checkblock(b);
return b;
}
/* Unregisters and frees the block stored at *spot. spot must point into LA->all.
   The block is removed from the sorted array, from the chain/active slots of its
   size bin and from the doubly linked list, then returned to the system allocator. */
static void freeblock(LuaAlloc * LA_RESTRICT LA, Block ** LA_RESTRICT spot)
{
LA_ASSERT(LA->allnum);
Block *b = *spot; /* read the pointer before the array entry gets overwritten below */
checkblock(b);
/* Remove from central list */
Block **end = LA->all + LA->allnum;
if(spot+1 < end)
{
/* Move other pointers down */
LA_MEMMOVE(spot, spot+1, (end - (spot+1)) * sizeof(Block*));
}
--LA->allnum;
/* Invariant: Array is still sorted after removing an element */
/* Remove from chain */
unsigned si = bsizeindex(b);
if(LA->chain[si] == b)
{
LA_ASSERT(!b->next);
LA->chain[si] = b->prev; /* the next older block becomes the newest one */
}
if(LA->active[si] == b)
LA->active[si] = NULL; /* getfreeblock() will pick a new work block */
/* Unlink from linked list */
if(b->next)
{
LA_ASSERT(b->next->prev == b);
b->next->prev = b->prev;
}
if(b->prev)
{
LA_ASSERT(b->prev->next == b);
b->prev->next = b->next;
}
#ifdef LA_TRACK_STATS
LA->stats.blocks_alive[si]--;
#endif
sysfree(LA, b, blocksize(b)); /* free it */
}
/* Allocates a block and registers it with the allocator. NULL if either step fails. */
static Block *newblock(LuaAlloc *LA, u16 nelems, u16 elemsz)
{
    Block *fresh = _allocblock(LA, nelems, elemsz);
    if(!fresh)
        return NULL;
    return insertblock(LA, fresh); /* frees the block itself if it cannot be registered */
}
/* Claims one free slot of b and returns its address.
   Precondition (asserted): b->elemsfree != 0, which guarantees that the bitmap scan
   below finds a non-zero word before running off the end of the bitmap. */
static void *_Balloc(Block *b)
{
    LA_ASSERT(b->elemsfree);
    ubitmap *bitmap = b->bitmap;
    unsigned i = 0;
    /* Must have the bitmap's own type: the configuration section allows ubitmap to be
       wider than 'unsigned' (e.g. 64 bit), and a narrower variable would drop the upper bits. */
    ubitmap bm;
    for( ; !((bm = bitmap[i])); ++i) {} /* as soon as one isn't all zero, there's a free slot */
    LA_ASSERT(i < b->bitmapInts); /* And there must've been a free slot because b->elemsfree != 0 */
    ubitmap bitIdx = bitmap_CTZ(bm); /* Get exact location of free slot */
    LA_ASSERT(bm & ((ubitmap)1 << bitIdx)); /* make sure this is '1' (= free) */
    bm &= ~((ubitmap)1 << bitIdx); /* put '0' where '1' was (-> mark as non-free) */
    bitmap[i] = bm;
    --b->elemsfree;
    /* slot number = full bitmap words skipped * bits per word + bit position inside the word */
    const size_t where = (i * (size_t)BITMAP_ELEM_SIZE) + bitIdx;
    void *ret = ((char*)getdata(b)) + (where * b->elemSize);
    LA_ASSERT(contains(b, ret));
    return ret;
}
/* Marks the slot at p as free again. p must be the start of a used slot inside b
   (checked by assertions only). Bitmap word and bit are computed from the address. */
static void _Bfree(Block * LA_RESTRICT b, void * LA_RESTRICT p)
{
    LA_ASSERT(b->elemsfree < b->elemstotal);
    LA_ASSERT(contains(b, p));

    const ptrdiff_t offs = (char*)p - (char*)getdata(b);
    LA_ASSERT(offs % b->elemSize == 0);

    const unsigned idx = (unsigned)(offs / b->elemSize);  /* slot number */
    const unsigned bitmapIdx = idx / BITMAP_ELEM_SIZE;    /* which bitmap word */
    const ubitmap bitIdx = idx % BITMAP_ELEM_SIZE;        /* which bit in that word */
    LA_ASSERT(bitmapIdx < b->bitmapInts);
    LA_ASSERT(!(b->bitmap[bitmapIdx] & ((ubitmap)1 << bitIdx))); /* make sure this is '0' (= used) */

    ubitmap *word = &b->bitmap[bitmapIdx];
    *word |= ((ubitmap)1 << bitIdx); /* put '1' where '0' was (-> mark as free) */
    b->elemsfree++;
}
/* Returns a block of the size bin for 'size' that has at least one free slot;
   NULL only if a new block was needed and could not be allocated.
   The returned block (or NULL) becomes the bin's active block for subsequent requests. */
static Block *getfreeblock(LuaAlloc *LA, u16 size)
{
    const unsigned si = sizeindex(size);

    /* Good case: the currently active block still has room */
    Block *cand = LA->active[si];
    if(cand && cand->elemsfree)
        return cand;

    /* Not-so-good case: walk the chain from the newest block towards older ones */
    for(cand = LA->chain[si]; cand && !cand->elemsfree; cand = cand->prev) {}

    /* Every block is full (or there is none): allocate a new one.
       The newest block in the chain determines the size of the new block. */
    if(!cand)
        cand = newblock(LA, nextblockelems(LA->chain[si]), size);

    LA->active[si] = cand;
    return cand;
}
/* Allocates size bytes (size != 0). Sizes up to LA_MAX_ALLOC are served from a block;
   larger sizes, or small ones for which no block could be obtained, go to the system allocator.
   Returns NULL if the system allocator fails. */
static void *_Alloc(LuaAlloc *LA, size_t size)
{
    LA_ASSERT(size);

    Block *b = (size <= LA_MAX_ALLOC) ? getfreeblock(LA, (u16)size) : NULL;
    if(b)
    {
        checkblock(b);
        void *p = _Balloc(b);
        LA_ASSERT(p); /* Can't fail -- block was known to be free */
#ifdef LA_TRACK_STATS
        unsigned si = bsizeindex(b);
        LA->stats.alive[si]++;
        LA->stats.total[si]++;
#endif
        return p;
    }

    /* Large allocation, or no block available: ask the system allocator */
    void *big = sysmalloc(LA, LUAALLOC_TYPE_LARGELUA, size); /* large Lua allocation */
#ifdef LA_TRACK_STATS
    if(big)
    {
        LA->stats.alive[BLOCK_ARRAY_SIZE]++;
        LA->stats.total[BLOCK_ARRAY_SIZE]++;
    }
#endif
    return big;
}
/* Frees slot p of the block stored at *spot. If p is the last used slot,
   the whole block is released instead of just flipping the bit. */
static void freefromspot(LuaAlloc * LA_RESTRICT LA, Block ** LA_RESTRICT spot, void *p)
{
    Block *b = *spot;
#ifdef LA_TRACK_STATS
    LA->stats.alive[bsizeindex(b)]--;
#endif
    const int islastused = (b->elemsfree + 1 == b->elemstotal);
    if(!islastused)
    {
        _Bfree(b, p);
        return;
    }
    freeblock(LA, spot); /* Freeing last element in the block -> just free the whole thing */
}
/* Frees p, whose size as recorded by Lua is oldsize. Small sizes are looked up in the
   block list; if p is inside a block, the slot is released there. Everything else is
   returned to the system allocator. */
static void _Free(LuaAlloc * LA_RESTRICT LA , void * LA_RESTRICT p, size_t oldsize)
{
    LA_ASSERT(p);
    /* Only search the block array when it has entries: with no blocks in use, LA->all may be
       NULL or hold stale pointers, and the "one back" adjustment below would then
       dereference an invalid slot. */
    if(oldsize <= LA_MAX_ALLOC && LA->allnum)
    {
        Block **spot = findspot(LA, p); /* Here, spot might point one past the end */
        spot -= (spot > LA->all); /* One back unless we're already at the front -- now spot is always valid */
        Block *b = *spot;
        checkblock(b);
        if(contains(b, p))
        {
            freefromspot(LA, spot, p);
            return;
        }
        /* else p is outside of any block area. This case is unlikely but possible:
        - alloc large size (falling through to system alloc),
        - then, try to shrink it to fit inside LA_MAX_ALLOC,
        - ... but there is no block free for that size...
        - try to alloc new block and fail (out of memory)
        - then _Realloc() uses the original, still valid pointer since by spec shrink requests must not fail
        - Lua sees the "reallocated" (actually the old) pointer and records the new, smaller size;
        - when this pointer is freed, we're here in this situation.
        Therefore fall through to free a large allocation. */
    }
#ifdef LA_TRACK_STATS
    LA->stats.alive[BLOCK_ARRAY_SIZE]--;
#endif
    sysfree(LA, p, oldsize); /* large Lua free */
}
/* Resizes p from oldsize to newsize by allocating a new chunk, copying
   min(oldsize, newsize) bytes and freeing the old chunk.
   If the new allocation fails, a shrink request returns the old pointer unchanged
   (Lua assumes that shrink requests cannot fail); a grow request returns NULL. */
static void *_Realloc(LuaAlloc * LA_RESTRICT LA, void * LA_RESTRICT p, size_t newsize, size_t oldsize)
{
    LA_ASSERT(p);
    void *newptr = _Alloc(LA, newsize);
    if(newptr)
    {
        size_t keep = newsize;
        if(oldsize < keep)
            keep = oldsize;
        LA_MEMCPY(newptr, p, keep);
        _Free(LA, p, oldsize);
        return newptr;
    }
    return newsize <= oldsize ? p : NULL;
}
/* ---- Default system allocator ---- */
#ifdef LA_ENABLE_DEFAULT_ALLOC
static void *defaultalloc(void *user, void *ptr, size_t osize, size_t nsize)
{
(void)user;
(void)osize;
if(nsize)
return realloc(ptr, nsize);
free(ptr);
return NULL;
}
#endif
/* ---- Public API ---- */
#ifdef __cplusplus
extern "C" {
#endif
/* Allocation callback in the shape expected by lua_newstate(); ud is the LuaAlloc context.
     ptr == NULL, newsize != 0   -> allocate
     ptr == NULL, newsize == 0   -> nothing to do, returns NULL
     ptr != NULL, newsize == 0   -> free, returns NULL
     ptr != NULL, sizes equal    -> returns ptr unchanged
     ptr != NULL, sizes differ   -> reallocate */
LUAALLOC_EXPORT void *luaalloc(void * ud, void *ptr, size_t oldsize, size_t newsize)
{
    LuaAlloc *LA = (LuaAlloc*)ud;

    if(!ptr)
        return newsize ? _Alloc(LA, newsize) : NULL;

    if(!newsize)
    {
        _Free(LA, ptr, oldsize);
        return NULL;
    }

    if(newsize == oldsize)
        return ptr;

    return _Realloc(LA, ptr, newsize, oldsize);
}
/* Creates an allocator context. sysalloc == NULL selects the built-in default allocator
   if LA_ENABLE_DEFAULT_ALLOC is defined; without it a NULL sysalloc makes the call fail.
   The context itself is allocated through sysalloc. Returns NULL on failure. */
LUAALLOC_EXPORT LuaAlloc * luaalloc_create(LuaSysAlloc sysalloc, void *user)
{
#ifdef LA_ENABLE_DEFAULT_ALLOC
    if(!sysalloc)
        sysalloc = defaultalloc;
#else
    if(!sysalloc)
    {
        LA_ASSERT(sysalloc);
        return NULL;
    }
#endif

    LuaAlloc *LA = (LuaAlloc*)sysalloc(user, NULL, LUAALLOC_TYPE_INTERNAL, sizeof(LuaAlloc));
    if(!LA)
        return NULL;

    LA_MEMSET(LA, 0, sizeof(LuaAlloc)); /* no blocks, empty block array, zeroed stats */
    LA->user = user;
    LA->sysalloc = sysalloc;
    return LA;
}
/* Destroys an allocator context created by luaalloc_create(). Passing NULL is a no-op.
   All Lua states using the context must have been closed before; in debug builds it is
   asserted that no block is left. */
LUAALLOC_EXPORT void luaalloc_delete(LuaAlloc *LA)
{
    if(!LA)
        return; /* nothing to delete, same convention as free(NULL) */
    LA_ASSERT(LA->allnum == 0); /* If this fails the Lua state didn't GC everything, which is a bug */
    /* Without assertions, leftover blocks would be leaked once the context is gone.
       Return them to the system allocator, last array entry first (no pointer moves needed). */
    while(LA->allnum)
        freeblock(LA, LA->all + (LA->allnum - 1));
    if(LA->all)
        sysfree(LA, LA->all, LA->allcap * sizeof(Block*));
    sysfree(LA, LA, sizeof(LuaAlloc)); /* free self */
}
/* ---- Optional stats tracking ---- */
/* Exposes the internal statistics arrays. Every out parameter is optional (may be NULL).
   *pbinstep receives LA_ALLOC_STEP. With LA_TRACK_STATS, *alive / *total / *blocks are set to
   the internal arrays and the number of entries per array (BLOCK_ARRAY_SIZE + 1) is returned;
   without it the pointers are set to NULL and 0 is returned. */
LUAALLOC_EXPORT unsigned luaalloc_getstats(const LuaAlloc *LA, const size_t ** alive, const size_t ** total, const size_t ** blocks, unsigned *pbinstep)
{
    const size_t *alivearr = NULL;
    const size_t *totalarr = NULL;
    const size_t *blocksarr = NULL;
    unsigned entries = 0;

#ifdef LA_TRACK_STATS
    alivearr = LA->stats.alive;
    totalarr = LA->stats.total;
    blocksarr = LA->stats.blocks_alive;
    entries = BLOCK_ARRAY_SIZE + 1;
#else
    (void)LA;
#endif

    if(pbinstep)
        *pbinstep = LA_ALLOC_STEP;
    if(alive)
        *alive = alivearr;
    if(total)
        *total = totalarr;
    if(blocks)
        *blocks = blocksarr;
    return entries;
}
#ifdef __cplusplus
}
#endif

120
ExternalLibs/luaalloc.h Normal file
View file

@ -0,0 +1,120 @@
/*
Small and fast Lua allocator, compatible with Lua 5.1 and up.
For more info and compile-time config, see luaalloc.cpp (luaalloc.c in the original distribution)
Usage:
LuaAlloc *LA = luaalloc_create(NULL, NULL);
lua_State *L = lua_newstate(luaalloc, LA);
... use L ...
lua_close(L);
luaalloc_delete(LA);
*/
#pragma once
/* Every public API function is annotated with this */
#ifndef LUAALLOC_EXPORT
#define LUAALLOC_EXPORT
#endif
#include <stdlib.h> /* for size_t */
#ifdef __cplusplus
extern "C" {
#endif
/* Opaque allocator type. Instances are obtained from luaalloc_create() and
   released with luaalloc_delete(); this header only forward-declares the struct. */
typedef struct LuaAlloc LuaAlloc;
/* Main allocation callback. Lua will call this when it needs memory.
   'ud' must be a valid LuaAlloc context passed as user pointer to lua_newstate().
   Behaviour by arguments:
   - ptr == NULL, nsize != 0: allocates nsize bytes (osize is ignored).
   - ptr == NULL, nsize == 0: returns NULL.
   - ptr != NULL, nsize == 0: frees ptr (osize is its current size) and returns NULL.
   - ptr != NULL, nsize == osize: returns ptr unchanged.
   - ptr != NULL otherwise: resizes. If memory runs out, a shrink returns ptr itself
     and a grow returns NULL, leaving ptr valid. */
LUAALLOC_EXPORT void *luaalloc(void *ud, void *ptr, size_t osize, size_t nsize);
/* Block requests and large allocations will be forwarded to the system allocator.
   If you don't provide one, a suitable one based on realloc()/free() will be used.
   When called with ptr == NULL to make a fresh allocation, 'osize' carries one of the
   LUAALLOC_TYPE_* tags instead of a size.
   Details below. */
typedef void *(*LuaSysAlloc)(void *ud, void *ptr, size_t osize, size_t nsize);
/* Create allocator context. Pass custom system allocator if needed or NULL for the built-in default.
   'ud' is handed to the system allocator as its first argument.
   Returns NULL if the context could not be allocated, or if sysalloc is NULL and the
   built-in default allocator was compiled out.
   Multiple Lua states can share a single LuaAlloc as long as they run on the same thread. */
LUAALLOC_EXPORT LuaAlloc *luaalloc_create(LuaSysAlloc sysalloc, void *ud);
/* Destroy allocator. Call after lua_close()ing each Lua state using the allocator.
   The context frees itself, so the pointer must not be used afterwards. */
LUAALLOC_EXPORT void luaalloc_delete(LuaAlloc*);
/* Statistics tracking. Define LA_TRACK_STATS in the implementation file to use this. [Enabled by default in debug mode].
   Provides pointers to internal stats area. Each element corresponds to an internal allocation bin.
   - alive: How many allocations of a bin size are currently in use.
   - total: How many allocations of a bin size were ever made.
   - blocks: How many blocks currently exist for a bin.
   With the default config, index 0 corresponds to all allocations of 1-4 bytes, index 1 to those of 5-8 bytes, and so on.
   The bin size increment is returned in pbinstep (default: 4); it is filled in whether or not stats tracking is enabled.
   All output pointers can be NULL if you're not interested in the thing.
   Returns the total number of bins. 0 when stats tracking is disabled; in that case the
   alive/total/blocks outputs are set to NULL.
   The last valid index is not an actual bin -- instead, large allocations that bypass the allocator are collected there.
   The returned pointers are owned by the LuaAlloc instance and stay valid throughout its lifetime.
   To iterate over the size bins, you can do:

     const size_t *alive, *total, *blocks;
     unsigned step, n = luaalloc_getstats(LA, &alive, &total, &blocks, &step);
     if(n)
     {
         for(unsigned i = 0, a = 1, b = step; i < n-1; ++i, a = b+1, b += step)
             printf("%zu blocks of %u..%u bytes: %zu allocations alive, %zu done all-time\n",
                 blocks[i], a, b, alive[i], total[i]);
         printf("large allocations: %zu alive, %zu done all-time\n", alive[n-1], total[n-1]);
     }
*/
LUAALLOC_EXPORT unsigned luaalloc_getstats(const LuaAlloc*, const size_t **alive, const size_t **total, const size_t **blocks, unsigned *pbinstep);
/* Allocation type tags. The system allocator receives one of these in 'osize'
   when it is asked for a fresh allocation (ptr == NULL, nsize != 0). */
typedef enum
{
/* Passthrough/large Lua allocation */
LUAALLOC_TYPE_LARGELUA = 1,
/* Block allocation */
LUAALLOC_TYPE_BLOCK = 2,
/* LuaAlloc-internal data, e.g. the LuaAlloc context itself */
LUAALLOC_TYPE_INTERNAL = 3
} AllocType;
#ifdef __cplusplus
}
#endif
/*
Details about the system allocator:
typedef void *(*LuaSysAlloc)(void *ud, void *ptr, size_t osize, size_t nsize);
Block requests and large Lua allocations will be forwarded to the system allocator.
The function signature is (intentionally) the same as luaalloc() and the semantics are very similar.
The caller knows the size of each allocation so you do not have to track this yourself.
The system allocator must not fail shrink requests (same requirement as Lua).
You must handle the following cases:
if(!ptr && nsize)
return malloc(nsize); (osize encodes the type of allocation, see below)
else if(ptr && !nsize)
free(ptr); (osize is the previously allocated size; the return value is ignored)
else if(ptr && nsize)
return realloc(ptr, nsize); (must not fail shrink requests. osize is the previously allocated size; osize != nsize guaranteed)
// never called with (!ptr && !nsize), can ignore this case
Types of allocations, in case (!ptr && nsize):
switch(osize)
{
case LUAALLOC_TYPE_LARGELUA:
passthrough/large Lua allocation (alloc'd/free'd/realloc'd incl. shrink requests)
case LUAALLOC_TYPE_BLOCK:
block allocation (alloc'd/free'd, but never realloc'd)
case LUAALLOC_TYPE_INTERNAL:
allocation of LuaAlloc-internal data (usually long-lived. alloc'd, realloc'd to enlarge, but never shrunk. free'd only in luaalloc_delete())
case 0: default:
some other allocation (not used by LuaAlloc. Maybe some other code uses this allocator as well?)
}
Lua allocations may fail and Lua usually handles this gracefully by running an emergency GC;
5.2 and up do this out of the box, and there is a patch for 5.1 as well.
This block allocator is built to properly handle system allocator failures,
and return a failed allocation back to Lua as appropriate.
*/

View file

@ -489,14 +489,6 @@
RelativePath="..\..\BBGE\MathFunctions.h"
>
</File>
<File
RelativePath="..\..\BBGE\MemoryAllocatorSmallBlock.cpp"
>
</File>
<File
RelativePath="..\..\BBGE\MemoryAllocatorSmallBlock.h"
>
</File>
<File
RelativePath="..\..\BBGE\MT.cpp"
>

View file

@ -321,6 +321,14 @@
RelativePath="..\..\ExternalLibs\DeflateCompressor.h"
>
</File>
<File
RelativePath="..\..\ExternalLibs\luaalloc.cpp"
>
</File>
<File
RelativePath="..\..\ExternalLibs\luaalloc.h"
>
</File>
<File
RelativePath="..\..\ExternalLibs\minihttp.cpp"
>