mirror of
https://github.com/AquariaOSE/Aquaria.git
synced 2024-11-29 03:33:48 +00:00
remove iprof and BBGE_PROF (#74)
This commit is contained in:
parent
47f8677aa7
commit
26759c8be1
22 changed files with 3 additions and 2286 deletions
|
@ -1829,7 +1829,6 @@ const int chkDist = 2500*2500;
|
||||||
|
|
||||||
Target Avatar::getNearestTarget(const Vector &checkPos, const Vector &distPos, Entity *source, DamageType dt, bool override, std::vector<Target> *ignore)
|
Target Avatar::getNearestTarget(const Vector &checkPos, const Vector &distPos, Entity *source, DamageType dt, bool override, std::vector<Target> *ignore)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Avatar_getNearestTarget);
|
|
||||||
Target t;
|
Target t;
|
||||||
|
|
||||||
Vector targetPosition;
|
Vector targetPosition;
|
||||||
|
@ -5330,8 +5329,6 @@ bool lastCursorKeyboard = false;
|
||||||
|
|
||||||
void Avatar::onUpdate(float dt)
|
void Avatar::onUpdate(float dt)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Avatar_onUpdate);
|
|
||||||
|
|
||||||
looking = 0;
|
looking = 0;
|
||||||
|
|
||||||
|
|
||||||
|
@ -5492,7 +5489,6 @@ void Avatar::onUpdate(float dt)
|
||||||
|
|
||||||
lastWaterBubble = waterBubble;
|
lastWaterBubble = waterBubble;
|
||||||
waterBubble = 0;
|
waterBubble = 0;
|
||||||
BBGE_PROF(Avatar_splashOut);
|
|
||||||
splash(false);
|
splash(false);
|
||||||
|
|
||||||
if (dsq->continuity.form != FORM_FISH)
|
if (dsq->continuity.form != FORM_FISH)
|
||||||
|
|
|
@ -179,7 +179,6 @@ void Element::updateEffects(float dt)
|
||||||
|
|
||||||
void Element::update(float dt)
|
void Element::update(float dt)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Element_update);
|
|
||||||
if (!core->particlesPaused)
|
if (!core->particlesPaused)
|
||||||
{
|
{
|
||||||
updateLife(dt);
|
updateLife(dt);
|
||||||
|
|
|
@ -2546,7 +2546,6 @@ void Entity::addIgnoreShotDamageType(DamageType dt)
|
||||||
|
|
||||||
void Entity::doSpellAvoidance(float dt, int range, float mod)
|
void Entity::doSpellAvoidance(float dt, int range, float mod)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Entity_doSpellAvoidance);
|
|
||||||
Vector accum;
|
Vector accum;
|
||||||
|
|
||||||
int c = 0;
|
int c = 0;
|
||||||
|
|
|
@ -4071,7 +4071,6 @@ bool Game::isEntityCollideWithShot(Entity *e, Shot *shot)
|
||||||
|
|
||||||
void Game::handleShotCollisions(Entity *e, bool hasShield)
|
void Game::handleShotCollisions(Entity *e, bool hasShield)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Game_handleShotCollisions);
|
|
||||||
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
||||||
{
|
{
|
||||||
Shot *shot = Shot::shots[i];
|
Shot *shot = Shot::shots[i];
|
||||||
|
@ -4103,7 +4102,6 @@ bool Game::isDamageTypeEnemy(DamageType dt)
|
||||||
|
|
||||||
void Game::handleShotCollisionsSkeletal(Entity *e)
|
void Game::handleShotCollisionsSkeletal(Entity *e)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Game_HSSKELETAL);
|
|
||||||
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
||||||
{
|
{
|
||||||
Shot *shot = Shot::shots[i];
|
Shot *shot = Shot::shots[i];
|
||||||
|
|
|
@ -119,9 +119,6 @@ void Hair::onUpdate(float dt)
|
||||||
|
|
||||||
void Hair::updatePositions()
|
void Hair::updatePositions()
|
||||||
{
|
{
|
||||||
BBGE_PROF(Hair_updatePositions);
|
|
||||||
|
|
||||||
|
|
||||||
for (size_t i = 1; i < hairNodes.size(); i++)
|
for (size_t i = 1; i < hairNodes.size(); i++)
|
||||||
{
|
{
|
||||||
Vector diff = hairNodes[i].position - hairNodes[i-1].position;
|
Vector diff = hairNodes[i].position - hairNodes[i-1].position;
|
||||||
|
|
|
@ -330,25 +330,15 @@ void SchoolFish::applySeparation(Vector &accumulator)
|
||||||
|
|
||||||
void SchoolFish::onUpdate(float dt)
|
void SchoolFish::onUpdate(float dt)
|
||||||
{
|
{
|
||||||
BBGE_PROF(SchoolFish_onUpdate);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
burstDelay -= dt;
|
burstDelay -= dt;
|
||||||
if (burstDelay < 0)
|
if (burstDelay < 0)
|
||||||
{
|
|
||||||
burstDelay = 0;
|
burstDelay = 0;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stickToNaijasHead && alpha.x < 0.1f)
|
if (stickToNaijasHead && alpha.x < 0.1f)
|
||||||
stickToNaijasHead = false;
|
stickToNaijasHead = false;
|
||||||
|
|
||||||
if (this->layer < LR_ENTITIES)
|
if (this->layer < LR_ENTITIES)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
setEntityType(ET_NEUTRAL);
|
setEntityType(ET_NEUTRAL);
|
||||||
collideRadius = 0;
|
collideRadius = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -461,8 +461,6 @@ void ScriptedEntity::stopPull()
|
||||||
|
|
||||||
void ScriptedEntity::onUpdate(float dt)
|
void ScriptedEntity::onUpdate(float dt)
|
||||||
{
|
{
|
||||||
BBGE_PROF(ScriptedEntity_onUpdate);
|
|
||||||
|
|
||||||
CollideEntity::onUpdate(dt);
|
CollideEntity::onUpdate(dt);
|
||||||
|
|
||||||
if (becomeSolidDelay)
|
if (becomeSolidDelay)
|
||||||
|
|
|
@ -450,7 +450,6 @@ void Shot::onEndOfLife()
|
||||||
|
|
||||||
void Shot::doHitEffects()
|
void Shot::doHitEffects()
|
||||||
{
|
{
|
||||||
BBGE_PROF(Shot_doHitEffects);
|
|
||||||
if (shotData)
|
if (shotData)
|
||||||
{
|
{
|
||||||
if (!shotData->hitPrt.empty())
|
if (!shotData->hitPrt.empty())
|
||||||
|
@ -478,7 +477,6 @@ void Shot::suicide()
|
||||||
|
|
||||||
bool Shot::onHitWall(bool reflect)
|
bool Shot::onHitWall(bool reflect)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Shot_onHitWall);
|
|
||||||
doHitEffects();
|
doHitEffects();
|
||||||
updateSegments(position);
|
updateSegments(position);
|
||||||
|
|
||||||
|
|
|
@ -31,9 +31,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
#include "BBGECompileConfig.h"
|
#include "BBGECompileConfig.h"
|
||||||
|
|
||||||
#define BBGE_PROF(x)
|
|
||||||
|
|
||||||
|
|
||||||
#define compile_assert(pred) switch(0){case 0:case (pred):;}
|
#define compile_assert(pred) switch(0){case 0:case (pred):;}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
|
|
@ -1046,8 +1046,6 @@ void Core::run(float runTime)
|
||||||
|
|
||||||
while((runTime == -1 && !loopDone) || (runTime >0))
|
while((runTime == -1 && !loopDone) || (runTime >0))
|
||||||
{
|
{
|
||||||
BBGE_PROF(Core_main);
|
|
||||||
|
|
||||||
nowTicks = SDL_GetTicks();
|
nowTicks = SDL_GetTicks();
|
||||||
dt = (nowTicks-thenTicks)/1000.0;
|
dt = (nowTicks-thenTicks)/1000.0;
|
||||||
thenTicks = nowTicks;
|
thenTicks = nowTicks;
|
||||||
|
@ -1177,8 +1175,6 @@ void Core::run(float runTime)
|
||||||
|
|
||||||
showBuffer();
|
showBuffer();
|
||||||
|
|
||||||
BBGE_PROF(STOP);
|
|
||||||
|
|
||||||
if (nestedMains == 1)
|
if (nestedMains == 1)
|
||||||
clearGarbage();
|
clearGarbage();
|
||||||
|
|
||||||
|
@ -1741,10 +1737,6 @@ void Core::updateCullData()
|
||||||
|
|
||||||
void Core::render(int startLayer, int endLayer, bool useFrameBufferIfAvail)
|
void Core::render(int startLayer, int endLayer, bool useFrameBufferIfAvail)
|
||||||
{
|
{
|
||||||
|
|
||||||
BBGE_PROF(Core_render);
|
|
||||||
|
|
||||||
|
|
||||||
if (startLayer == -1 && endLayer == -1 && overrideStartLayer != 0)
|
if (startLayer == -1 && endLayer == -1 && overrideStartLayer != 0)
|
||||||
{
|
{
|
||||||
startLayer = overrideStartLayer;
|
startLayer = overrideStartLayer;
|
||||||
|
@ -2048,8 +2040,6 @@ CountedPtr<Texture> Core::doTextureAdd(const std::string &texture, const std::st
|
||||||
|
|
||||||
CountedPtr<Texture> Core::addTexture(const std::string &textureName)
|
CountedPtr<Texture> Core::addTexture(const std::string &textureName)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Core_addTexture);
|
|
||||||
|
|
||||||
if (textureName.empty())
|
if (textureName.empty())
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -2192,7 +2182,6 @@ void Core::enqueueRenderObjectDeletion(RenderObject *object)
|
||||||
|
|
||||||
void Core::clearGarbage()
|
void Core::clearGarbage()
|
||||||
{
|
{
|
||||||
BBGE_PROF(Core_clearGarbage);
|
|
||||||
// HACK: optimize this (use a list instead of a queue)
|
// HACK: optimize this (use a list instead of a queue)
|
||||||
|
|
||||||
for (RenderObjects::iterator i = garbage.begin(); i != garbage.end(); i++)
|
for (RenderObjects::iterator i = garbage.begin(); i != garbage.end(); i++)
|
||||||
|
|
|
@ -30,7 +30,6 @@ Emitter::Emitter(ParticleEffect *pe) : Quad(), pe(pe)
|
||||||
|
|
||||||
void Emitter::destroy()
|
void Emitter::destroy()
|
||||||
{
|
{
|
||||||
BBGE_PROF(Emitter_destroy);
|
|
||||||
for (Particles::iterator i = particles.begin(); i != particles.end(); i++)
|
for (Particles::iterator i = particles.begin(); i != particles.end(); i++)
|
||||||
{
|
{
|
||||||
(*i)->active = false;
|
(*i)->active = false;
|
||||||
|
@ -43,7 +42,6 @@ void Emitter::destroy()
|
||||||
|
|
||||||
void Emitter::spawnParticle(float perc)
|
void Emitter::spawnParticle(float perc)
|
||||||
{
|
{
|
||||||
BBGE_PROF(Emitter_spawnParticle);
|
|
||||||
Particle *p = particleManager->getFreeParticle(this);
|
Particle *p = particleManager->getFreeParticle(this);
|
||||||
|
|
||||||
p->active = true;
|
p->active = true;
|
||||||
|
@ -231,8 +229,6 @@ void Emitter::render()
|
||||||
|
|
||||||
void Emitter::onRender()
|
void Emitter::onRender()
|
||||||
{
|
{
|
||||||
BBGE_PROF(Emitter_onRender);
|
|
||||||
|
|
||||||
if (particles.empty()) return;
|
if (particles.empty()) return;
|
||||||
|
|
||||||
if (!data.spawnLocal)
|
if (!data.spawnLocal)
|
||||||
|
|
|
@ -39,7 +39,6 @@ void ParticleEffect::setDie(bool v)
|
||||||
|
|
||||||
void ParticleEffect::load(const std::string &name)
|
void ParticleEffect::load(const std::string &name)
|
||||||
{
|
{
|
||||||
BBGE_PROF(ParticleEffect_load);
|
|
||||||
particleManager->loadParticleEffectFromBank(name, this);
|
particleManager->loadParticleEffectFromBank(name, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,8 +442,6 @@ void ParticleEffect::stop()
|
||||||
|
|
||||||
void ParticleEffect::onRender()
|
void ParticleEffect::onRender()
|
||||||
{
|
{
|
||||||
BBGE_PROF(ParticleEffect_onRender);
|
|
||||||
|
|
||||||
RenderObject::onRender();
|
RenderObject::onRender();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -257,7 +257,6 @@ Particle *ParticleManager::stomp()
|
||||||
|
|
||||||
Particle *ParticleManager::getFreeParticle(Emitter *emitter)
|
Particle *ParticleManager::getFreeParticle(Emitter *emitter)
|
||||||
{
|
{
|
||||||
BBGE_PROF(ParticleManager_getFreeParticle);
|
|
||||||
if (size == 0) return 0;
|
if (size == 0) return 0;
|
||||||
|
|
||||||
Particle *p = 0;
|
Particle *p = 0;
|
||||||
|
@ -354,7 +353,6 @@ int ParticleManager::getSize()
|
||||||
|
|
||||||
void ParticleManager::update(float dt)
|
void ParticleManager::update(float dt)
|
||||||
{
|
{
|
||||||
BBGE_PROF(ParticleManager_update);
|
|
||||||
numActive = 0;
|
numActive = 0;
|
||||||
for (size_t i = 0; i < particles.size(); i++)
|
for (size_t i = 0; i < particles.size(); i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,617 +0,0 @@
|
||||||
IPROF: A Portable Industrial-Strength Interactive Profiler for C++ and C
|
|
||||||
by Sean Barrett
|
|
||||||
|
|
||||||
Version 0.2
|
|
||||||
|
|
||||||
|
|
||||||
CONTENTS
|
|
||||||
Overview
|
|
||||||
User Manual
|
|
||||||
Platform
|
|
||||||
Instrumentation
|
|
||||||
Private Zones
|
|
||||||
Public Zones
|
|
||||||
Initialization
|
|
||||||
Processing Data
|
|
||||||
Displaying Results
|
|
||||||
Controlling Display
|
|
||||||
Understanding CALL GRAPH output
|
|
||||||
Performance Expectation
|
|
||||||
Implementation Notes
|
|
||||||
Version History
|
|
||||||
|
|
||||||
|
|
||||||
OVERVIEW
|
|
||||||
|
|
||||||
IProf is an interactive profiler which works by intrusively instrumenting
|
|
||||||
code. Code is divided into zones by programmer-inserted statements. Zones
|
|
||||||
are both lexically and dynamically scoped--all time spent within a
|
|
||||||
lexically scoped zone, and any code which it calls which is not itself
|
|
||||||
zoned, is attributed to that zone.
|
|
||||||
|
|
||||||
Profiling occurs interactively; time is divided into "frames", and the
|
|
||||||
profiler shows time spent on the previous frame (or a smoothed average
|
|
||||||
or possibly even a frame a second or two ago).
|
|
||||||
|
|
||||||
Like a traditional profiler, IProf records or can compute the number of
|
|
||||||
times a zone is entered, the amount of time spent in the zone ("self
|
|
||||||
time"), and the amount of time spent in the zone and its descendents
|
|
||||||
("hierarchical time" -- "self + child time" in gprof).
|
|
||||||
|
|
||||||
Furthermore, IProf computes information along the lines of gprof--number of
|
|
||||||
times a given zone is entered from any other specific zone; self and
|
|
||||||
hierarchical time spent in a given zone on behalf of a specific parent
|
|
||||||
zone, etc. (However, where gprof estimates this information based only on
|
|
||||||
call counts, IProf measures the actual values. So, for example, IProf will
|
|
||||||
accurately report if a ray-casting routine called by both physics and AI
|
|
||||||
always spends longer per AI-call because the casts are longer.)
|
|
||||||
|
|
||||||
Precise information is available for recursive routines, including call
|
|
||||||
depths etc. [The current version of IProf does not yet completely handle
|
|
||||||
reporting of recursive data, although it is measured correctly.]
|
|
||||||
|
|
||||||
Additionally, IProf provides all numbers in instantaneous form or as two
|
|
||||||
differently weighted moving averages. It's easy to pause the profile
|
|
||||||
updating so that you can switch between multiple views of the paused data
|
|
||||||
set. Two optional flags allow trading off memory for deeper historical
|
|
||||||
views. The cheaper option provides only zone-self-time history, suitable
|
|
||||||
for a real-time graph of behavior. The more memory-expensive flag keeps a
|
|
||||||
history of all the data for a certain number of frames, allowing full
|
|
||||||
profile analysis of old frames.
|
|
||||||
|
|
||||||
IProf is designed for its monitoring/gathering mode to be "always on", even
|
|
||||||
in release/optimized builds. The monitoring routines are designed to be
|
|
||||||
reasonably efficient--the full hash on every function entry required by
|
|
||||||
gprof is avoided in most cases--and the programmer can minimize the impact
|
|
||||||
by limiting the instrumentation to relatively large routines. (One could
|
|
||||||
certainly instrument a vector add function and possibly get useful call
|
|
||||||
count data from it, but the monitoring overhead would be significant and
|
|
||||||
noticeable in that case.) In combination with history information, it
|
|
||||||
becomes possible to run an application, notice poor behavior, pause the
|
|
||||||
(always on) profiling and the application, and start browsing through the
|
|
||||||
historical profiling information.
|
|
||||||
|
|
||||||
IProf uses both per-call monitoring and a separate per-frame
|
|
||||||
gathering/analysis phase. The latter is itself instrumented so the overhead
|
|
||||||
due to it is easy to see.
|
|
||||||
|
|
||||||
|
|
||||||
USER MANUAL
|
|
||||||
|
|
||||||
These sections document the necessary code you must use and code changes
|
|
||||||
you must make to use the profiler.
|
|
||||||
|
|
||||||
The profiling system expects to be able to use any identifier which is
|
|
||||||
prefixed with "Prof_" with exactly that pattern of uppercase/lowercase
|
|
||||||
(i.e. "PROF_" and "prof_" can be used freely by other code).
|
|
||||||
|
|
||||||
|
|
||||||
COMPILING THE PROFILING SYSTEM
|
|
||||||
|
|
||||||
The profiler was developed using MSVC 6.0, but should be reasonably
|
|
||||||
portable. The implementation files are provided as .C files so they can be
|
|
||||||
used with C compilers; however, they can be renamed to C++ files and
|
|
||||||
compiled in that form. The implementations automatically insert extern "C"
|
|
||||||
on the public routines. Internal routines will use either C or C++ linkage
|
|
||||||
depending on which way you compile them; you must compile all the profiler
|
|
||||||
files as either C or C++, without intermixing.
|
|
||||||
|
|
||||||
[[ NOTE: Originally the code was written in C++, and then it was
|
|
||||||
converted to compile with C, and then some additional small changes
|
|
||||||
were made. As of this writing, I haven't actually tested compiling
|
|
||||||
everything as C++ again. Feel free to test for me. Or just compile
|
|
||||||
the C files as C--you can still USE the C++ interfaces fine.]]
|
|
||||||
|
|
||||||
Needed files:
|
|
||||||
prof_win32.c -- Win32 implementation of seconds-based timer
|
|
||||||
prof_gather.c -- raw data collection
|
|
||||||
prof_process.c -- high-level data collection, report generator
|
|
||||||
prof_draw.c -- opengl rendering interface
|
|
||||||
|
|
||||||
prof.h -- public front-end
|
|
||||||
prof_win32.h -- Win32 implementation of fast integer timestamp
|
|
||||||
prof_gather.h -- instrumentation macros (included by prof.h)
|
|
||||||
prof_internal.h -- private interfaces
|
|
||||||
|
|
||||||
|
|
||||||
PLATFORM SUPPORT
|
|
||||||
|
|
||||||
IProf requires a small amount--less than fifty lines--of platform-specific
|
|
||||||
code.
|
|
||||||
|
|
||||||
Win32 under MSVC is automatically supported with no further effort on your
|
|
||||||
part, using the files prof_win32.c and prof_win32.h
|
|
||||||
|
|
||||||
To use other platforms, just create equivalent files for your platform. The
|
|
||||||
C file contains a routine for getting an accurate floating point time
|
|
||||||
reading; the H file contains the definition of a 64-bit integer type and a
|
|
||||||
fast routine for reading a timestamp of that size. If 64-bit math isn't
|
|
||||||
available on your platform, or if your timestamp is only 32-bit, you can
|
|
||||||
replace the 64-bit type with a 32-bit type, as long as that item won't
|
|
||||||
overflow in the course of running the application. (A 31-bit millisecond
|
|
||||||
timer is good for 24 days, but is very imprecise for this application.) If
|
|
||||||
reading the timestamp is slow, you will want to minimize how often the zone
|
|
||||||
entry and exit points are called.
|
|
||||||
|
|
||||||
Also required is a display interface; an opengl one is provided, although
|
|
||||||
others would be easy to code. (The primary display is purely textual, and
|
|
||||||
is available through a text interface.)
|
|
||||||
|
|
||||||
|
|
||||||
INSTRUMENTATION
|
|
||||||
|
|
||||||
First, #include "prof.h" in files that need profiling.
|
|
||||||
|
|
||||||
The flag Prof_ENABLED determines whether the monitoring code is compiled or
|
|
||||||
not, to make it easy to turn off all profiling code for final shippable
|
|
||||||
builds. Additional flags controlling amount of history data and memory
|
|
||||||
usage therein are defined at the top of the file prof_process.c and should
|
|
||||||
just be changed there since they affect no other files.
|
|
||||||
|
|
||||||
There are two main ways of instrumenting, and each offers a C++ interface
|
|
||||||
and a C interface.
|
|
||||||
|
|
||||||
Private zones
|
|
||||||
C++ Prof(zone);
|
|
||||||
|
|
||||||
C Prof_Begin(zone)
|
|
||||||
Prof_End
|
|
||||||
|
|
||||||
Public zones
|
|
||||||
Prof_Define(zone);
|
|
||||||
Prof_Declare(zone);
|
|
||||||
|
|
||||||
C++ Prof_Scope(zone);
|
|
||||||
|
|
||||||
C Prof_Region(zone)
|
|
||||||
Prof_End
|
|
||||||
|
|
||||||
Zone names--indicated by "zone" above--must obey the rules for identifiers,
|
|
||||||
although they can begin with a number, and they exist in a separate
|
|
||||||
namespace from regular identifiers.
|
|
||||||
|
|
||||||
So these are valid zone names:
|
|
||||||
my_zone_2
|
|
||||||
2_my_zone
|
|
||||||
__
|
|
||||||
|
|
||||||
And these are NOT valid zone names:
|
|
||||||
"my_zone"
|
|
||||||
my_class::my_zone
|
|
||||||
|
|
||||||
|
|
||||||
PRIVATE ZONES
|
|
||||||
|
|
||||||
The simplest, and highly recommended, approach to instrumentation is to
|
|
||||||
create a private zone which only exists in a single location. In C++, you
|
|
||||||
do this by declaring a lexically scoped zone with a statement which behaves
|
|
||||||
semantically like a variable declaration:
|
|
||||||
|
|
||||||
// C++ instrumentation
|
|
||||||
void my_routine()
|
|
||||||
{
|
|
||||||
Prof(my_routine_name);
|
|
||||||
... my code ...
|
|
||||||
}
|
|
||||||
|
|
||||||
This will cause all time spent after Prof(my_routine_name) to accumulate in
|
|
||||||
a zone in the profiling reports labeled "my_routine_name". The zone ends
|
|
||||||
when the name goes out of scope, that is, when a destructor would be called
|
|
||||||
corresponding to this declaration.
|
|
||||||
|
|
||||||
Zones don't have to appear at routine-level function scope; for example:
|
|
||||||
|
|
||||||
// C++ instrumentation
|
|
||||||
void my_routine()
|
|
||||||
{
|
|
||||||
Prof(my_routine);
|
|
||||||
... // zone my_routine
|
|
||||||
if (...)
|
|
||||||
{
|
|
||||||
Prof(my_routine_special_case);
|
|
||||||
... // zone my_routine_special_case
|
|
||||||
}
|
|
||||||
... // zone my_routine
|
|
||||||
}
|
|
||||||
|
|
||||||
Instrumenting in C requires more work, because C doesn't provide
|
|
||||||
destructors, so it's not possible to lexically scope zones automatically.
|
|
||||||
Instead, the programmer must insert Begin/End pairs and make sure those
|
|
||||||
pairs are accurately balanced. All paths out of a function must be
|
|
||||||
accounted for. A crash or severe slowdown is likely to occur with
|
|
||||||
unbalanced pairs.
|
|
||||||
|
|
||||||
// C instrumentation
|
|
||||||
void my_routine(void)
|
|
||||||
{
|
|
||||||
Prof_Begin(my_routine)
|
|
||||||
int x = some_func();
|
|
||||||
if (x == 0) {
|
|
||||||
Prof_End
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
...
|
|
||||||
Prof_End
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_Begin() is declaration-like; however, it takes no trailing semicolon.
|
|
||||||
(This is necessary so it can be compiled out; C doesn't allow the empty
|
|
||||||
statement ";" to precede variable declarations.) Prof_End takes no
|
|
||||||
trailing semicolon or parentheses to help remind you of this. (You can
|
|
||||||
change the definition of Prof_End in prof_gather.h if you don't like that.)
|
|
||||||
|
|
||||||
Profiling instructions like Prof() and Prof_Begin() can be placed anywhere
|
|
||||||
that variable declarations are legal; generally you want to define them
|
|
||||||
before other variables so the variable initializations are profiled.
|
|
||||||
|
|
||||||
The C interfaces are also available in C++ if you should want to use a not-
|
|
||||||
exactly-lexically-scoped zone, e.g. end a zone before the destructor would
|
|
||||||
run. (You can't, however, end Prof() with Prof_End.)
|
|
||||||
|
|
||||||
|
|
||||||
PUBLIC ZONES
|
|
||||||
|
|
||||||
If you define multiple private zones with the same name, they will be
|
|
||||||
treated as entirely unrelated zones that happen to have the same name, and
|
|
||||||
you will see the same name multiple times in the profiling output.
|
|
||||||
|
|
||||||
Instead, you probably want to use public zones, to use the same zone in
|
|
||||||
multiple regions of code. For example, we might have two routines that
|
|
||||||
serve the same purpose which we always want to measure as one. Or we might
|
|
||||||
have two blocks of code within a single routine which we want to credit to
|
|
||||||
the same zone.
|
|
||||||
|
|
||||||
To do this, first define the zone with Prof_Define(zone), and then use it
|
|
||||||
with Prof_Scope(zone) [C++] or Prof_Region(zone) ... Prof_End [C].
|
|
||||||
|
|
||||||
// C++ instrumentation
|
|
||||||
Prof_Define(my_routine);
|
|
||||||
|
|
||||||
void my_routine_v1()
|
|
||||||
{
|
|
||||||
Prof_Scope(my_routine);
|
|
||||||
...
|
|
||||||
}
|
|
||||||
|
|
||||||
void my_routine_v2()
|
|
||||||
{
|
|
||||||
Prof_Scope(my_routine);
|
|
||||||
...
|
|
||||||
}
|
|
||||||
|
|
||||||
or
|
|
||||||
|
|
||||||
// C instrumentation
|
|
||||||
Prof_Define(my_routine);
|
|
||||||
|
|
||||||
void my_routine_v1(void)
|
|
||||||
{
|
|
||||||
Prof_Region(my_routine)
|
|
||||||
...
|
|
||||||
Prof_End
|
|
||||||
}
|
|
||||||
|
|
||||||
void my_routine_v2(void)
|
|
||||||
{
|
|
||||||
Prof_Region(my_routine)
|
|
||||||
...
|
|
||||||
Prof_End
|
|
||||||
}
|
|
||||||
|
|
||||||
Because Prof_Define defines an actual global symbol (if used at file
|
|
||||||
scope), the symbol can even be referenced from other files by saying:
|
|
||||||
|
|
||||||
extern Prof_Declare(my_routine);
|
|
||||||
|
|
||||||
void my_routine()
|
|
||||||
{
|
|
||||||
Prof_Scope(my_routine);
|
|
||||||
}
|
|
||||||
|
|
||||||
You can use 'extern "C" Prof_Declare()' or Prof_Define() to share a zone
|
|
||||||
between C and C++ code.
|
|
||||||
|
|
||||||
|
|
||||||
USER MANUAL - INITIALIZATION
|
|
||||||
|
|
||||||
The profiling system is self-initializing.
|
|
||||||
|
|
||||||
|
|
||||||
USER MANUAL - PROCESSING DATA
|
|
||||||
|
|
||||||
Every frame, you should call Prof_update(). Prof_update() will gather
|
|
||||||
results and record frame-history information on the assumption that each
|
|
||||||
call is a frame. Prof_update() takes a boolean flag which indicates whether
|
|
||||||
to update the history or not; passing in false means profiling is "paused"
|
|
||||||
and doesn't change.
|
|
||||||
|
|
||||||
You might wire this to its own toggle, or you might simply pass in a pre-
|
|
||||||
existing flag for whether the simulation itself is active or not, thus
|
|
||||||
allowing you to pause the simulation and automatically pause the profiling.
|
|
||||||
(On the other hand, if you're profiling a renderer, you might want to
|
|
||||||
pause the simulation and keep profiling.)
|
|
||||||
|
|
||||||
|
|
||||||
USER MANUAL - DISPLAYING RESULTS
|
|
||||||
|
|
||||||
IProf offers two separate types of display: the report, which is primarily
|
|
||||||
textual, and the graph, which is entirely graphical.
|
|
||||||
|
|
||||||
If you're using OpenGL, output is straightforward. For the text report,
|
|
||||||
call Prof_draw_gl() with the display set to a 2d rendering mode--one that
|
|
||||||
can use integer addressing, e.g. integers the size of pixels, virtual
|
|
||||||
pixels (e.g. a 640x480 screen regardless of actual dimension), or even
|
|
||||||
characters. Set the blending state to whatever blending mode you want for
|
|
||||||
the report display. For the graphics report, call Prof_draw_graph_gl().
|
|
||||||
Details of the parameters to these functions are available in the header
|
|
||||||
file.
|
|
||||||
|
|
||||||
For other output devices (Direct3D, text), you'll have to write your own
|
|
||||||
functions equivalent to Prof_draw_gl() and Prof_draw_graph_gl(). These
|
|
||||||
should not be too difficult; these functions don't compute any of the
|
|
||||||
profiling information; they simply format a text report or dataset to the
|
|
||||||
screen. The text report format consists of several title fields to be
|
|
||||||
printed, and then a collection of data records. Each data record has a name
|
|
||||||
and an indentation amount for that name (used for call graph
|
|
||||||
parent/children formatting), a collection of unnamed data "values", and a
|
|
||||||
flag field indicating which of the data values should be displayed.
|
|
||||||
Additionally, data records have a "heat" which indicates how rapidly
|
|
||||||
changing they are, and one record may be "highlighted" indicating a virtual
|
|
||||||
cursor is on that line.
|
|
||||||
|
|
||||||
[[ In practice, Prof_draw_gl makes few enough GL calls that maybe it's
|
|
||||||
worth modularizing things out further. ]]
|
|
||||||
|
|
||||||
|
|
||||||
USER MANUAL - CONTROLLING DISPLAY
|
|
||||||
|
|
||||||
IProf features some easy-to-use UI elements that allow program-directed
|
|
||||||
control or user-interaction-based control over what data is reported.
|
|
||||||
Simply hook these calls up to hotkey presses to complete your working
|
|
||||||
profile system. (You could even write code to support mouse clicking on the
|
|
||||||
report by calling Prof_set_cursor and on the graph by calling
|
|
||||||
Prof_set_frame, but the hit detection is up to you.)
|
|
||||||
|
|
||||||
These are in rough order of the priority with which you might want to
|
|
||||||
implement them.
|
|
||||||
|
|
||||||
Most important
|
|
||||||
|
|
||||||
Prof_set_report_mode(enum ...)
|
|
||||||
Selects what to show in the report:
|
|
||||||
Prof_SELF_TIME: flat times sorted by self time
|
|
||||||
Prof_HIERARCHICAL_TIME: flat times sorted by hierarchical time
|
|
||||||
Prof_CALL_GRAPH: call graph parent/children information
|
|
||||||
|
|
||||||
Prof_move_cursor(int delta)
|
|
||||||
Move the cursor up-or-down by delta lines
|
|
||||||
|
|
||||||
Prof_select(void)
|
|
||||||
Switch to call graph mode on whichever zone is currently selected
|
|
||||||
|
|
||||||
Prof_select_parent(void)
|
|
||||||
Go to largest-hierarchical-time parent of the active zone in
|
|
||||||
the call graph. (Roughly like "go up a directory".)
|
|
||||||
|
|
||||||
Important if you support history
|
|
||||||
|
|
||||||
Prof_move_frame(int delta)
|
|
||||||
Move backwards or forwards in history by delta frames
|
|
||||||
|
|
||||||
Not too important
|
|
||||||
|
|
||||||
Prof_set_average(int type)
|
|
||||||
Selects which moving average to use (0 == instantaneous, 1=default);
|
|
||||||
only meaningful if frame# = 0; when looking at history, instantaneous
|
|
||||||
values are always used.
|
|
||||||
|
|
||||||
Prof_set_frame(int frame)
|
|
||||||
Selects which history entry to view (0==current, 1==previous, etc.)
|
|
||||||
|
|
||||||
Prof_set_cursor(int pos)
|
|
||||||
Set the position of the up-and-down cursor.
|
|
||||||
|
|
||||||
Prof_set_recursion(enum ...)
|
|
||||||
Selects whether to show recursive routines as a single zone or
|
|
||||||
as a series of distinct zones for each recursion level.
|
|
||||||
[[ currently unimplemented ]]
|
|
||||||
|
|
||||||
|
|
||||||
UNDERSTANDING CALL GRAPH OUTPUT
|
|
||||||
|
|
||||||
The call graph output focuses on a single zone, and provides information
|
|
||||||
about the parents (callers) and children (callees) of that zone.
|
|
||||||
|
|
||||||
The general format is something like this:
|
|
||||||
|
|
||||||
zone self hier count
|
|
||||||
+my_parent1 0.75 2.50 4.0
|
|
||||||
+my_parent2 1.00 3.25 6.0
|
|
||||||
-my_routine 1.75 5.75 10.0
|
|
||||||
+my_child1 1.00 2.00 15.0
|
|
||||||
+my_child2 0.25 1.50 500.0
|
|
||||||
my_child3 0.50 0.50 3.0
|
|
||||||
|
|
||||||
"self" indicates self-time (time in this zone), "hier" is hierarchical-time
|
|
||||||
(time in this zone or its descendents), and "count" is the number of times
|
|
||||||
the zone was entered. (Entry counts are inherently integral, but are shown
|
|
||||||
as floating point since they may be a moving average of several integers.)
|
|
||||||
|
|
||||||
Currently the zone "my_routine" is being examined. It accounts for 5.75
|
|
||||||
milliseconds of time between itself and the zones it calls. 1.75ms are
|
|
||||||
spent in itself. The zone was entered (called) 10 times this frame.
|
|
||||||
|
|
||||||
The difference between my_routine's self time and hierarchical time is
|
|
||||||
4.00ms; that much time must be being spent in its descendents. Its
|
|
||||||
immediate children--the zones that my_routine calls directly--appear below
|
|
||||||
it on the table. The hierarchical time of each child represents the time
|
|
||||||
spent in that child and all its descendents *on behalf of my_routine*--
|
|
||||||
other calls to that child are not counted. Thus, the sum of all the
|
|
||||||
children's hierarchical time should account for all time spent in
|
|
||||||
descendents of my_routine; hence, the sum of the child hier times is 4.00,
|
|
||||||
identical to the difference between self and hier for my_routine.
|
|
||||||
|
|
||||||
Above "my_routine" in the chart is information about the callers of
|
|
||||||
my_routine. However, the timings and counts in this section are not the
|
|
||||||
self and hierarchical times of the parent functions themselves--there is no
|
|
||||||
sensible meaning of "on behalf of my_routine" for the parents. Instead, the
|
|
||||||
self, hier, and count fields show the time spent *in my_routine* on behalf
|
|
||||||
of those parents. Thus, for each field, all of the parent entries sum to
|
|
||||||
the corresponding entry in my_routine. Again, these are computed exactly.
|
|
||||||
If my_routine was the public interface to a raycaster called by both AI and
|
|
||||||
physics, but it passed the raycast on to further routines which were
|
|
||||||
themselves explicitly zoned, then most of the my_routine time would be
|
|
||||||
spent in descendents. This would show up in the "hierarchical time" field,
|
|
||||||
and the parent zones, AI and physics, would show that hierarchical time
|
|
||||||
attributed accurately.
|
|
||||||
|
|
||||||
There is additional data available in the system--it would be possible to
|
|
||||||
drill down into lower-level functions and still attribute them to zones
|
|
||||||
several parent levels above; there just isn't currently any user interface
|
|
||||||
or computation functionality to do it.
|
|
||||||
|
|
||||||
|
|
||||||
PERFORMANCE EXPECTATION
|
|
||||||
|
|
||||||
Except for recursive routines (see Implementation Notes section), the
|
|
||||||
expected performance on zone entry comes from running roughly the following
|
|
||||||
code:
|
|
||||||
|
|
||||||
extern Something *p0,*p1;
|
|
||||||
if (p0->ptr_field != p1) { ... /* rarely runs */ }
|
|
||||||
p0->int64_field0 = RDTSC; // read timestamp counter
|
|
||||||
p0->int32_field += 1;
|
|
||||||
p1->int64_field1 += p0->int64_field0 - p1->int64_field0;
|
|
||||||
p1 = p0;
|
|
||||||
|
|
||||||
Zone exit costs a bit less.
|
|
||||||
|
|
||||||
|
|
||||||
IMPLEMENTATION NOTES
|
|
||||||
|
|
||||||
IProf uses two relatively unknown techniques to produce accurate call
|
|
||||||
information with minimal overhead. The first technique produces accurate
|
|
||||||
call information at a similar cost to gprof's mcount monitoring; the second
|
|
||||||
reduces the overhead.
|
|
||||||
|
|
||||||
_Zone Stack Tracking_
|
|
||||||
|
|
||||||
gprof's mcount technique combines two separate measurements. At every
|
|
||||||
function entry, the function and the caller (grabbed from the return
|
|
||||||
address on the stack) are hashed to determine a unique "data-gathering
|
|
||||||
slot", and an integer in that slot is incremented. Thus, exact pairwise
|
|
||||||
call counts are computed. Simultaneously, gprof periodically samples the
|
|
||||||
instruction pointer to measure the time spent in any given routine--"self
|
|
||||||
times". Hierarchical times are computed by distributing the self times up
|
|
||||||
the tree based on the call graph counts. (If routine X is called 9 times
|
|
||||||
from routine Z, and one time from routine Y, then 90% of X's time is
|
|
||||||
attributed to Z, and 10% to Y.)
|
|
||||||
|
|
||||||
An intrusive profiler which samples a timer at zone entry and again at zone
|
|
||||||
exit will compute accurate hierarchical times. By keeping a stack of zones,
|
|
||||||
it's possible to compute accurate hierarchical and self times. The stack of
|
|
||||||
zones also provides caller information, so hierarchical and self times can
|
|
||||||
be attributed to each unique pair of caller & callee zones (via hashing).
|
|
||||||
This will allow much more accurate attribution. In fact, it is sufficient
|
|
||||||
to compute exact values for all the information gprof outputs, except in
|
|
||||||
the face of recursion. Performance is fairly good; unlike a single-zone
|
|
||||||
intrusive profiler, which must measure both self and hierarchical time,
|
|
||||||
since neither can be derived from the other, the zone-pair profiler need
|
|
||||||
only measure hierarchical time; self-time can be derived from hierarchical
|
|
||||||
time (but not vice versa).
|
|
||||||
|
|
||||||
A further improvement is, instead of having one data-gathering slot per
|
|
||||||
zone--that is, representing the state of the top of the zone stack--and
|
|
||||||
instead of having one data-gathering slot per caller/callee zone pair--that
|
|
||||||
is, representing the state of the top two entries of the zone stack--to
|
|
||||||
have one data-gathering slot per unique full stack state. This can be done
|
|
||||||
straightforwardly by building the stack as a linked list (creating an
|
|
||||||
inverted tree--a tree of all stack states with only parent-pointer links),
|
|
||||||
and hashing the "zone to be pushed" and the current stack to find the new
|
|
||||||
stack. Thus the cost of the hash computation is essentially identical to
|
|
||||||
the previous case. If every zone is only called from one specific place,
|
|
||||||
there will still only be one data-gathering slot per zone; if a routine is
|
|
||||||
recursive, it will create a large number of data-gathering slots, one for
|
|
||||||
each depth of recursion. (A complex mutually recursive program like a
|
|
||||||
compiler might generate an unreasonable number of unique states.)
|
|
||||||
|
|
||||||
With zone-stack tracking, it's possible to measure only either hierarchical
|
|
||||||
time or self-time and derive the other. Hierarchical time is actually more
|
|
||||||
efficient to measure, but it leaves handling the top-level overarching
|
|
||||||
global state as a special case (since it will have a timer that starts but
|
|
||||||
never ends). It's easier to instead measure self-time and rederive
|
|
||||||
hierarchical time. Moreover, a recursive routine will automatically
|
|
||||||
"overcount" hierarchical time (it's accrued at each level of the hierarchy),
|
|
||||||
requiring significant fixup. It's more straightforward to just compute the
|
|
||||||
recursive data correctly from the self times in the first place.
|
|
||||||
|
|
||||||
|
|
||||||
_Hash Caching_
|
|
||||||
|
|
||||||
Although the hash lookup described above is coded to proceed as quickly as
|
|
||||||
possible if the hash hits on the first probe, it still requires enough
|
|
||||||
computation and a function call that it is worth avoiding if possible. To
|
|
||||||
that end, each zone-entry location declares a hidden static variable
|
|
||||||
private to that zone-entry point which caches the hash lookup. At zone-
|
|
||||||
entry, the code checks the cache's "next node in the linked list" field
|
|
||||||
with the current stack state. If the two are equal, then the cache is
|
|
||||||
valid, and no hash lookup occurs. If they do not match, then the cache is
|
|
||||||
wrong, and the hash lookup proceeds, and updates the cache. The cache is
|
|
||||||
initialized to an impossible value, so the first time the code is run a hash
|
|
||||||
lookup always occurs.
|
|
||||||
|
|
||||||
The result is that in the normal case, a routine called from a single
|
|
||||||
place, the cache is always valid (after the very first call). Furthermore,
|
|
||||||
the branch will always predict correctly, since it always branches
|
|
||||||
identically. However, for a routine that is called from several places,
|
|
||||||
there is a "switching" overhead each time it's called from a different
|
|
||||||
place. So, for example, a raycaster called by both physics and AI might pay
|
|
||||||
the overhead twice per frame, if all the AI calls occur before all the
|
|
||||||
physics calls. However, a common low-level routine (e.g. a vector add)
|
|
||||||
called alternately from two different zones would have to perform the hash
|
|
||||||
lookup every time.
|
|
||||||
|
|
||||||
The actual common "failure" case is a recursive routine, for which, each
|
|
||||||
time the routine is entered, the state of the call stack is different from
|
|
||||||
the last time, thus almost always paying the hash lookup cost. For
|
|
||||||
something like a recursive linked list traversal, the hash occurs every
|
|
||||||
time. (It doesn't matter if the routine is tail-recursive; once you insert
|
|
||||||
the profiling instrumentation, it's no longer tail-recursive.) A full
|
|
||||||
binary tree traversal will always enter a different zone-stack-state from
|
|
||||||
last time, except after reaching a left-child leaf. (The recursion then
|
|
||||||
returns and then goes down to the right child, which is at the same height
|
|
||||||
as the left child.) So a full binary tree traversal will have to hash about
|
|
||||||
3/4 of the time. A full quadtree traversal will have to hash about 2/5 of
|
|
||||||
the time. If the traversal is doing anything complicated, this should not
|
|
||||||
be a problem; but if it's a simple traversal, the performance overhead may
|
|
||||||
be significant. Like the vector add case, it may be better to remove
|
|
||||||
instrumentation from low-inherent-cost recursive routines except when
|
|
||||||
absolutely needed. Of course, it's easy enough to compare performance
|
|
||||||
behavior before and after adding the instrumentation and see if the overhead
|
|
||||||
is acceptable.
|
|
||||||
|
|
||||||
|
|
||||||
VERSION HISTORY
|
|
||||||
|
|
||||||
version 0.2 -- 2003-02-06 STB
|
|
||||||
- Significant interface changes to Prof_draw_gl:
|
|
||||||
- accepts floating point instead of int for 2d screen metrics
|
|
||||||
- accepts a total width and height of the display and conforms
|
|
||||||
to that
|
|
||||||
- accepts a precision specification for display of time values
|
|
||||||
- added little '+' and '-' signs reminiscent of list displays
|
|
||||||
so you know which ones can be drilled down on
|
|
||||||
- expanded this doc's description of what's legal for a zone-name
|
|
||||||
- fixed an error trying to compile the C files as C++
|
|
||||||
- added Prof_select_parent() for moving up the tree
|
|
||||||
|
|
||||||
version 0.1 -- 2003-02-05 STB
|
|
||||||
- First public version, heavily refactored, 1500 lines
|
|
||||||
- win32 timing interface and smooth "moving average" code derived
|
|
||||||
from Jonathan Blow's Game Developer Magazine articles
|
|
||||||
- missing functionality:
|
|
||||||
- correct attribution of time to zones that are parents of
|
|
||||||
recursive zones in call graph view (hierarchical times don't
|
|
||||||
bubble up correctly)
|
|
||||||
- spread recursion display (displaying each depth of a recursive
|
|
||||||
zone as if it were a separate zone)
|
|
|
@ -1,94 +0,0 @@
|
||||||
#ifndef Prof_INC_PROF_H
|
|
||||||
#define Prof_INC_PROF_H
|
|
||||||
|
|
||||||
|
|
||||||
//#define Prof_ENABLED
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "prof_gather.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Prof_update
|
|
||||||
*
|
|
||||||
* Pass in true (1) to accumulate history info; pass
|
|
||||||
* in false (0) to throw away the current frame's data
|
|
||||||
*/
|
|
||||||
extern void Prof_update(int record);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Prof_draw_gl -- display the current report via OpenGL
|
|
||||||
*
|
|
||||||
* You must provide a callable text-printing function.
|
|
||||||
* Put the opengl state into a 2d rendering mode.
|
|
||||||
*
|
|
||||||
* Parameters:
|
|
||||||
* <sx,sy> -- location where top line is drawn
|
|
||||||
* <width, height> -- total size of display (if too small, text will overprint)
|
|
||||||
* line_spacing -- how much to move sy by after each line; use a
|
|
||||||
* negative value if y decreases down the screen
|
|
||||||
* precision -- decimal places of precision for time data, 1..4 (try 2)
|
|
||||||
* print_text -- function to display a line of text starting at the
|
|
||||||
* given coordinate; best if 0,1..9 are fixed-width
|
|
||||||
* text_width -- a function that computes the pixel-width of
|
|
||||||
* a given string before printing. you can fake with a
|
|
||||||
* simple approximation of width('0')*strlen(str)
|
|
||||||
*
|
|
||||||
* to avoid overprinting, you can make print_text truncate long strings
|
|
||||||
*/
|
|
||||||
extern void Prof_draw_gl(float sx, float sy,
|
|
||||||
float width, float height,
|
|
||||||
float line_spacing,
|
|
||||||
int precision,
|
|
||||||
void (*print_text)(float x, float y, char *str),
|
|
||||||
float (*text_width)(char *str));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Parameters
|
|
||||||
* <sx, sy> -- origin of the graph--location of (0,0)
|
|
||||||
* x_spacing -- screenspace size of each history sample; e.g.
|
|
||||||
* 2.0 pixels
|
|
||||||
* y_spacing -- screenspace size of one millisecond of time;
|
|
||||||
* for an app with max of 20ms in any one zone,
|
|
||||||
* 8.0 would produce a 160-pixel tall display,
|
|
||||||
* assuming screenspace is in pixels
|
|
||||||
*/
|
|
||||||
extern void Prof_draw_graph_gl(float sx, float sy,
|
|
||||||
float x_spacing, float y_spacing);
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
Prof_SELF_TIME,
|
|
||||||
Prof_HIERARCHICAL_TIME,
|
|
||||||
Prof_CALL_GRAPH,
|
|
||||||
} Prof_Report_Mode;
|
|
||||||
|
|
||||||
extern void Prof_set_report_mode(Prof_Report_Mode e);
|
|
||||||
extern void Prof_move_cursor(int delta);
|
|
||||||
extern void Prof_select(void);
|
|
||||||
extern void Prof_select_parent(void);
|
|
||||||
extern void Prof_move_frame(int delta);
|
|
||||||
|
|
||||||
extern void Prof_set_smoothing(int smoothing_mode);
|
|
||||||
extern void Prof_set_frame(int frame);
|
|
||||||
extern void Prof_set_cursor(int line);
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
Prof_FLATTEN_RECURSION,
|
|
||||||
Prof_SPREAD_RECURSION
|
|
||||||
} Prof_Recursion_Mode;
|
|
||||||
|
|
||||||
extern void Prof_set_recursion(Prof_Recursion_Mode e);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // Prof_INC_PROF_H
|
|
||||||
|
|
||||||
|
|
|
@ -1,334 +0,0 @@
|
||||||
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include <gl/gl.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "prof.h"
#include "prof_internal.h"
|
|
||||||
|
|
||||||
#pragma warning(disable:4305; disable:4244)
|
|
||||||
|
|
||||||
// Translate a heat factor into display colors.
//
// 'factor' is clamped to [0,1].  text_color_ret always receives the blend
// between the "cold" and "hot" text colors.  When the clamped factor reaches
// GLOW_RANGE or more, glow_color_ret and *glow_alpha_ret are filled in as
// well and 1 is returned; below that, *glow_alpha_ret is set to 0,
// glow_color_ret is left untouched, and 0 is returned.
static int get_colors(float factor,
                      float text_color_ret[3],
                      float glow_color_ret[3],
                      float *glow_alpha_ret)
{
   const float GLOW_RANGE = 0.5f;
   const float GLOW_ALPHA_MAX = 0.5f;

   float text_hot[3]  = {1, 1.0, 0.9};
   float text_cold[3] = {0.15, 0.9, 0.15};
   float glow_lo[3]   = {0.5f, 0.5f, 0};
   float glow_hi[3]   = {1.0f, 1.0f, 0};

   float ramp;
   int c;

   if (factor < 0)
      factor = 0;
   else if (factor > 1)
      factor = 1;

   for (c = 0; c < 3; ++c)
      text_color_ret[c] = text_cold[c] + (text_hot[c] - text_cold[c]) * factor;

   // position of the factor inside the glowing part of the range;
   // negative means the glow has not started yet
   ramp = (factor - GLOW_RANGE) / (1 - GLOW_RANGE);

   if (!(ramp < 0)) {
      for (c = 0; c < 3; ++c)
         glow_color_ret[c] = glow_lo[c] + (glow_hi[c] - glow_lo[c]) * factor;
      *glow_alpha_ret = ramp * GLOW_ALPHA_MAX;
      return 1;
   }

   *glow_alpha_ret = 0;
   return 0;
}
|
|
||||||
|
|
||||||
// Emit the four corners of the axis-aligned rectangle (x0,y0)-(x1,y1) as
// vertices, in the order (x0,y0), (x1,y0), (x1,y1), (x0,y1).  Must be called
// between glBegin/glEnd; the winding is not adjusted for the sign of the
// extents (Prof_draw_gl turns face culling off before drawing).
static void draw_rectangle(float x0, float y0, float x1, float y1)
{
   int corner;
   for (corner = 0; corner < 4; ++corner) {
      float x = (corner == 1 || corner == 2) ? x1 : x0;
      float y = (corner >= 2) ? y1 : y0;
      glVertex2f(x, y);
   }
}
|
|
||||||
|
|
||||||
// Screen placement of the history graph; handed to graph_func through its
// opaque 'data' pointer.
typedef struct
{
   float x0;   // screen x of history column 0
   float y0;   // screen y of a sample value of 0
   float sx;   // screen units per history column
   float sy;   // screen units per unit of sample value
} GraphLocation;
|
|
||||||
|
|
||||||
static void graph_func(int id, int x0, int x1, float *values, void *data)
|
|
||||||
{
|
|
||||||
GraphLocation *loc = (GraphLocation *) data;
|
|
||||||
int i, r,g,b;
|
|
||||||
|
|
||||||
// trim out values that are under 0.2 ms to accelerate rendering
|
|
||||||
while (x0 < x1 && (*values < 0.0002f)) { ++x0; ++values; }
|
|
||||||
while (x1 > x0 && (values[x1-1-x0] < 0.0002f)) --x1;
|
|
||||||
|
|
||||||
if (id == 0)
|
|
||||||
glColor4f(1,1,1,0.5);
|
|
||||||
else {
|
|
||||||
if (x0 == x1) return;
|
|
||||||
|
|
||||||
id = (id >> 8) + id;
|
|
||||||
r = id * 37;
|
|
||||||
g = id * 59;
|
|
||||||
b = id * 45;
|
|
||||||
#pragma warning(disable:4761)
|
|
||||||
glColor3ub((r & 127) + 80, (g & 127) + 80, (b & 127) + 80);
|
|
||||||
}
|
|
||||||
|
|
||||||
glBegin(GL_LINE_STRIP);
|
|
||||||
if (x0 == x1) {
|
|
||||||
float x,y;
|
|
||||||
x = loc->x0 + x0 * loc->sx;
|
|
||||||
y = loc->y0 + values[0] * loc->sy;
|
|
||||||
glVertex2f(x,loc->y0);
|
|
||||||
glVertex2f(x, y);
|
|
||||||
}
|
|
||||||
for (i=0; i < x1-x0; ++i) {
|
|
||||||
float x,y;
|
|
||||||
x = loc->x0 + (i+x0) * loc->sx;
|
|
||||||
y = loc->y0 + values[i] * loc->sy;
|
|
||||||
glVertex2f(x,y);
|
|
||||||
}
|
|
||||||
glEnd();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Draw the per-zone history graph with its origin at <sx,sy>.
// x_spacing is the screen-space width of one history sample; y_spacing is
// the screen-space height of one millisecond (it is multiplied by 1000
// before being handed to graph_func).  The last 128 frames are requested
// from Prof_graph.  Compiles to an empty function unless Prof_ENABLED is
// defined.
Prof_extern_C void Prof_draw_graph_gl(float sx, float sy, float x_spacing, float y_spacing)
{
#ifdef Prof_ENABLED
   Prof_Begin(iprof_draw_graph)
   GraphLocation where;

   where.x0 = sx;
   where.y0 = sy;
   where.sx = x_spacing;
   where.sy = y_spacing * 1000;

   Prof_graph(128, graph_func, &where);
   Prof_End
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
// Formatting floats with sprintf() was taking 10-20% of the Prof_draw time,
// so small non-negative values are assembled from precomputed two-digit
// strings instead.

static char int_to_string[100][4];              // "0"   .. "99"
static char int_to_string_decimal[100][4];      // ".00" .. ".99"
static char int_to_string_mid_decimal[100][4];  // "0.0" .. "9.9"

// Fill the three lookup tables above.  Must run before the fast paths of
// float_to_string are used; Prof_draw_gl calls it lazily.
static void int_to_string_init(void)
{
   int i;
   for (i=0; i < 100; ++i) {
      sprintf(int_to_string[i], "%d", i);
      sprintf(int_to_string_decimal[i], ".%02d", i);
      sprintf(int_to_string_mid_decimal[i], "%d.%d", i/10, i % 10);
   }
}

// printf formats for the slow path, indexed by number of decimal places
static const char * const formats[5] =
{
   "%.0f",
   "%.1f",
   "%.2f",
   "%.3f",
   "%.4f",
};

// Write 'num' into 'buf' with 'precision' decimal places.
//
//   buf       -- destination; the caller must provide enough room (the
//                fast paths write at most 7 bytes, the sprintf fallback is
//                unbounded)
//   num       -- value to format
//   precision -- decimal places; values outside 0..4 are clamped so the
//                format table is never indexed out of range
//
// For precision 2, 3 and 4 a table-driven fast path is used when num lies
// in [0,100), [0,10) and [0,1) respectively.  The fast path truncates the
// last digit rather than rounding it.  Everything else -- including
// negative values and NaN -- goes through sprintf.
static void float_to_string(char *buf, float num, int precision)
{
   int x,y;

   if (precision < 0) precision = 0;
   if (precision > 4) precision = 4;

   // The range tests are written as !(in range) so that NaN fails them and
   // falls through to sprintf instead of being converted to int.
   switch(precision) {
      case 2:
         if (!(num >= 0 && num < 100))
            break;
         x = (int) num;
         y = (int) ((num - x) * 100);
         strcpy(buf, int_to_string[x]);
         strcat(buf, int_to_string_decimal[y]);
         return;
      case 3:
         if (!(num >= 0 && num < 10))
            break;
         num *= 10;
         x = (int) num;
         y = (int) ((num - x) * 100);
         strcpy(buf, int_to_string_mid_decimal[x]);
         strcat(buf, int_to_string_decimal[y]+1);   // +1 skips the '.'
         return;
      case 4:
         if (!(num >= 0 && num < 1))
            break;
         num *= 100;
         x = (int) num;
         y = (int) ((num - x) * 100);
         buf[0] = '0';
         strcpy(buf+1, int_to_string_decimal[x]);
         strcat(buf, int_to_string_decimal[y]+1);   // +1 skips the '.'
         return;
   }
   sprintf(buf, formats[precision], num);
}
|
|
||||||
|
|
||||||
/*
 * Prof_draw_gl -- render the current profiler report with OpenGL.
 *
 *    <sx,sy>              -- location where the top line is drawn
 *    <full_width,height>  -- total size of the display; the number of
 *                            records shown is limited to what fits in height
 *    line_spacing         -- added to sy after each line; negative if y
 *                            decreases down the screen
 *    precision            -- decimal places for time values, clamped to 1..4
 *    printText            -- draws a string at the given coordinate
 *    textWidth            -- returns the width of a string
 *
 * Face culling and 2D texturing are saved on entry; culling is disabled for
 * the whole call, texturing is disabled for the background quads, and both
 * are re-enabled on exit if they were enabled before.
 *
 * Compiles to an empty function unless Prof_ENABLED is defined.
 */
Prof_extern_C void Prof_draw_gl(float sx, float sy,
                                float full_width, float height,
                                float line_spacing, int precision,
                                void (*printText)(float x, float y, char *str),
                                float (*textWidth)(char *str))
{
#ifdef Prof_ENABLED
   Prof_Begin(iprof_draw)

   int i,j,n,o;
   GLuint cull, texture;
   float backup_sy;

   float field_width = textWidth("5555.55");
   float name_width  = full_width - field_width * 3;
   float plus_width  = textWidth("+");

   // Height of one line.  This must be fabs(): abs() takes an int, so a
   // fractional line_spacing would be truncated first (and a spacing below
   // 1 would become 0 and be divided by further down).
   float line_height = (float) fabs(line_spacing);

   int max_records;

   Prof_Report *pob;

   if (!int_to_string[0][0]) int_to_string_init();

   if (precision < 1) precision = 1;
   if (precision > 4) precision = 4;

   // disable face culling to avoid having to get winding correct
   texture = glIsEnabled(GL_TEXTURE_2D);
   cull = glIsEnabled(GL_CULL_FACE);
   if (cull == GL_TRUE) {
      glDisable(GL_CULL_FACE);
   }

   pob = Prof_create_report();

   // Title bars; each one takes one and a half lines.
   for (i=0; i < NUM_TITLE; ++i) {
      if (pob->title[i]) {
         float header_x0 = sx;
         float header_x1 = header_x0 + full_width;

         if (i == 0)
            glColor4f(0.1f, 0.3f, 0, 0.85);
         else
            glColor4f(0.2f, 0.1f, 0.1f, 0.85);

         glBegin(GL_QUADS);
         draw_rectangle(header_x0, sy-2, header_x1, sy-line_spacing+2);
         glEnd();

         if (i == 0)
            glColor4f(0.6, 0.4, 0, 0);
         else
            glColor4f(0.8f, 0.1f, 0.1f, 0);

         printText(sx+2, sy, pob->title[i]);

         sy += 1.5*line_spacing;
         height -= line_height*1.5;
      }
   }

   // How many records fit in the remaining height.  A zero line height or
   // a display that is already used up yields no records instead of a
   // division by zero / negative count.
   max_records = (line_height > 0) ? (int) (height / line_height) : 0;
   if (max_records < 0) max_records = 0;

   // o is the index of the first record shown; scroll just far enough that
   // the highlighted record is the last visible one if it would otherwise
   // fall off the bottom.
   o = 0;
   n = pob->num_record;
   if (n > max_records) n = max_records;
   if (pob->hilight >= o + n) {
      o = pob->hilight - n + 1;
   }

   backup_sy = sy;

   // Draw the background colors for the zone data.
   glDisable(GL_TEXTURE_2D);
   glBegin(GL_QUADS);

   // header row
   glColor4f(0,0,0,0.85);
   draw_rectangle(sx, sy, sx + full_width, sy - line_spacing);
   sy += line_spacing;

   // alternating stripes, with the highlighted record in its own color
   for (i = 0; i < n; i++) {
      float y0, y1;

      if (i & 1) {
         glColor4f(0.1, 0.1f, 0.2, 0.85);
      } else {
         glColor4f(0.1f, 0.1f, 0.3, 0.85);
      }
      if (i+o == pob->hilight)
         glColor4f(0.3f, 0.3f, 0.1f, 0.85);

      y0 = sy;
      y1 = sy - line_spacing;

      draw_rectangle(sx, y0, sx + full_width, y1);
      sy += line_spacing;
   }
   glEnd();

   // Second pass over the same rows for the text.
   sy = backup_sy;
   glColor4f(0.7,0.7,0.7,0);

   if (pob->header[0])
      printText(sx+8, sy, pob->header[0]);

   // remaining headers are centered over their value columns
   for (j=1; j < NUM_HEADER; ++j)
      if (pob->header[j])
         printText(sx + name_width + field_width * (j-1) +
                   field_width/2 - textWidth(pob->header[j])/2, sy, pob->header[j]);

   sy += line_spacing;

   for (i = 0; i < n; i++) {
      char buf[256], *b = buf;
      Prof_Report_Record *r = &pob->record[i+o];
      float text_color[3], glow_color[3];
      float glow_alpha;
      float x = sx + textWidth(" ") * r->indent + plus_width/2;
      if (r->prefix) {
         buf[0] = r->prefix;
         ++b;
      } else {
         x += plus_width;
      }
      // The name is capped at 200 characters so that prefix + name +
      // " (number)" can never overrun buf.
      if (r->number)
         sprintf(b, "%.200s (%d)", r->name, r->number);
      else
         sprintf(b, "%.200s", r->name);
      if (get_colors(r->heat, text_color, glow_color, &glow_alpha)) {
         glColor4f(glow_color[0], glow_color[1], glow_color[2], glow_alpha);
         //printText(x+2, sy-1, buf);
         printText(x+1, sy, buf);
      }
      glColor3fv(text_color);
      printText(x + 1, sy, buf);

      // right-aligned value columns; column 2 always uses two decimals
      for (j=0; j < NUM_VALUES; ++j) {
         if (r->value_flag & (1 << j)) {
            int pad;
            float_to_string(buf, r->values[j], j == 2 ? 2 : precision);
            pad = field_width- plus_width - textWidth(buf);
            if (r->indent) pad += plus_width;
            printText(sx + pad + name_width + field_width * j, sy, buf);
         }
      }

      sy += line_spacing;
   }

   Prof_free_report(pob);

   // restore the GL state found on entry
   if (cull == GL_TRUE)
      glEnable(GL_CULL_FACE);
   if (texture == GL_TRUE)
      glEnable(GL_TEXTURE_2D);

   Prof_End
#endif
}
|
|
||||||
|
|
|
@ -1,166 +0,0 @@
|
||||||
#include <assert.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "prof.h"
|
|
||||||
#include "prof_internal.h"
|
|
||||||
|
|
||||||
Prof_Define(_global);
|
|
||||||
|
|
||||||
Prof_Zone_Stack Prof_dummy ; // impossible parent
|
|
||||||
Prof_Zone_Stack Prof_dummy2 ;
|
|
||||||
Prof_Zone_Stack *Prof_stack = &Prof_dummy2;
|
|
||||||
|
|
||||||
int Prof_num_zones;
|
|
||||||
Prof_Zone *Prof_zones[];
|
|
||||||
|
|
||||||
#define MAX_HASH_SIZE 65536 // not unlimited, to catch unbalanced BEGIN/END_PROF
|
|
||||||
#define INIT_HASH_SIZE 256 // balance resource usage and avoid initial growth
|
|
||||||
|
|
||||||
static Prof_Zone_Stack *init_hash[] = { &Prof_dummy };
|
|
||||||
static Prof_Zone_Stack **zone_hash = init_hash;
|
|
||||||
static int zone_hash_count = 1;
|
|
||||||
static int zone_hash_max = 1;
|
|
||||||
static int zone_hash_mask = 0;
|
|
||||||
|
|
||||||
static int hash(Prof_Zone *z, Prof_Zone_Stack *s)
|
|
||||||
{
|
|
||||||
int n = (int) z + (int) s;
|
|
||||||
return n + (n >> 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void insert_node(Prof_Zone_Stack *q)
|
|
||||||
{
|
|
||||||
int h = hash(q->zone, q->parent);
|
|
||||||
int x = h & zone_hash_mask;
|
|
||||||
int s = ((h << 4) + (h >> 4)) | 1;
|
|
||||||
|
|
||||||
while (zone_hash[x] != &Prof_dummy)
|
|
||||||
x = (x + s) & zone_hash_mask;
|
|
||||||
|
|
||||||
zone_hash[x] = q;
|
|
||||||
|
|
||||||
++zone_hash_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void init_zone(Prof_Zone *zone)
|
|
||||||
{
|
|
||||||
Prof_zones[Prof_num_zones++] = zone;
|
|
||||||
|
|
||||||
zone->initialized = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count how many entries of the zone stack starting at 'stack' (following
// the parent links until NULL) belong to 'zone'.
static int count_recursion_depth(Prof_Zone_Stack *stack, Prof_Zone *zone)
{
   int depth = 0;
   Prof_Zone_Stack *node;

   for (node = stack; node != NULL; node = node->parent)
      if (node->zone == zone)
         depth += 1;

   return depth;
}
|
|
||||||
|
|
||||||
// Allocate and zero-initialize the stack node that represents 'zone' being
// entered while 'parent' is the current stack.  recursion_depth is the
// number of times 'zone' already occurs on the parent chain.
//
// Never returns NULL: callers dereference the result unconditionally, so
// an allocation failure aborts the process here rather than crashing later
// at an unrelated location.
static Prof_Zone_Stack *createStackNode(Prof_Zone *zone, Prof_Zone_Stack *parent)
{
   // create a new node
   Prof_Zone_Stack *z = (Prof_Zone_Stack *) malloc(sizeof(*z));
   if (z == NULL)
      abort();

   z->zone = zone;
   z->parent = parent;
   z->total_entry_count = 0;
   z->total_hier_ticks = 0;
   z->total_self_ticks = 0;
   z->t_self_start = 0;
   z->highlevel = NULL;
   z->recursion_depth = count_recursion_depth(parent, zone);
   return z;
}
|
|
||||||
|
|
||||||
static void init_zone_hash(int size)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
assert(size <= MAX_HASH_SIZE);
|
|
||||||
zone_hash_max = size;
|
|
||||||
zone_hash_count = 0;
|
|
||||||
zone_hash = (Prof_Zone_Stack **) malloc(sizeof(*zone_hash) * zone_hash_max);
|
|
||||||
zone_hash_mask = size-1;
|
|
||||||
for (i=0; i < zone_hash_max; ++i)
|
|
||||||
zone_hash[i] = &Prof_dummy;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void Prof_init_lowlevel(void);
|
|
||||||
|
|
||||||
// this code is structured to minimize computation
|
|
||||||
// assuming there's a hit in the very first slot
|
|
||||||
Prof_extern_C Prof_Zone_Stack *Prof_StackAppend(Prof_Zone *zone)
|
|
||||||
{
|
|
||||||
int h = hash(zone, Prof_stack), s;
|
|
||||||
int x = h & zone_hash_mask;
|
|
||||||
Prof_Zone_Stack *z = zone_hash[x];
|
|
||||||
if (z->parent == Prof_stack && z->zone == zone) return z;
|
|
||||||
if (z != &Prof_dummy) {
|
|
||||||
|
|
||||||
// compute a secondary hash function; force it to be odd
|
|
||||||
// so it's relatively prime to the power-of-two table size
|
|
||||||
s = ((h << 4) + (h >> 4)) | 1;
|
|
||||||
for(;;) {
|
|
||||||
x = (x + s) & zone_hash_mask;
|
|
||||||
z = zone_hash[x];
|
|
||||||
if (z->parent == Prof_stack && z->zone == zone) return z;
|
|
||||||
if (z == &Prof_dummy) break;
|
|
||||||
}
|
|
||||||
// loop is guaranteed to terminate because the hash table is never full
|
|
||||||
}
|
|
||||||
|
|
||||||
// now's as good a time as any to initialize this zone
|
|
||||||
if (!zone->initialized) {
|
|
||||||
if (zone_hash_max == 1) {
|
|
||||||
Prof_init_lowlevel();
|
|
||||||
// the above is reentrant since it initializes _global
|
|
||||||
// so now invariants are broken, so start over
|
|
||||||
return Prof_StackAppend(zone);
|
|
||||||
}
|
|
||||||
init_zone(zone);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if we need to grow the table
|
|
||||||
// we keep it at most 1/2 full to be very fast
|
|
||||||
if (zone_hash_count*2 > zone_hash_max) {
|
|
||||||
Prof_Zone_Stack **old_hash = zone_hash, *z;
|
|
||||||
int i,n = zone_hash_max;
|
|
||||||
|
|
||||||
init_zone_hash(zone_hash_max*2);
|
|
||||||
|
|
||||||
for (i=0; i < n; ++i)
|
|
||||||
if (old_hash[i] != &Prof_dummy)
|
|
||||||
insert_node(old_hash[i]);
|
|
||||||
|
|
||||||
z = createStackNode(zone, Prof_stack);
|
|
||||||
insert_node(z);
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
// insert new entry in hash table
|
|
||||||
++zone_hash_count;
|
|
||||||
return zone_hash[x] = createStackNode(zone, Prof_stack);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call 'func' once for every stack node currently stored in the zone hash
// table, in table-slot order.  Empty slots (those holding &Prof_dummy) are
// skipped.
void Prof_traverse(void (*func)(Prof_Zone_Stack *z))
{
   int slot = 0;
   while (slot < zone_hash_max) {
      if (zone_hash[slot] != &Prof_dummy)
         func(zone_hash[slot]);
      ++slot;
   }
}
|
|
||||||
|
|
||||||
static void Prof_init_lowlevel(void)
|
|
||||||
{
|
|
||||||
init_zone_hash(INIT_HASH_SIZE);
|
|
||||||
|
|
||||||
Prof_init_highlevel();
|
|
||||||
|
|
||||||
// intentionally unbalanced, this wraps everything else
|
|
||||||
{
|
|
||||||
Prof_Region(_global)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,152 +0,0 @@
|
||||||
#ifndef INC_PROFILER_LOWLEVEL_H
|
|
||||||
#define INC_PROFILER_LOWLEVEL_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
#define Prof_C "C"
|
|
||||||
#define Prof_extern_C extern "C"
|
|
||||||
#define Prof_dummy_declare
|
|
||||||
#else
|
|
||||||
#define Prof_C
|
|
||||||
#define Prof_extern_C
|
|
||||||
#define Prof_dummy_declare int Prof_dummy_dec =
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
#include "prof_win32.h"
|
|
||||||
#else
|
|
||||||
#error "need to define Prof_get_timestamp() and Prof_Int64"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
char * name;
|
|
||||||
void * highlevel;
|
|
||||||
char initialized;
|
|
||||||
char visited;
|
|
||||||
char pad0,pad1;
|
|
||||||
} Prof_Zone;
|
|
||||||
|
|
||||||
typedef struct Prof_Zone_Stack
|
|
||||||
{
|
|
||||||
Prof_Int64 t_self_start;
|
|
||||||
|
|
||||||
Prof_Int64 total_self_ticks;
|
|
||||||
Prof_Int64 total_hier_ticks;
|
|
||||||
|
|
||||||
unsigned int total_entry_count;
|
|
||||||
|
|
||||||
struct Prof_Zone_Stack * parent;
|
|
||||||
Prof_Zone * zone;
|
|
||||||
int recursion_depth;
|
|
||||||
|
|
||||||
void * highlevel;
|
|
||||||
} Prof_Zone_Stack;
|
|
||||||
|
|
||||||
|
|
||||||
extern Prof_C Prof_Zone_Stack * Prof_stack; // current Zone stack
|
|
||||||
extern Prof_C Prof_Zone_Stack Prof_dummy; // parent never matches
|
|
||||||
|
|
||||||
extern Prof_C Prof_Zone_Stack * Prof_StackAppend(Prof_Zone *zone);
|
|
||||||
// return the zone stack created by pushing 'zone' on the current zone stack (Prof_stack)
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef Prof_ENABLED
|
|
||||||
|
|
||||||
static Prof_Int64 Prof_time;
|
|
||||||
|
|
||||||
#define Prof_Begin_Cache(z) \
|
|
||||||
/* declare a static cache of the zone stack */ \
|
|
||||||
static Prof_Zone_Stack *Prof_cache = &Prof_dummy
|
|
||||||
|
|
||||||
#define Prof_Begin_Raw(z) \
|
|
||||||
Prof_Begin_Cache(z); \
|
|
||||||
Prof_Begin_Code(z)
|
|
||||||
|
|
||||||
#define Prof_Begin_Code(z) \
|
|
||||||
Prof_dummy_declare ( \
|
|
||||||
\
|
|
||||||
/* check the cached Zone_Stack and update if needed */ \
|
|
||||||
(Prof_cache->parent != Prof_stack \
|
|
||||||
? Prof_cache = Prof_StackAppend(&z) \
|
|
||||||
: 0), \
|
|
||||||
\
|
|
||||||
++Prof_cache->total_entry_count, \
|
|
||||||
Prof_get_timestamp(&Prof_time), \
|
|
||||||
\
|
|
||||||
/* stop the timer on the parent zone stack */ \
|
|
||||||
(Prof_stack->total_self_ticks += \
|
|
||||||
Prof_time - Prof_stack->t_self_start), \
|
|
||||||
\
|
|
||||||
/* make cached stack current */ \
|
|
||||||
Prof_stack = Prof_cache, \
|
|
||||||
\
|
|
||||||
/* start the timer on this stack */ \
|
|
||||||
Prof_stack->t_self_start = Prof_time, \
|
|
||||||
0)
|
|
||||||
|
|
||||||
#define Prof_End_Raw() \
|
|
||||||
\
|
|
||||||
(Prof_get_timestamp(&Prof_time), \
|
|
||||||
\
|
|
||||||
/* stop timer for current zone stack */ \
|
|
||||||
Prof_stack->total_self_ticks += \
|
|
||||||
Prof_time - Prof_stack->t_self_start, \
|
|
||||||
\
|
|
||||||
/* make parent chain current */ \
|
|
||||||
Prof_stack = Prof_stack->parent, \
|
|
||||||
\
|
|
||||||
/* start timer for parent zone stack */ \
|
|
||||||
Prof_stack->t_self_start = Prof_time)
|
|
||||||
|
|
||||||
|
|
||||||
#define Prof_Declare(z) Prof_Zone Prof_region_##z
|
|
||||||
#define Prof_Define(z) Prof_Declare(z) = { #z }
|
|
||||||
#define Prof_Region(z) Prof_Begin_Raw(Prof_region_##z);
|
|
||||||
#define Prof_End Prof_End_Raw();
|
|
||||||
|
|
||||||
#define Prof_Begin(z) static Prof_Define(z); Prof_Region(z)
|
|
||||||
#define Prof_Counter(z) Prof_Begin(z) Prof_End
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
|
|
||||||
#define Prof(x) static Prof_Define(x); Prof_Scope(x)
|
|
||||||
|
|
||||||
#define Prof_Scope(x) \
|
|
||||||
Prof_Begin_Cache(x); \
|
|
||||||
Prof_Scope_Var Prof_scope_var(Prof_region_ ## x, Prof_cache)
|
|
||||||
|
|
||||||
struct Prof_Scope_Var {
|
|
||||||
inline Prof_Scope_Var(Prof_Zone &zone, Prof_Zone_Stack * &Prof_cache);
|
|
||||||
inline ~Prof_Scope_Var();
|
|
||||||
};
|
|
||||||
|
|
||||||
inline Prof_Scope_Var::Prof_Scope_Var(Prof_Zone &zone, Prof_Zone_Stack * &Prof_cache) {
|
|
||||||
Prof_Begin_Code(zone);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Prof_Scope_Var::~Prof_Scope_Var() {
|
|
||||||
Prof_End_Raw();
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#else // ifdef Prof_ENABLED
|
|
||||||
|
|
||||||
/* Fallback definitions: every instrumentation macro expands to nothing. */
#ifdef __cplusplus
   #define Prof(x)
   #define Prof_Scope(x)
#endif

#define Prof_Define(name)
#define Prof_Begin(z)
#define Prof_End
#define Prof_Region(z)
#define Prof_Counter(z)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // INC_PROFILER_LOWLEVEL_H
|
|
||||||
|
|
|
@ -1,53 +0,0 @@
|
||||||
#ifndef Prof_INC_PROF_INTERNAL_H
|
|
||||||
#define Prof_INC_PROF_INTERNAL_H
|
|
||||||
|
|
||||||
// report functions
|
|
||||||
|
|
||||||
#define NUM_VALUES 4
|
|
||||||
#define NUM_TITLE 2
|
|
||||||
#define NUM_HEADER (NUM_VALUES+1)
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int indent;
|
|
||||||
char *name;
|
|
||||||
int number;
|
|
||||||
char prefix;
|
|
||||||
int value_flag;
|
|
||||||
double values[NUM_VALUES];
|
|
||||||
double heat;
|
|
||||||
|
|
||||||
// used internally
|
|
||||||
void *zone;
|
|
||||||
} Prof_Report_Record;
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
char *title[NUM_TITLE];
|
|
||||||
char *header[NUM_HEADER];
|
|
||||||
int num_record;
|
|
||||||
int hilight;
|
|
||||||
Prof_Report_Record *record;
|
|
||||||
} Prof_Report;
|
|
||||||
|
|
||||||
extern void Prof_free_report(Prof_Report *z);
|
|
||||||
extern Prof_Report *Prof_create_report(void);
|
|
||||||
|
|
||||||
|
|
||||||
// really internal functions
|
|
||||||
|
|
||||||
extern void Prof_graph(int num_frames,
|
|
||||||
void (*callback)(int id, int x0, int x1, float *values, void *data),
|
|
||||||
void *data);
|
|
||||||
|
|
||||||
extern void Prof_init_highlevel();
|
|
||||||
|
|
||||||
extern double Prof_get_time(void);
|
|
||||||
|
|
||||||
extern int Prof_num_zones;
|
|
||||||
extern Prof_Zone *Prof_zones[];
|
|
||||||
|
|
||||||
extern Prof_Declare(_global);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,774 +0,0 @@
|
||||||
#include <math.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "prof.h"
|
|
||||||
#include "prof_internal.h"
|
|
||||||
|
|
||||||
// whether zone-self-data is kept to allow the history graph
|
|
||||||
#define Prof_ZONE_HISTORY
|
|
||||||
|
|
||||||
// whether the full, detailed (and large) call history is kept
|
|
||||||
#define Prof_CALL_HISTORY
|
|
||||||
|
|
||||||
// number of frames of history to keep
|
|
||||||
#define NUM_FRAME_SLOTS 128
|
|
||||||
|
|
||||||
|
|
||||||
// number of unique zones allowed in the entire application
|
|
||||||
// @TODO: remove MAX_PROFILING_ZONES and make it dynamic
|
|
||||||
#define MAX_PROFILING_ZONES 512
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// the number of moving averages
|
|
||||||
#define NUM_PROFILE_TRACKER_HISTORY_SLOTS 3
|
|
||||||
|
|
||||||
// the number of frames to ignore before starting the moving averages
|
|
||||||
#define NUM_THROWAWAY_UPDATES 3
|
|
||||||
|
|
||||||
// threshold for a moving average of an integer to be at zero
|
|
||||||
#define INT_ZERO_THRESHHOLD 0.25
|
|
||||||
|
|
||||||
Prof_Zone *Prof_zones[MAX_PROFILING_ZONES];
|
|
||||||
|
|
||||||
#ifdef Prof_ZONE_HISTORY
|
|
||||||
static float zone_history[MAX_PROFILING_ZONES][NUM_FRAME_SLOTS]; // 256K
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// these structures are used solely to track data over time
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
double values[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
|
||||||
double variances[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
|
||||||
#ifdef Prof_CALL_HISTORY
|
|
||||||
float history[NUM_FRAME_SLOTS];
|
|
||||||
#endif
|
|
||||||
} History_Scalar;
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
History_Scalar self_time;
|
|
||||||
History_Scalar hierarchical_time;
|
|
||||||
History_Scalar entry_count;
|
|
||||||
int max_recursion;
|
|
||||||
} Profile_Tracker_Data_Record;
|
|
||||||
|
|
||||||
static History_Scalar frame_time;
|
|
||||||
|
|
||||||
static double times_to_reach_90_percent[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
|
||||||
static double precomputed_factors [NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
|
||||||
|
|
||||||
static int num_active_zones;
|
|
||||||
static int update_index; // 2^31 at 100fps = 280 days
|
|
||||||
static double last_update_time;
|
|
||||||
static Prof_Report_Mode displayed_quantity;
|
|
||||||
|
|
||||||
#define FRAME_TIME_INITIAL 0.001
|
|
||||||
|
|
||||||
static int history_index;
|
|
||||||
static int display_frame;
|
|
||||||
static int slot = 1;
|
|
||||||
|
|
||||||
/* Zero every moving average (and squared-value average) of s.
 * The per-frame history array, when compiled in, is not touched. */
static void clear(History_Scalar *s)
{
   int k = NUM_PROFILE_TRACKER_HISTORY_SLOTS;

   while (k-- > 0) {
      s->variances[k] = 0;
      s->values[k] = 0;
   }
}
|
|
||||||
|
|
||||||
/* Blend new_value into every moving average of s.
 * k_array[n] is the weight kept from the old average of slot n; the new
 * sample receives the remaining (1 - k).  The squared sample is blended the
 * same way into variances[].  With call history compiled in, the raw sample
 * is also written to the current history slot. */
static void update(History_Scalar *s, double new_value, double *k_array)
{
   const double squared = new_value * new_value;
   double *avg = s->values;
   double *avg_sq = s->variances;
   int n;

   for (n = 0; n < NUM_PROFILE_TRACKER_HISTORY_SLOTS; ++n, ++avg, ++avg_sq) {
      const double keep = k_array[n];
      *avg    = *avg    * keep + new_value * (1 - keep);
      *avg_sq = *avg_sq * keep + squared   * (1 - keep);
   }
#ifdef Prof_CALL_HISTORY
   s->history[history_index] = (float) new_value;
#endif
}
|
|
||||||
|
|
||||||
/* Force every moving average of s to new_value (and every squared-value
 * average to new_value squared), as if that value had been observed
 * forever.  With call history compiled in, the raw sample is also written
 * to the current history slot. */
static void eternity_set(History_Scalar *s, double new_value)
{
   const double squared = new_value * new_value;
   int n = 0;

   while (n < NUM_PROFILE_TRACKER_HISTORY_SLOTS) {
      s->values[n] = new_value;
      s->variances[n] = squared;
      ++n;
   }
#ifdef Prof_CALL_HISTORY
   s->history[history_index] = (float) new_value;
#endif
}
|
|
||||||
|
|
||||||
/* Value of s to display: the raw sample recorded display_frame frames ago
 * when a past frame is selected (call history compiled in), otherwise the
 * moving average of the currently selected smoothing slot. */
static double get_value(History_Scalar *s)
{
#ifdef Prof_CALL_HISTORY
   if (display_frame != 0) {
      /* adding NUM_FRAME_SLOTS keeps the ring index non-negative */
      const int frame = (history_index - display_frame + NUM_FRAME_SLOTS) % NUM_FRAME_SLOTS;
      return s->history[frame];
   }
#endif
   return s->values[slot];
}
|
|
||||||
|
|
||||||
void Prof_init_highlevel()
|
|
||||||
{
|
|
||||||
int j;
|
|
||||||
|
|
||||||
update_index = 0;
|
|
||||||
last_update_time = 0;
|
|
||||||
|
|
||||||
times_to_reach_90_percent[0] = 0.1f;
|
|
||||||
times_to_reach_90_percent[1] = 0.8f;
|
|
||||||
times_to_reach_90_percent[2] = 2.5f;
|
|
||||||
|
|
||||||
displayed_quantity = Prof_SELF_TIME;
|
|
||||||
|
|
||||||
clear(&frame_time);
|
|
||||||
|
|
||||||
for (j = 0; j < NUM_PROFILE_TRACKER_HISTORY_SLOTS; j++) {
|
|
||||||
frame_time.values[j] = FRAME_TIME_INITIAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef Prof_ENABLED
|
|
||||||
static Prof_Zone *expand = &Prof_region__global;
|
|
||||||
#else
|
|
||||||
static Prof_Zone *expand = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Select which quantity later reports are built around. */
Prof_extern_C void Prof_set_report_mode(Prof_Report_Mode desired)
{
   displayed_quantity = desired;
}
|
|
||||||
|
|
||||||
// visit all Prof_Zone_Stacks
|
|
||||||
extern void Prof_traverse(void (*func)(Prof_Zone_Stack *c));
|
|
||||||
|
|
||||||
/* Add c's self ticks to the hierarchical ticks of c and of each ancestor.
 * The per-zone visited flag makes a zone that appears several times on the
 * chain (recursion) receive the ticks only once; the flags are cleared
 * again before returning. */
static void propogate_stack(Prof_Zone_Stack *c)
{
   Prof_Zone_Stack *node;

   /* first pass: credit each distinct zone on the chain once */
   for (node = c; node->zone; node = node->parent) {
      if (node->zone->visited)
         continue;
      node->total_hier_ticks += c->total_self_ticks;
      node->zone->visited = 1;
   }

   /* second pass: undo the marks */
   for (node = c; node->zone; node = node->parent)
      node->zone->visited = 0;
}
|
|
||||||
|
|
||||||
/* Reset the per-update tick and entry counters of one zone stack. */
static void clear_stack(Prof_Zone_Stack *c)
{
   c->total_entry_count = 0;
   c->total_self_ticks = 0;
   c->total_hier_ticks = 0;
}
|
|
||||||
|
|
||||||
static double sum;
|
|
||||||
/* Traversal callback: accumulate c's self ticks into the file-level sum. */
static void sum_times(Prof_Zone_Stack *c)
{
   sum = sum + c->total_self_ticks;
}
|
|
||||||
|
|
||||||
static double timestamps_to_seconds;
|
|
||||||
/* Traversal callback: fold this update's counters for zone stack c into its
 * tracked history, creating the tracking record on first use.
 *
 * Ticks are converted to seconds with timestamps_to_seconds.  For the first
 * NUM_THROWAWAY_UPDATES updates the averages are set outright instead of
 * blended.  With zone history compiled in, the self time is also added to
 * the float that the zone's highlevel pointer currently refers to.
 *
 * If the tracking record cannot be allocated, the stack is skipped for this
 * update and allocation is retried on the next one. */
static void update_history(Prof_Zone_Stack *c)
{
   double self_time, hier_time, entry_count;

   Profile_Tracker_Data_Record *record = (Profile_Tracker_Data_Record *) c->highlevel;
   Prof_Zone *z = c->zone;

   if (record == NULL) {
      /* calloc rather than malloc: history[] slots that have not been
         written yet must read back as 0, not as uninitialized memory,
         when get_value() looks at past frames */
      record = (Profile_Tracker_Data_Record *) calloc(1, sizeof(*record));
      if (record == NULL)
         return;
      c->highlevel = (void *) record;
      clear(&record->entry_count);
      clear(&record->self_time);
      clear(&record->hierarchical_time);
      record->max_recursion = 0;
   }

   if (c->recursion_depth > record->max_recursion)
      record->max_recursion = c->recursion_depth;

   self_time = c->total_self_ticks * timestamps_to_seconds;
   hier_time = c->total_hier_ticks * timestamps_to_seconds;
   entry_count = c->total_entry_count;

   if (update_index < NUM_THROWAWAY_UPDATES) {
      eternity_set(&record->entry_count, entry_count);
      eternity_set(&record->self_time, self_time);
      eternity_set(&record->hierarchical_time, hier_time);
   } else {
      update(&record->self_time, self_time, precomputed_factors);
      update(&record->hierarchical_time, hier_time, precomputed_factors);
      update(&record->entry_count, entry_count, precomputed_factors);
   }

#ifdef Prof_ZONE_HISTORY
   * ((float *) z->highlevel) += (float) self_time;
#endif
}
|
|
||||||
|
|
||||||
const double SPEEDSTEP_DETECTION_RATIO = 0.08;
|
|
||||||
static int speedstep_warning;
|
|
||||||
|
|
||||||
/* Per-frame profiler update.
 *
 * Propagates self ticks up the zone stacks, derives the elapsed wall time
 * and the timestamp rate for this frame, updates the SpeedStep warning
 * flag, and -- when record_data is nonzero -- folds the frame into the
 * per-stack and per-zone histories.  The per-frame counters of every zone
 * stack are cleared before returning in both cases.
 *
 * record_data: zero discards this frame's counters without recording them.
 *
 * Compiles to an empty function when Prof_ENABLED is not defined. */
Prof_extern_C void Prof_update(int record_data)
{
#ifdef Prof_ENABLED
   Prof_Begin(iprof_update)

   static History_Scalar integer_timestamps_per_second;
   static Prof_Int64 last_integer_timestamp;
   static Prof_Int64 current_integer_timestamp;

   int i;
   double now, dt;
   Prof_Int64 timestamp_delta;
   double timestamps_per_second;

   assert(Prof_num_zones <= MAX_PROFILING_ZONES);

   Prof_traverse(propogate_stack);

   // Precompute the time factors

   now = Prof_get_time();

   if (update_index == 0) {
      dt = FRAME_TIME_INITIAL;
   } else {
      dt = now - last_update_time;
      if (dt == 0) dt = FRAME_TIME_INITIAL;   // avoid dividing by zero below
   }

   last_update_time = now;

   for (i = 0; i < NUM_PROFILE_TRACKER_HISTORY_SLOTS; i++) {
      precomputed_factors[i] = pow(0.1f, dt / times_to_reach_90_percent[i]);
   }

   precomputed_factors[0] = 0; // instantaneous.

   Prof_get_timestamp(&current_integer_timestamp);
   if (update_index == 0) {
      // no previous timestamp yet: use the ticks recorded so far instead
      sum = 0;
      Prof_traverse(sum_times);
      if (sum == 0) sum = 1;
      timestamp_delta = (Prof_Int64) sum;
   } else {
      timestamp_delta = current_integer_timestamp - last_integer_timestamp;
      if (timestamp_delta == 0) timestamp_delta = 1;
   }

   last_integer_timestamp = current_integer_timestamp;
   timestamps_per_second = (double) timestamp_delta / dt;

   if (update_index < NUM_THROWAWAY_UPDATES) {
      eternity_set(&integer_timestamps_per_second, timestamps_per_second);
   } else {
      update(&integer_timestamps_per_second, timestamps_per_second, precomputed_factors);
   }

   {
      // flag the results as unreliable when the measured timestamp rate
      // itself varies too much (relative standard deviation above the ratio)
      const int ss_slot = 1;
      double ss_val, ss_variance, ss_stdev, ss_ratio;

      ss_val = integer_timestamps_per_second.values[ss_slot];
      ss_variance = integer_timestamps_per_second.variances[ss_slot] - ss_val*ss_val;
      ss_stdev = sqrt(fabs(ss_variance));
      if (ss_val) {
         ss_ratio = ss_stdev / fabs(ss_val);
      } else {
         ss_ratio = 0;
      }

      speedstep_warning = (ss_ratio > SPEEDSTEP_DETECTION_RATIO);
   }

   if (!record_data) {
      Prof_traverse(clear_stack);
      Prof_End
      return;
   }

   if (timestamps_per_second) {
      timestamps_to_seconds = 1.0 / timestamps_per_second;
   } else {
      timestamps_to_seconds = 0;
   }

#ifdef Prof_ZONE_HISTORY
   // point each zone at its slot for this frame; update_history accumulates into it
   for (i=0; i < Prof_num_zones; ++i) {
      Prof_zones[i]->highlevel = (void *) &zone_history[i][history_index];
      zone_history[i][history_index] = 0;
   }
#endif

   Prof_traverse(update_history);

   update(&frame_time, dt, precomputed_factors);

   ++update_index;
   history_index = (history_index + 1) % NUM_FRAME_SLOTS;

   Prof_traverse(clear_stack);

   Prof_End
#endif // Prof_ENABLED
}
|
|
||||||
|
|
||||||
static Prof_Report *allocate_buffer(int n)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
Prof_Report *pob = (Prof_Report *) malloc(sizeof(*pob));
|
|
||||||
pob->num_record = n;
|
|
||||||
pob->record = (Prof_Report_Record *) malloc(sizeof(*pob->record) * pob->num_record);
|
|
||||||
pob->title[0] = pob->title[1] = NULL;
|
|
||||||
for (i=0; i < NUM_TITLE; ++i)
|
|
||||||
pob->title[i] = NULL;
|
|
||||||
for (i=0; i < NUM_HEADER; ++i)
|
|
||||||
pob->header[i] = NULL;
|
|
||||||
for (i=0; i < n; ++i) {
|
|
||||||
pob->record[i].values[0] = 0;
|
|
||||||
pob->record[i].values[1] = 0;
|
|
||||||
pob->record[i].values[2] = 0;
|
|
||||||
pob->record[i].values[3] = 0;
|
|
||||||
pob->record[i].value_flag = 0;
|
|
||||||
pob->record[i].heat = 0;
|
|
||||||
pob->record[i].indent = 0;
|
|
||||||
pob->record[i].number = 0;
|
|
||||||
}
|
|
||||||
return pob;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int uncounted;
|
|
||||||
static Prof_Recursion_Mode recurse = Prof_FLATTEN_RECURSION;
|
|
||||||
|
|
||||||
/* Traversal callback for the flat (non call-graph) report: add the tracked
 * values of zone stack c to the report row of its zone.
 *
 * values[0..2] receive self time (ms), hierarchical time (ms) and entry
 * count.  When the entry count is above INT_ZERO_THRESHHOLD, the row's
 * number is raised to the stack's max recursion and the parent zone's row
 * gets a '+' prefix.  Unless a past frame is being displayed, the row's
 * heat field accumulates the scaled squared-value average of the displayed
 * quantity.  Stacks without a tracking record only bump `uncounted`. */
static void propogate_to_zone(Prof_Zone_Stack *c)
{
   Profile_Tracker_Data_Record *d = (Profile_Tracker_Data_Record *) c->highlevel;
   Prof_Zone *z = c->zone;
   Prof_Report_Record *r;
   double entries, t;

#if 1
   r = (Prof_Report_Record *) z->highlevel;
#else
   if (recurse == Prof_FLATTEN_RECURSION)
      r = (Prof_Report_Record *) z->highlevel;
   else
      r = ((Prof_Report_Record **) z->highlevel)[c->recursion_depth];
#endif

   if (d == NULL) {
      ++uncounted;
      return;
   }

   entries = get_value(&d->entry_count);

   r->values[0] += 1000 * get_value(&d->self_time);
   r->values[1] += 1000 * get_value(&d->hierarchical_time);
   r->values[2] += entries;

   // arbitrary determination for how low a moving average
   // has to go to reach 0
   if (entries > INT_ZERO_THRESHHOLD) {
      if (r->number < d->max_recursion)
         r->number = d->max_recursion;
      if (c->parent->zone)
         ((Prof_Report_Record *) c->parent->zone->highlevel)->prefix = '+';
   }

#ifdef Prof_CALL_HISTORY
   if (display_frame) return;  // no variances when examining history
#endif

   t = (displayed_quantity == Prof_HIERARCHICAL_TIME)
          ? d->hierarchical_time.variances[slot]
          : d->self_time.variances[slot];

   t = 1000 * 1000 * t;

   r->heat = (r->heat == 0) ? t
                            : r->heat + t + 2 * sqrt(r->heat * t);
}
|
|
||||||
|
|
||||||
/* Traversal callback for the call-graph report.  Each zone owns three
 * consecutive report rows: [0] "as a child of the expanded zone",
 * [1] "as a parent of the expanded zone", [2] "the expanded zone itself".
 *
 * - A stack with a live entry count marks all three rows of its parent zone
 *   with a '+' prefix.
 * - A stack whose zone is the expanded zone adds its values to its own
 *   row [2] and to row [1] of its parent zone.
 * - A stack whose parent zone is the expanded zone adds its values to its
 *   own row [0].
 *
 * Stacks without a tracking record only bump `uncounted`. */
static void propogate_expanded(Prof_Zone_Stack *c)
{
   Profile_Tracker_Data_Record *d = (Profile_Tracker_Data_Record *) c->highlevel;
   if (d == NULL) {
      ++uncounted;
      return;
   }
   if (c->parent->zone && get_value(&d->entry_count) > INT_ZERO_THRESHHOLD) {
      ((Prof_Report_Record *) c->parent->zone->highlevel)[0].prefix = '+';
      ((Prof_Report_Record *) c->parent->zone->highlevel)[1].prefix = '+';
      ((Prof_Report_Record *) c->parent->zone->highlevel)[2].prefix = '+';
   }

   if (c->zone == expand) {
      Prof_Report_Record *r = (Prof_Report_Record *) expand->highlevel;
      // accumulate this time to ourselves
      r[2].values[0] += 1000 * get_value(&d->self_time);
      r[2].values[1] += 1000 * get_value(&d->hierarchical_time);
      r[2].values[2] += get_value(&d->entry_count);
      if (d->max_recursion > r[2].number && get_value(&d->entry_count) > INT_ZERO_THRESHHOLD)
         r[2].number = d->max_recursion;
      // propagate it to the parents
      if (c->parent->zone) {
         // The parent's record lives in its own variable: overwriting d here
         // made the "child of expand" branch below read the parent's data
         // when the expanded zone recurses into itself.  The parent may also
         // have no record yet, so it is checked before use.
         Profile_Tracker_Data_Record *pd = (Profile_Tracker_Data_Record *) c->parent->highlevel;
         r = (Prof_Report_Record *) c->parent->zone->highlevel;
         r[1].values[0] += 1000 * get_value(&d->self_time);
         r[1].values[1] += 1000 * get_value(&d->hierarchical_time);
         r[1].values[2] += get_value(&d->entry_count);
         if (pd != NULL && pd->max_recursion > r[1].number && get_value(&pd->entry_count) > INT_ZERO_THRESHHOLD)
            r[1].number = pd->max_recursion;
      }
   }

   if (c->parent->zone == expand) {
      Prof_Report_Record *r = (Prof_Report_Record *) c->zone->highlevel;
      r[0].values[0] += 1000 * get_value(&d->self_time);
      r[0].values[1] += 1000 * get_value(&d->hierarchical_time);
      r[0].values[2] += get_value(&d->entry_count);
      if (d->max_recursion > r[0].number && get_value(&d->entry_count) > INT_ZERO_THRESHHOLD)
         r[0].number = d->max_recursion;
   }
}
|
|
||||||
|
|
||||||
static double compute_heat(double variance, double value)
|
|
||||||
{
|
|
||||||
double factor, stdev;
|
|
||||||
double fabs_value = fabs(value);
|
|
||||||
const float VARIANCE_TOLERANCE_FACTOR = 0.5f;
|
|
||||||
|
|
||||||
variance = variance - value*value;
|
|
||||||
if (variance < 0) variance = 0;
|
|
||||||
stdev = sqrt(variance);
|
|
||||||
|
|
||||||
if (fabs_value < 0.000001) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
factor = (stdev / fabs_value) * (1.0f / VARIANCE_TOLERANCE_FACTOR);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (factor < 0) return 0;
|
|
||||||
if (factor > 1) return 1;
|
|
||||||
return factor;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int pob_compare(const void *p, const void *q)
|
|
||||||
{
|
|
||||||
double a = ((Prof_Report_Record *) p)->values[0];
|
|
||||||
double b = ((Prof_Report_Record *) q)->values[0];
|
|
||||||
|
|
||||||
return (b < a) ? -1 : (b > a);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int pob_expand_compare(const void *p, const void *q)
|
|
||||||
{
|
|
||||||
Prof_Report_Record * a = (Prof_Report_Record *) p;
|
|
||||||
Prof_Report_Record * b = (Prof_Report_Record *) q;
|
|
||||||
|
|
||||||
if (a->indent != b->indent) {
|
|
||||||
if (a->indent == 5) return -1;
|
|
||||||
if (b->indent == 5) return 1;
|
|
||||||
if (a->indent == 3) return 1;
|
|
||||||
if (b->indent == 3) return -1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (a->values[1] == b->values[1])
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (a->values[1] < b->values[1]) {
|
|
||||||
if (a->indent == 5) return -1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (a->indent == 5) return 1;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int cursor;
|
|
||||||
static int update_cursor;
|
|
||||||
|
|
||||||
Prof_Report *Prof_create_report(void)
|
|
||||||
{
|
|
||||||
double avg_frame_time,fps;
|
|
||||||
char *displayed_quantity_name;
|
|
||||||
int i,s;
|
|
||||||
Prof_Report *pob;
|
|
||||||
|
|
||||||
if (displayed_quantity == Prof_CALL_GRAPH)
|
|
||||||
s = 3;
|
|
||||||
else
|
|
||||||
s = 1;
|
|
||||||
|
|
||||||
pob = allocate_buffer(Prof_num_zones * s);
|
|
||||||
for (i=0; i < Prof_num_zones; ++i) {
|
|
||||||
Prof_Zone *z = Prof_zones[i];
|
|
||||||
Prof_Report_Record *r = &pob->record[i*s];
|
|
||||||
z->highlevel = (void *) r;
|
|
||||||
if (displayed_quantity == Prof_CALL_GRAPH) {
|
|
||||||
r[0].name = r[1].name = r[2].name = z->name;
|
|
||||||
r[0].value_flag = 1 | 2 | 4;
|
|
||||||
r[1].value_flag = 1 | 2 | 4;
|
|
||||||
r[2].value_flag = 1 | 2 | 4;
|
|
||||||
r[0].indent = 3;
|
|
||||||
r[1].indent = 5;
|
|
||||||
r[2].indent = 0;
|
|
||||||
r[0].zone = r[1].zone = r[2].zone = (void *) z;
|
|
||||||
r[0].prefix = r[1].prefix = r[2].prefix = 0;
|
|
||||||
} else {
|
|
||||||
r->value_flag = 1 | 2 | 4;
|
|
||||||
r->name = z->name;
|
|
||||||
r->zone = (void *) z;
|
|
||||||
r->indent = 0;
|
|
||||||
r->prefix = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
avg_frame_time = frame_time.values[slot];
|
|
||||||
if (avg_frame_time == 0) avg_frame_time = 0.01f;
|
|
||||||
fps = 1.0f / avg_frame_time;
|
|
||||||
|
|
||||||
displayed_quantity_name = "*error*";
|
|
||||||
switch (displayed_quantity) {
|
|
||||||
case Prof_SELF_TIME:
|
|
||||||
displayed_quantity_name = "sort self";
|
|
||||||
break;
|
|
||||||
case Prof_HIERARCHICAL_TIME:
|
|
||||||
displayed_quantity_name = "sort hier";
|
|
||||||
break;
|
|
||||||
case Prof_CALL_GRAPH:
|
|
||||||
displayed_quantity_name = "sort hier";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
pob->title[0] = (char *) malloc(BUFSIZ);
|
|
||||||
sprintf(pob->title[0],
|
|
||||||
"%3.3lf ms/frame (fps: %3.2lf) %s",
|
|
||||||
avg_frame_time * 1000, fps, displayed_quantity_name);
|
|
||||||
|
|
||||||
#ifdef Prof_CALL_HISTORY
|
|
||||||
if (display_frame) {
|
|
||||||
sprintf(pob->title[0] + strlen(pob->title[0]), " - %d frame%s ago",
|
|
||||||
display_frame, display_frame == 1 ? "" : "s");
|
|
||||||
} else {
|
|
||||||
strcat(pob->title[0], " - current frame");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (speedstep_warning)
|
|
||||||
pob->title[1] = _strdup("WARNING: SpeedStep-like timer inconsistencies detected. Results are unreliable!");
|
|
||||||
|
|
||||||
if (displayed_quantity == Prof_CALL_GRAPH) {
|
|
||||||
Prof_Report_Record *r = (Prof_Report_Record *) expand->highlevel;
|
|
||||||
int j=0;
|
|
||||||
|
|
||||||
Prof_traverse(propogate_expanded);
|
|
||||||
|
|
||||||
r[2].prefix = '-';
|
|
||||||
|
|
||||||
for (i=0; i < pob->num_record; ++i) {
|
|
||||||
if (pob->record[i].values[0] || pob->record[i].values[1] || pob->record[i].values[2]) {
|
|
||||||
pob->record[j] = pob->record[i];
|
|
||||||
++j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pob->num_record = j;
|
|
||||||
|
|
||||||
qsort(pob->record, pob->num_record, sizeof(pob->record[0]), pob_expand_compare);
|
|
||||||
|
|
||||||
for (i=0; i < pob->num_record; ++i)
|
|
||||||
if (pob->record[i].indent == 5)
|
|
||||||
pob->record[i].indent = 3;
|
|
||||||
} else {
|
|
||||||
|
|
||||||
uncounted = 0;
|
|
||||||
Prof_traverse(propogate_to_zone);
|
|
||||||
|
|
||||||
for (i=0; i < Prof_num_zones; ++i) {
|
|
||||||
if (displayed_quantity == Prof_HIERARCHICAL_TIME) {
|
|
||||||
double t = pob->record[i].values[0];
|
|
||||||
pob->record[i].values[0] = pob->record[i].values[1];
|
|
||||||
pob->record[i].values[1] = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
pob->record[i].heat = compute_heat(pob->record[i].heat, pob->record[i].values[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
qsort(pob->record, pob->num_record, sizeof(pob->record[0]), pob_compare);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (update_cursor) {
|
|
||||||
for (i=0; i < pob->num_record; ++i) {
|
|
||||||
if (pob->record[i].zone == expand) {
|
|
||||||
cursor = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
update_cursor = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
pob->header[0] = _strdup("zone");
|
|
||||||
if (displayed_quantity == Prof_HIERARCHICAL_TIME) {
|
|
||||||
pob->header[1] = _strdup("hier");
|
|
||||||
pob->header[2] = _strdup("self");
|
|
||||||
} else {
|
|
||||||
pob->header[1] = _strdup("self");
|
|
||||||
pob->header[2] = _strdup("hier");
|
|
||||||
}
|
|
||||||
pob->header[3] = _strdup("count");
|
|
||||||
|
|
||||||
if (cursor < 0) cursor = 0;
|
|
||||||
if (cursor >= pob->num_record) cursor = pob->num_record-1;
|
|
||||||
pob->hilight = cursor;
|
|
||||||
|
|
||||||
return pob;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Release a report created by Prof_create_report, including its title and
 * header strings and its record array.  Unused string slots are NULL, which
 * free() accepts. */
void Prof_free_report(Prof_Report *z)
{
   int k;

   for (k = 0; k < NUM_TITLE; ++k)
      free(z->title[k]);

   for (k = 0; k < NUM_HEADER; ++k)
      free(z->header[k]);

   free(z->record);
   free(z);
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_move_cursor(int num)
|
|
||||||
{
|
|
||||||
cursor += num;
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_set_cursor(int num)
|
|
||||||
{
|
|
||||||
cursor = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_select(void)
|
|
||||||
{
|
|
||||||
Prof_Report *b = Prof_create_report();
|
|
||||||
if (b->hilight >= 0) {
|
|
||||||
void *z = b->record[b->hilight].zone;
|
|
||||||
if (z != NULL) {
|
|
||||||
expand = (Prof_Zone *) z;
|
|
||||||
displayed_quantity = Prof_CALL_GRAPH;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Prof_free_report(b);
|
|
||||||
update_cursor = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_select_parent(void)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
void *old = (void *) expand;
|
|
||||||
Prof_Report *b = Prof_create_report();
|
|
||||||
for (i=0; i < b->num_record; ++i) {
|
|
||||||
if (b->record[i].indent == 0) break;
|
|
||||||
if (b->record[i].zone == old) continue;
|
|
||||||
expand = (Prof_Zone *) b->record[i].zone;
|
|
||||||
}
|
|
||||||
Prof_free_report(b);
|
|
||||||
update_cursor = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_set_frame(int num)
|
|
||||||
{
|
|
||||||
if (num < 0) num = 0;
|
|
||||||
if (num >= NUM_FRAME_SLOTS) num = NUM_FRAME_SLOTS-1;
|
|
||||||
|
|
||||||
display_frame = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_move_frame(int delta)
|
|
||||||
{
|
|
||||||
// convert so negative delta = "into the past"
|
|
||||||
Prof_set_frame(display_frame - delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
Prof_extern_C void Prof_set_smoothing(int x)
|
|
||||||
{
|
|
||||||
if (x <= 0) x = 0;
|
|
||||||
if (x >= NUM_PROFILE_TRACKER_HISTORY_SLOTS)
|
|
||||||
x = NUM_PROFILE_TRACKER_HISTORY_SLOTS-1;
|
|
||||||
|
|
||||||
slot = x;
|
|
||||||
}
|
|
||||||
|
|
||||||
// currently does nothing
|
|
||||||
/* Store the requested recursion display mode in the file-level `recurse`. */
Prof_extern_C void Prof_set_recursion(Prof_Recursion_Mode e)
{
   recurse = e;
}
|
|
||||||
|
|
||||||
/* Stable identifier for a zone, derived from its name. */
static int id(Prof_Zone *z)
{
   // hash the string so that the id is consistent from
   // run to run (rather than using the pointer itself which isn't)
   // @TODO: only compute this at zone init time?
   unsigned int hash = 0x55555555;
   const char *p;

   for (p = z->name; *p != 0; ++p)
      hash = (hash << 5) + (hash >> 27) + *p;

   return hash;
}
|
|
||||||
|
|
||||||
/* Feed the per-zone self-time history of the last num_frames frames
 * (capped at NUM_FRAME_SLOTS) to callback.
 *
 * For every zone the callback gets the zone id, a half-open x range
 * [x0, x1) and a pointer to the matching samples.  When the requested span
 * wraps around the ring buffer it is delivered as two calls.  If a past
 * frame is selected, one extra call with id 0 marks its position.
 * Does nothing when zone history is compiled out. */
void Prof_graph(int num_frames, void (*callback)(int id, int x0, int x1, float *values, void *data), void *data)
{
#ifdef Prof_ZONE_HISTORY
   const int head = history_index;
   int zi;

   if (num_frames > NUM_FRAME_SLOTS)
      num_frames = NUM_FRAME_SLOTS;

   for (zi = 0; zi < Prof_num_zones; ++zi) {
      if (head < num_frames) {
         /* span wraps: newest part sits at the start of the ring,
            older part at its end */
         const int wrapped = num_frames - head;
         callback(id(Prof_zones[zi]), wrapped, num_frames, &zone_history[zi][0], data);
         callback(id(Prof_zones[zi]), 0, wrapped, &zone_history[zi][NUM_FRAME_SLOTS-wrapped], data);
      } else {
         callback(id(Prof_zones[zi]), 0, num_frames, &zone_history[zi][head-num_frames], data);
      }
   }

   // display frame "cursor"
   if (display_frame != 0) {
      float value[2] = { 2.0, 0 };
      const int x = NUM_FRAME_SLOTS-1-display_frame;
      callback(0, x, x, value, data);
   }
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
#define WIN32_LEAN_AND_MEAN
|
|
||||||
#define WIN32_EXTRA_LEAN
|
|
||||||
#include <windows.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
double Prof_get_time(void)
|
|
||||||
{
|
|
||||||
LARGE_INTEGER freq;
|
|
||||||
LARGE_INTEGER time;
|
|
||||||
|
|
||||||
BOOL ok = QueryPerformanceFrequency(&freq);
|
|
||||||
assert(ok == TRUE);
|
|
||||||
|
|
||||||
freq.QuadPart = freq.QuadPart;
|
|
||||||
|
|
||||||
ok = QueryPerformanceCounter(&time);
|
|
||||||
assert(ok == TRUE);
|
|
||||||
|
|
||||||
return time.QuadPart / (double) freq.QuadPart;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
#ifndef Prof_INC_PROF_WIN32_H
|
|
||||||
#define Prof_INC_PROF_WIN32_H
|
|
||||||
|
|
||||||
typedef __int64 Prof_Int64;
|
|
||||||
|
|
||||||
// Read the CPU time-stamp counter into *result.
// The storage class is picked per compiler: inline for C++, __forceinline
// for MSVC 6.0 or later compiling C, plain static otherwise.
// Uses MSVC-style 32-bit inline assembly.
#ifdef __cplusplus
|
|
||||||
inline
|
|
||||||
#elif _MSC_VER >= 1200
|
|
||||||
__forceinline
|
|
||||||
#else
|
|
||||||
static
|
|
||||||
#endif
|
|
||||||
void Prof_get_timestamp(Prof_Int64 *result)
|
|
||||||
{
|
|
||||||
// rdtsc leaves the counter in EDX:EAX; the low half is stored at
// result and the high half at result+4
__asm {
|
|
||||||
rdtsc;
|
|
||||||
mov ebx, result
|
|
||||||
mov [ebx], eax
|
|
||||||
mov [ebx+4], edx
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
Loading…
Reference in a new issue