mirror of
https://github.com/AquariaOSE/Aquaria.git
synced 2024-11-29 03:33:48 +00:00
remove iprof and BBGE_PROF (#74)
This commit is contained in:
parent
47f8677aa7
commit
26759c8be1
22 changed files with 3 additions and 2286 deletions
|
@ -1829,7 +1829,6 @@ const int chkDist = 2500*2500;
|
|||
|
||||
Target Avatar::getNearestTarget(const Vector &checkPos, const Vector &distPos, Entity *source, DamageType dt, bool override, std::vector<Target> *ignore)
|
||||
{
|
||||
BBGE_PROF(Avatar_getNearestTarget);
|
||||
Target t;
|
||||
|
||||
Vector targetPosition;
|
||||
|
@ -5330,8 +5329,6 @@ bool lastCursorKeyboard = false;
|
|||
|
||||
void Avatar::onUpdate(float dt)
|
||||
{
|
||||
BBGE_PROF(Avatar_onUpdate);
|
||||
|
||||
looking = 0;
|
||||
|
||||
|
||||
|
@ -5492,7 +5489,6 @@ void Avatar::onUpdate(float dt)
|
|||
|
||||
lastWaterBubble = waterBubble;
|
||||
waterBubble = 0;
|
||||
BBGE_PROF(Avatar_splashOut);
|
||||
splash(false);
|
||||
|
||||
if (dsq->continuity.form != FORM_FISH)
|
||||
|
|
|
@ -179,7 +179,6 @@ void Element::updateEffects(float dt)
|
|||
|
||||
void Element::update(float dt)
|
||||
{
|
||||
BBGE_PROF(Element_update);
|
||||
if (!core->particlesPaused)
|
||||
{
|
||||
updateLife(dt);
|
||||
|
|
|
@ -2546,7 +2546,6 @@ void Entity::addIgnoreShotDamageType(DamageType dt)
|
|||
|
||||
void Entity::doSpellAvoidance(float dt, int range, float mod)
|
||||
{
|
||||
BBGE_PROF(Entity_doSpellAvoidance);
|
||||
Vector accum;
|
||||
|
||||
int c = 0;
|
||||
|
|
|
@ -4071,7 +4071,6 @@ bool Game::isEntityCollideWithShot(Entity *e, Shot *shot)
|
|||
|
||||
void Game::handleShotCollisions(Entity *e, bool hasShield)
|
||||
{
|
||||
BBGE_PROF(Game_handleShotCollisions);
|
||||
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
||||
{
|
||||
Shot *shot = Shot::shots[i];
|
||||
|
@ -4103,7 +4102,6 @@ bool Game::isDamageTypeEnemy(DamageType dt)
|
|||
|
||||
void Game::handleShotCollisionsSkeletal(Entity *e)
|
||||
{
|
||||
BBGE_PROF(Game_HSSKELETAL);
|
||||
for (size_t i = 0; i < Shot::shots.size(); ++i)
|
||||
{
|
||||
Shot *shot = Shot::shots[i];
|
||||
|
|
|
@ -119,9 +119,6 @@ void Hair::onUpdate(float dt)
|
|||
|
||||
void Hair::updatePositions()
|
||||
{
|
||||
BBGE_PROF(Hair_updatePositions);
|
||||
|
||||
|
||||
for (size_t i = 1; i < hairNodes.size(); i++)
|
||||
{
|
||||
Vector diff = hairNodes[i].position - hairNodes[i-1].position;
|
||||
|
|
|
@ -330,25 +330,15 @@ void SchoolFish::applySeparation(Vector &accumulator)
|
|||
|
||||
void SchoolFish::onUpdate(float dt)
|
||||
{
|
||||
BBGE_PROF(SchoolFish_onUpdate);
|
||||
|
||||
|
||||
|
||||
{
|
||||
burstDelay -= dt;
|
||||
if (burstDelay < 0)
|
||||
{
|
||||
burstDelay = 0;
|
||||
}
|
||||
}
|
||||
burstDelay -= dt;
|
||||
if (burstDelay < 0)
|
||||
burstDelay = 0;
|
||||
|
||||
if (stickToNaijasHead && alpha.x < 0.1f)
|
||||
stickToNaijasHead = false;
|
||||
|
||||
if (this->layer < LR_ENTITIES)
|
||||
{
|
||||
|
||||
|
||||
setEntityType(ET_NEUTRAL);
|
||||
collideRadius = 0;
|
||||
}
|
||||
|
|
|
@ -461,8 +461,6 @@ void ScriptedEntity::stopPull()
|
|||
|
||||
void ScriptedEntity::onUpdate(float dt)
|
||||
{
|
||||
BBGE_PROF(ScriptedEntity_onUpdate);
|
||||
|
||||
CollideEntity::onUpdate(dt);
|
||||
|
||||
if (becomeSolidDelay)
|
||||
|
|
|
@ -450,7 +450,6 @@ void Shot::onEndOfLife()
|
|||
|
||||
void Shot::doHitEffects()
|
||||
{
|
||||
BBGE_PROF(Shot_doHitEffects);
|
||||
if (shotData)
|
||||
{
|
||||
if (!shotData->hitPrt.empty())
|
||||
|
@ -478,7 +477,6 @@ void Shot::suicide()
|
|||
|
||||
bool Shot::onHitWall(bool reflect)
|
||||
{
|
||||
BBGE_PROF(Shot_onHitWall);
|
||||
doHitEffects();
|
||||
updateSegments(position);
|
||||
|
||||
|
|
|
@ -31,9 +31,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
|
||||
#include "BBGECompileConfig.h"
|
||||
|
||||
#define BBGE_PROF(x)
|
||||
|
||||
|
||||
#define compile_assert(pred) switch(0){case 0:case (pred):;}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
|
|
@ -1046,8 +1046,6 @@ void Core::run(float runTime)
|
|||
|
||||
while((runTime == -1 && !loopDone) || (runTime >0))
|
||||
{
|
||||
BBGE_PROF(Core_main);
|
||||
|
||||
nowTicks = SDL_GetTicks();
|
||||
dt = (nowTicks-thenTicks)/1000.0;
|
||||
thenTicks = nowTicks;
|
||||
|
@ -1177,8 +1175,6 @@ void Core::run(float runTime)
|
|||
|
||||
showBuffer();
|
||||
|
||||
BBGE_PROF(STOP);
|
||||
|
||||
if (nestedMains == 1)
|
||||
clearGarbage();
|
||||
|
||||
|
@ -1741,10 +1737,6 @@ void Core::updateCullData()
|
|||
|
||||
void Core::render(int startLayer, int endLayer, bool useFrameBufferIfAvail)
|
||||
{
|
||||
|
||||
BBGE_PROF(Core_render);
|
||||
|
||||
|
||||
if (startLayer == -1 && endLayer == -1 && overrideStartLayer != 0)
|
||||
{
|
||||
startLayer = overrideStartLayer;
|
||||
|
@ -2048,8 +2040,6 @@ CountedPtr<Texture> Core::doTextureAdd(const std::string &texture, const std::st
|
|||
|
||||
CountedPtr<Texture> Core::addTexture(const std::string &textureName)
|
||||
{
|
||||
BBGE_PROF(Core_addTexture);
|
||||
|
||||
if (textureName.empty())
|
||||
return NULL;
|
||||
|
||||
|
@ -2192,7 +2182,6 @@ void Core::enqueueRenderObjectDeletion(RenderObject *object)
|
|||
|
||||
void Core::clearGarbage()
|
||||
{
|
||||
BBGE_PROF(Core_clearGarbage);
|
||||
// HACK: optimize this (use a list instead of a queue)
|
||||
|
||||
for (RenderObjects::iterator i = garbage.begin(); i != garbage.end(); i++)
|
||||
|
|
|
@ -30,7 +30,6 @@ Emitter::Emitter(ParticleEffect *pe) : Quad(), pe(pe)
|
|||
|
||||
void Emitter::destroy()
|
||||
{
|
||||
BBGE_PROF(Emitter_destroy);
|
||||
for (Particles::iterator i = particles.begin(); i != particles.end(); i++)
|
||||
{
|
||||
(*i)->active = false;
|
||||
|
@ -43,7 +42,6 @@ void Emitter::destroy()
|
|||
|
||||
void Emitter::spawnParticle(float perc)
|
||||
{
|
||||
BBGE_PROF(Emitter_spawnParticle);
|
||||
Particle *p = particleManager->getFreeParticle(this);
|
||||
|
||||
p->active = true;
|
||||
|
@ -231,8 +229,6 @@ void Emitter::render()
|
|||
|
||||
void Emitter::onRender()
|
||||
{
|
||||
BBGE_PROF(Emitter_onRender);
|
||||
|
||||
if (particles.empty()) return;
|
||||
|
||||
if (!data.spawnLocal)
|
||||
|
|
|
@ -39,7 +39,6 @@ void ParticleEffect::setDie(bool v)
|
|||
|
||||
void ParticleEffect::load(const std::string &name)
|
||||
{
|
||||
BBGE_PROF(ParticleEffect_load);
|
||||
particleManager->loadParticleEffectFromBank(name, this);
|
||||
}
|
||||
|
||||
|
@ -443,8 +442,6 @@ void ParticleEffect::stop()
|
|||
|
||||
void ParticleEffect::onRender()
|
||||
{
|
||||
BBGE_PROF(ParticleEffect_onRender);
|
||||
|
||||
RenderObject::onRender();
|
||||
}
|
||||
|
||||
|
|
|
@ -257,7 +257,6 @@ Particle *ParticleManager::stomp()
|
|||
|
||||
Particle *ParticleManager::getFreeParticle(Emitter *emitter)
|
||||
{
|
||||
BBGE_PROF(ParticleManager_getFreeParticle);
|
||||
if (size == 0) return 0;
|
||||
|
||||
Particle *p = 0;
|
||||
|
@ -354,7 +353,6 @@ int ParticleManager::getSize()
|
|||
|
||||
void ParticleManager::update(float dt)
|
||||
{
|
||||
BBGE_PROF(ParticleManager_update);
|
||||
numActive = 0;
|
||||
for (size_t i = 0; i < particles.size(); i++)
|
||||
{
|
||||
|
|
|
@ -1,617 +0,0 @@
|
|||
IPROF: A Portable Industrial-Strength Interactive Profiler for C++ and C
|
||||
by Sean Barrett
|
||||
|
||||
Version 0.2
|
||||
|
||||
|
||||
CONTENTS
|
||||
Overview
|
||||
User Manual
|
||||
Platform
|
||||
Instrumentation
|
||||
Private Zones
|
||||
Public Zones
|
||||
Initialization
|
||||
Processing Data
|
||||
Displaying Results
|
||||
Controlling Display
|
||||
Understanding CALL GRAPH output
|
||||
Performance Expectation
|
||||
Implementation Notes
|
||||
Version History
|
||||
|
||||
|
||||
OVERVIEW
|
||||
|
||||
IProf is an interactive profiler which works by intrusively instrumenting
|
||||
code. Code is divided into zones by programmer-inserted statements. Zones
|
||||
are both lexically and dynamically scoped--all time spent within a
|
||||
lexically scoped zone, and any code which it calls which is not itself
|
||||
zoned, is attributed to that zone.
|
||||
|
||||
Profiling occurs interactively; time is divided into "frames", and the
|
||||
profiler shows time spent on the previous frame (or a smoothed average
|
||||
or possibly even a frame a second or two ago).
|
||||
|
||||
Like a traditional profiler, IProf records or can compute the number of
|
||||
times a zone is entered, the amount of time spent in the zone ("self
|
||||
time"), and the amount of time spent in the zone and its descendents
|
||||
("hierarchical time" -- "self + child time" in gprof).
|
||||
|
||||
Furthermore, IProf computes information along the lines of gprof--number of
|
||||
times a given zone is entered from any other specific zone; self and
|
||||
hierarchical time spent in a given zone on behalf of a specific parent
|
||||
zone, etc. (However, where gprof estimates this information based only on
|
||||
call counts, IProf measures the actual values. So, for example, IProf will
|
||||
accurately report if a ray-casting routine called by both physics and AI
|
||||
always spends longer per AI-call because the casts are longer.)
|
||||
|
||||
Precise information is available for recursive routines, including call
|
||||
depths etc. [The current version of IProf does not yet completely handle
|
||||
reporting of recursive data, although it is measured correctly.]
|
||||
|
||||
Additionally, IProf provides all numbers in instantaneous form or as two
|
||||
differently weighted moving averages. It's easy to pause the profile
|
||||
updating so that you can switch between multiple views of the paused data
|
||||
set. Two optional flags allow trading off memory for deeper historical
|
||||
views. The cheaper option provides only zone-self-time history, suitable
|
||||
for a real-time graph of behavior. The more memory-expensive flag keeps a
|
||||
history of all the data for a certain number of frames, allowing full
|
||||
profile analysis of old frames.
|
||||
|
||||
IProf is designed for its monitoring/gathering mode to be "always on", even
|
||||
in release/optimized builds. The monitoring routines are designed to be
|
||||
reasonably efficient--the full hash on every function entry required by
|
||||
gprof is avoided in most cases--and the programmer can minimize the impact
|
||||
by limiting the instrumentation to relatively large routines. (One could
|
||||
certainly instrument a vector add function and possibly get useful call
|
||||
count data from it, but the monitoring overhead would be significant and
|
||||
noticeable in that case.) In combination with history information, it
|
||||
becomes possible to run an application, notice poor behavior, pause the
|
||||
(always on) profiling and the application, and start browsing through the
|
||||
historical profiling information.
|
||||
|
||||
IProf uses both per-call monitoring and a separate per-frame
|
||||
gathering/analysis phase. The latter is itself instrumented so the overhead
|
||||
due to it is easy to see.
|
||||
|
||||
|
||||
USER MANUAL
|
||||
|
||||
These sections document the necessary code you must use and code changes
|
||||
you must make to use the profiler.
|
||||
|
||||
The profiling system expects to be able to use any identifier which is
|
||||
prefixed with "Prof_" with exactly that pattern of uppercase/lowercase
|
||||
(i.e. "PROF_" and "prof_" can be used freely by other code).
|
||||
|
||||
|
||||
COMPILING THE PROFILING SYSTEM
|
||||
|
||||
The profiler was developed using MSVC 6.0, but should be reasonably
|
||||
portable. The implementation files are provided as .C files so they can be
|
||||
used with C compilers; however, they can be renamed to C++ files and
|
||||
compiled in that form. The implementations automatically insert extern "C"
|
||||
on the public routines. Internal routines will use either C or C++ linkage
|
||||
depending on which way you compile them; you must compile all the profiler
|
||||
files as either C or C++, without intermixing.
|
||||
|
||||
[[ NOTE: Originally the code was written in C++, and then it was
|
||||
converted to compile with C, and then some additional small changes
|
||||
were made. As of this writing, I haven't actually tested compiling
|
||||
everything as C++ again. Feel free to test for me. Or just compile
|
||||
the C files as C--you can still USE the C++ interfaces fine.]]
|
||||
|
||||
Needed files:
|
||||
prof_win32.c -- Win32 implementation of seconds-based timer
|
||||
prof_gather.c -- raw data collection
|
||||
prof_process.c -- high-level data collection, report generator
|
||||
prof_draw.c -- opengl rendering interface
|
||||
|
||||
prof.h -- public front-end
|
||||
prof_win32.h -- Win32 implementation of fast integer timestamp
|
||||
prof_gather.h -- instrumentation macros (included by prof.h)
|
||||
prof_internal.h -- private interfaces
|
||||
|
||||
|
||||
PLATFORM SUPPORT
|
||||
|
||||
IProf requires a small amount--less than fifty lines--of platform-specific
|
||||
code.
|
||||
|
||||
Win32 under MSVC is automatically supported with no further effort on your
|
||||
part, using the files prof_win32.c and prof_win32.h
|
||||
|
||||
To use other platforms, just create equivalent files for your platform. The
|
||||
C file contains a routine for getting an accurate floating point time
|
||||
reading; the H file contains the definition of a 64-bit integer type and a
|
||||
fast routine for reading a timestamp of that size. If 64-bit math isn't
|
||||
available on your platform, or if your timestamp is only 32-bit, you can
|
||||
replace the 64-bit type with a 32-bit type, as long as that item won't
|
||||
overflow in the course of running the application. (A 31-bit millisecond
|
||||
timer is good for 24 days, but is very imprecise for this application.) If
|
||||
reading the timestamp is slow, you will want to minimize how often the zone
|
||||
entry and exit points are called.
|
||||
|
||||
Also required is a display interface; an opengl one is provided, although
|
||||
others would be easy to code. (The primary display is purely textual, and
|
||||
is available through a text interface.)
|
||||
|
||||
|
||||
INSTRUMENTATION
|
||||
|
||||
First, #include "prof.h" in files that need profiling.
|
||||
|
||||
The flag Prof_ENABLED determines whether the monitoring code is compiled or
|
||||
not, to make it easy to turn off all profiling code for final shippable
|
||||
builds. Additional flags controlling amount of history data and memory
|
||||
usage therein are defined at the top of the file prof_process.c and should
|
||||
just be changed there since they affect no other files.
|
||||
|
||||
There are two main ways of instrumenting, and each offers a C++ interface
|
||||
and a C interface.
|
||||
|
||||
Private zones
|
||||
C++ Prof(zone);
|
||||
|
||||
C Prof_Begin(zone)
|
||||
Prof_End
|
||||
|
||||
Public zones
|
||||
Prof_Define(zone);
|
||||
Prof_Declare(zone);
|
||||
|
||||
C++ Prof_Scope(zone);
|
||||
|
||||
C Prof_Region(zone)
|
||||
Prof_End
|
||||
|
||||
Zone names--indicated by "zone" above--must obey the rules for identifiers,
|
||||
although they can begin with a number, and they exist in a separate
|
||||
namespace from regular identifiers.
|
||||
|
||||
So these are valid zone names:
|
||||
my_zone_2
|
||||
2_my_zone
|
||||
__
|
||||
|
||||
And these are NOT valid zone names:
|
||||
"my_zone"
|
||||
my_class::my_zone
|
||||
|
||||
|
||||
PRIVATE ZONES
|
||||
|
||||
The simplest, and highly recommended, approach to instrumentation is to
|
||||
create a private zone which only exists in a single location. In C++, you
|
||||
do this by declaring a lexically scoped zone with a statement which behaves
|
||||
semantically like a variable declaration:
|
||||
|
||||
// C++ instrumentation
|
||||
void my_routine()
|
||||
{
|
||||
Prof(my_routine_name);
|
||||
... my code ...
|
||||
}
|
||||
|
||||
This will cause all time spent after Prof(my_routine_name) to accumulate in
|
||||
a zone in the profiling reports labeled "my_routine_name". The zone ends
|
||||
when the name goes out of scope, that is, when a destructor would be called
|
||||
corresponding to this declaration.
|
||||
|
||||
Zones don't have to appear at routine-level function scope; for example:
|
||||
|
||||
// C++ instrumentation
|
||||
void my_routine()
|
||||
{
|
||||
Prof(my_routine);
|
||||
... // zone my_routine
|
||||
if (...)
|
||||
{
|
||||
Prof(my_routine_special_case);
|
||||
... // zone my_routine_special_case
|
||||
}
|
||||
... // zone my_routine
|
||||
}
|
||||
|
||||
Instrumenting in C requires more work, because C doesn't provide
|
||||
destructors, so it's not possible to lexically scope zones automatically.
|
||||
Instead, the programmer must insert Begin/End pairs and make sure those
|
||||
pairs are accurately balanced. All paths out of a function must be
|
||||
accounted for. A crash or severe slowdown is likely to occur with
|
||||
unbalanced pairs.
|
||||
|
||||
// C instrumentation
|
||||
void my_routine(void)
|
||||
{
|
||||
Prof_Begin(my_routine)
|
||||
int x = some_func();
|
||||
if (x == 0) {
|
||||
Prof_End
|
||||
return;
|
||||
}
|
||||
...
|
||||
Prof_End
|
||||
}
|
||||
|
||||
Prof_Begin() is declaration-like; however, it takes no trailing semicolon.
|
||||
(This is necessary so it can be compiled out; C doesn't allow the empty
|
||||
statement ";" to precede variable declarations.) Prof_End takes no
|
||||
trailing semicolon or parentheses to help remind you of this. (You can
|
||||
change the definition of Prof_End in prof_gather.h if you don't like that.)
|
||||
|
||||
Profiling instructions like Prof() and Prof_Begin() can be placed anywhere
|
||||
that variable declarations are legal; generally you want to define them
|
||||
before other variables so the variable initializations are profiled.
|
||||
|
||||
The C interfaces are also available in C++ if you should want to use a not-
|
||||
exactly-lexically-scoped zone, e.g. end a zone before the destructor would
|
||||
go out of scope. (You can't, however, end Prof() with Prof_End.)
|
||||
|
||||
|
||||
PUBLIC ZONES
|
||||
|
||||
If you define multiple private zones with the same name, they will be
|
||||
treated as entirely unrelated zones that happen to have the same name, and
|
||||
you will see the same name multiple times in the profiling output.
|
||||
|
||||
Instead, you probably want to use public zones, to use the same zone in
|
||||
multiple regions of code. For example, we might have two routines that
|
||||
serve the same purpose which we always want to measure as one. Or we might
|
||||
have two blocks of code within a single routine which we want to credit to
|
||||
the same zone.
|
||||
|
||||
To do this, first define the zone with Prof_Define(zone), and then use it
|
||||
with Prof_Scope(zone) [C++] or Prof_Region(zone) ... Prof_End [C].
|
||||
|
||||
// C++ instrumentation
|
||||
Prof_Define(my_routine);
|
||||
|
||||
void my_routine_v1()
|
||||
{
|
||||
Prof_Scope(my_routine);
|
||||
...
|
||||
}
|
||||
|
||||
void my_routine_v2()
|
||||
{
|
||||
Prof_Scope(my_routine);
|
||||
...
|
||||
}
|
||||
|
||||
or
|
||||
|
||||
// C instrumentation
|
||||
Prof_Define(my_routine);
|
||||
|
||||
void my_routine_v1(void)
|
||||
{
|
||||
Prof_Region(my_routine)
|
||||
...
|
||||
Prof_End
|
||||
}
|
||||
|
||||
void my_routine_v2(void)
|
||||
{
|
||||
Prof_Region(my_routine)
|
||||
...
|
||||
Prof_End
|
||||
}
|
||||
|
||||
Because Prof_Define defines an actual global symbol (if used at file
|
||||
scope), the symbol can even be referenced from other files by saying:
|
||||
|
||||
extern Prof_Declare(my_routine);
|
||||
|
||||
void my_routine()
|
||||
{
|
||||
Prof_Scope(my_routine);
|
||||
}
|
||||
|
||||
You can use 'extern "C" Prof_Declare()' or Prof_Define() to share a zone
|
||||
between C and C++ code.
|
||||
|
||||
|
||||
USER MANUAL - INITIALIZATION
|
||||
|
||||
The profiling system is self-initializing.
|
||||
|
||||
|
||||
USER MANUAL - PROCESSING DATA
|
||||
|
||||
Every frame, you should call Prof_update(). Prof_update() will gather
|
||||
results and record frame-history information on the assumption that each
|
||||
call is a frame. Prof_update() takes a boolean flag which indicates whether
|
||||
to update the history or not; passing in false means profiling is "paused"
|
||||
and doesn't change.
|
||||
|
||||
You might wire this to its own toggle, or you might simply pass in a pre-
|
||||
existing flag for whether the simulation itself is active or not, thus
|
||||
allowing you to pause the simulation and automatically pause the profiling.
|
||||
(On the other hand, if you're profiling a renderer, you might want to
|
||||
pause the simulation and keep profiling.)
|
||||
|
||||
|
||||
USER MANUAL - DISPLAYING RESULTS
|
||||
|
||||
IProf offers two separate types of display: the report, which is primarily
|
||||
textual, and the graph, which is entirely graphical.
|
||||
|
||||
If you're using OpenGL, output is straightforward. For the text report,
|
||||
call Prof_draw_gl() with the display set to a 2d rendering mode--one that
|
||||
can use integer addressing, e.g. integers the size of pixels, virtual
|
||||
pixels (e.g. a 640x480 screen regardless of actual dimension), or even
|
||||
characters. Set the blending state to whatever blending mode you want for
|
||||
the report display. For the graphics report, call Prof_draw_graph_gl().
|
||||
Details of the parameters to these functions are available in the header
|
||||
file.
|
||||
|
||||
For other output devices (Direct3D, text), you'll have to write your own
|
||||
functions equivalent to Prof_draw_gl() and Prof_draw_graph_gl(). These
|
||||
should not be too difficult; these functions don't compute any of the
|
||||
profiling information; they simply format a text report or dataset to the
|
||||
screen. The text report format consists of several title fields to be
|
||||
printed, and then a collection of data records. Each data record has a name
|
||||
and an indentation amount for that name (used for call graph
|
||||
parent/children formatting), a collection of unnamed data "values", and a
|
||||
flag field indicating which of the data values should be displayed.
|
||||
Additionally, data records have a "heat" which indicates how rapidly
|
||||
changing they are, and one record may be "highlighted" indicating a virtual
|
||||
cursor is on that line.
|
||||
|
||||
[[ In practice, Prof_draw_gl makes few enough GL calls that maybe it's
|
||||
worth modularizing things out further. ]]
|
||||
|
||||
|
||||
USER MANUAL - CONTROLLING DISPLAY
|
||||
|
||||
IProf features some easy-to-use UI elements that allow program-direct
|
||||
control or user-interaction-based control over what data is reported.
|
||||
Simply hook these calls up to hotkey presses to complete your working
|
||||
profile system. (You could even write code to support mouse clicking on the
|
||||
report by calling Prof_set_cursor and on the graph by calling
|
||||
Prof_set_frame, but the hit detection is up to you.)
|
||||
|
||||
These are in rough order of the priority with which you might want to
|
||||
implement them.
|
||||
|
||||
Most important
|
||||
|
||||
Prof_set_report_mode(enum ...)
|
||||
Selects what to show in the report:
|
||||
Prof_SELF_TIME: flat times sorted by self time
|
||||
Prof_HIERARCHICAL_TIME: flat times sorted by hierarchical time
|
||||
Prof_CALL_GRAPH: call graph parent/children information
|
||||
|
||||
Prof_move_cursor(int delta)
|
||||
Move the cursor up-or-down by delta lines
|
||||
|
||||
Prof_select(void)
|
||||
Switch to call graph mode on whichever zone is currently selected
|
||||
|
||||
Prof_select_parent(void)
|
||||
Go to largest-hierarchical-time parent of the active zone in
|
||||
the call graph. (Roughly like "go up a directory".)
|
||||
|
||||
Important if you support history
|
||||
|
||||
Prof_move_frame(int delta)
|
||||
Move backwards or forwards in history by delta frames
|
||||
|
||||
Not too important
|
||||
|
||||
Prof_set_average(int type)
|
||||
Selects which moving average to use (0 == instantaneous, 1=default);
|
||||
only meaningful if frame# = 0; when looking at history, instantaneous
|
||||
values are always used.
|
||||
|
||||
Prof_set_frame(int frame)
|
||||
Selects which history entry to view (0==current, 1==previous, etc.)
|
||||
|
||||
Prof_set_cursor(int pos)
|
||||
Set the position of the up-and-down cursor.
|
||||
|
||||
Prof_set_recursion(enum ...)
|
||||
Selects whether to show recursive routines as a single zone or
|
||||
as a series of distinct zones for each recursion level.
|
||||
[[ currently unimplemented ]]
|
||||
|
||||
|
||||
UNDERSTANDING CALL GRAPH OUTPUT
|
||||
|
||||
The call graph output focuses on a single zone, and provides information
|
||||
about the parents (callers) and children (callees) of that zone.
|
||||
|
||||
The general format is something like this:
|
||||
|
||||
zone self hier count
|
||||
+my_parent1 0.75 2.50 4.0
|
||||
+my_parent2 1.00 3.25 6.0
|
||||
-my_routine 1.75 5.75 10.0
|
||||
+my_child1 1.00 2.00 15.0
|
||||
+my_child2 0.25 1.50 500.0
|
||||
my_child3 0.50 0.50 3.0
|
||||
|
||||
"self" indicates self-time (time in this zone), "hier" is hierarchical-time
|
||||
(time in this zone or its descendents), and "count" is the number of times
|
||||
the zone was entered. (Entry counts are inherently integral, but are shown
|
||||
as floating point since they may be a moving average of several integers.)
|
||||
|
||||
Currently the zone "my_routine" is being examined. It accounts for 5.75
|
||||
milliseconds of time between itself and the zones it calls. 1.75ms are
|
||||
spent in itself. The zone was entered (called) 10 times this frame.
|
||||
|
||||
The difference between my_routine's self time and hierarchical time is
|
||||
4.00ms; that much time must be being spent in its descendents. Its
|
||||
immediate children--the zones that my_routine calls directly--appear below
|
||||
it on the table. The hierarchical time of each child represents the time
|
||||
spent in that child and all its descendents *on behalf of my_routine*--
|
||||
other calls to that child are not counted. Thus, the sum of all the
|
||||
children's hierarchical time should account for all time spent in
|
||||
descendents of my_routine; hence, the sum of the child hier times is 4.00,
|
||||
identical to the difference between self and hier for my_routine.
|
||||
|
||||
Above "my_routine" in the chart is information about the callers of
|
||||
my_routine. However, the timings and counts in this section are not the
|
||||
self and hierarchical times of the parent functions themselves--there is no
|
||||
sensible meaning of "on behalf of my_routine" for the parents. Instead, the
|
||||
self, hier, and count fields show the time spent *in my_routine* on behalf
|
||||
of those parents. Thus, for each field, all of the parent entries sum to
|
||||
the corresponding entry in my_routine. Again, these are computed exactly.
|
||||
If my_routine was the public interface to a raycaster called by both AI and
|
||||
physics, but it passed the raycast on to further routines which were
|
||||
themselves explicitly zoned, then most of the my_routine time would be
|
||||
spent in descendents. This would show up in the "hierarchical time" field,
|
||||
and the parent zones, AI and physics, would show that hierarchical time
|
||||
attributed accurately.
|
||||
|
||||
There is additional data available in the system--it would be possible to
|
||||
drill down into lower-level functions and still attribute them to zones
|
||||
several parent levels above; there just isn't currently any user interface
|
||||
or computation functionality to do it.
|
||||
|
||||
|
||||
PERFORMANCE EXPECTATION
|
||||
|
||||
Except for recursive routines (see Implementation Notes section), the
|
||||
expected performance on zone entry comes from running roughly the following
|
||||
code:
|
||||
|
||||
extern Something *p0,*p1;
|
||||
if (p0->ptr_field != p1) { ... /* rarely runs */ }
|
||||
p0->int64_field0 = RDTSC; // read timestamp counter
|
||||
p0->int32_field += 1;
|
||||
p1->int64_field1 += p0->int64_field0 - p1->int64_field0;
|
||||
p1 = p0;
|
||||
|
||||
Zone exit costs a bit less.
|
||||
|
||||
|
||||
IMPLEMENTATION NOTES
|
||||
|
||||
IProf uses two relatively unknown techniques to produce accurate call
|
||||
information with minimal overhead. The first technique produces accurate
|
||||
call information at a similar cost to gprof's mcount monitoring; the second
|
||||
reduces the overhead.
|
||||
|
||||
_Zone Stack Tracking_
|
||||
|
||||
gprof's mcount technique combines two separate measurements. At every
|
||||
function entry, the function and the caller (grabbed from the return
|
||||
address on the stack) are hashed to determine a unique "data-gathering
|
||||
slot", and an integer in that slot is incremented. Thus, exact pairwise
|
||||
call counts are computed. Simultaneously, gprof periodically samples the
|
||||
instruction pointer to measure the time spent in any given routine--"self
|
||||
times". Hierarchical times are computed by distributing the self times up
|
||||
the tree based on the call graph counts. (If routine X is called 9 times
|
||||
from routine Z, and one time from routine Y, then 90% of X's time is
|
||||
attributed to Z, and 10% to Y.)
|
||||
|
||||
An intrusive profiler which samples a timer at zone entry and again at zone
|
||||
exit will compute accurate hierarchical times. By keeping a stack of zones,
|
||||
it's possible to compute accurate hierarchical and self times. The stack of
|
||||
zones also provides caller information, so hierarchical and self times can
|
||||
be attributed to each unique pair of caller & callee zones (via hashing).
|
||||
This will allow much more accurate attribution. In fact, it is sufficient
|
||||
to compute exact values for all the information gprof outputs, except in
|
||||
the face of recursion. Performance is fairly good; unlike a single-zone
|
||||
intrusive profiler, which must measure both self and hierarchical time,
|
||||
since neither can be derived from the other, the zone-pair profiler need
|
||||
only measure hierarchical time; self-time can be derived from hierarchical
|
||||
time (but not vice versa).
|
||||
|
||||
A further improvement is, instead of having one data-gathering slot per
|
||||
zone--that is, representing the state of the top of the zone stack--and
|
||||
instead of having one data-gathering slot per caller/callee zone pair--that
|
||||
is, representing the state of the top two entries of the zone stack--to
|
||||
have one data-gathering slot per unique full stack state. This can be done
|
||||
straightforwardly by building the stack as a linked list (creating an
|
||||
inverted tree--a tree of all stack states with only parent-pointer links),
|
||||
and hashing the "zone to be pushed" and the current stack to find the new
|
||||
stack. Thus the cost of the hash computation is essentially identical to
|
||||
the previous case. If every zone is only called from one specific place,
|
||||
there will still only be one data-gathering slot per zone; if a routine is
|
||||
recursive, it will create a large number of data-gathering slots, one for
|
||||
each depth of recursion. (A complex mutually recursive program like a
|
||||
compiler might generate an unreasonable number of unique states.)
|
||||
|
||||
With zone-stack tracking, it's possible to measure only either hierarchical
|
||||
time or self-time and derive the other. Hierarchical time is actually more
|
||||
efficient to measure, but it leaves handling the top-level overarching
|
||||
global state as a special case (since it will have a timer that starts but
|
||||
never ends). It's easier to instead measure self-time and rederive
|
||||
hierarchical time. Moreover, a recursive routine will automatically
|
||||
"overcount" hierarchical time (it's accrued at each level of the hierarchy),
|
||||
requiring significant fixup. It's more straightforward to just compute the
|
||||
recursive data correctly from the self times in the first place.
|
||||
|
||||
|
||||
_Hash Caching_
|
||||
|
||||
Although the hash lookup described above is coded to proceed as quickly as
|
||||
possible if the hash hits on the first probe, it still requires enough
|
||||
computation and a function call that it is worth avoiding if possible. To
|
||||
that end, each zone-entry location declares a hidden static variable
|
||||
private to that zone-entry point which caches the hash lookup. At zone-
|
||||
entry, the code checks the cache's "next node in the linked list" field
|
||||
with the current stack state. If the two are equal, then the cache is
|
||||
valid, and no hash lookup occurs. If they do not match, then the cache is
|
||||
wrong, and the hash lookup proceeds, and updates the cache. The cache is
|
||||
initialized to an impossible value, so the first time the code is run a hash
|
||||
lookup always occurs.
|
||||
|
||||
The result is that in the normal case, a routine called from a single
|
||||
place, the cache is always valid (after the very first call). Furthermore,
|
||||
the branch will always predict correctly, since it always branches
|
||||
identically. However, for a routine that is called from several places,
|
||||
there is a "switching" overhead each time it's called from a different
|
||||
place. So, for example, a raycaster called by both physics and AI might pay
|
||||
the overhead twice per frame, if all the AI calls occur before all the
|
||||
physics calls. However, a common low-level routine (e.g. a vector add)
|
||||
called alternately from two different zones would have to perform the hash
|
||||
lookup every time.
|
||||
|
||||
The actual common "failure" case is a recursive routine, for which, each
|
||||
time the routine is entered, the state of the call stack is different from
|
||||
the last time, thus almost always paying the hash lookup cost. For
|
||||
something like a recursive linked list traversal, the hash occurs every
|
||||
time. (It doesn't matter if the routine is tail-recursive; once you insert
|
||||
the profiling instrumentation, it's no longer tail-recursive.) A full
|
||||
binary tree traversal will always enter a different zone-stack-state from
|
||||
last time, except after reaching a left-child leaf. (The recursion then
|
||||
returns and then goes down to the right child, which is at the same height
|
||||
as the left child.) So a full binary tree traversal will have to hash about
|
||||
3/4 of the time. A full quadtree traversal will have to hash about 2/5 of
|
||||
the time. If the traversal is doing anything complicated, this should not
|
||||
be a problem; but if it's a simple traversal, the performance overhead may
|
||||
be significant. Like the vector add case, it may be better to remove
|
||||
instrumentation from low-inherent-cost recursive routines except when
|
||||
absolutely needed. Of course, it's easy enough to compare performance
|
||||
behavior before and after adding the instrumentation and see if the overhead
|
||||
is acceptable.
|
||||
|
||||
|
||||
VERSION HISTORY
|
||||
|
||||
version 0.2 -- 2003-02-06 STB
|
||||
- Significant interface changes to Prof_draw_gl:
|
||||
- accepts floating point instead of int for 2d screen metrics
|
||||
- accepts a total width and height of the display and conforms
|
||||
to that
|
||||
- accepts a precision specification for display of time values
|
||||
- added little '+' and '-' signs reminiscent of list displays
|
||||
so you know which ones can be drilled down on
|
||||
- expanded this doc's description of what's legal for a zone-name
|
||||
- fixed an error trying to compile the C files as C++
|
||||
- added Prof_select_parent() for moving up the tree
|
||||
|
||||
version 0.1 -- 2003-02-05 STB
|
||||
- First public version, heavily refactored, 1500 lines
|
||||
- win32 timing interface and smooth "moving average" code derived
|
||||
from Jonathan Blow's Game Developer Magazine articles
|
||||
- missing functionality:
|
||||
- correct attribution of time to zones that are parents of
|
||||
recursive zones in call graph view (hierarchical times don't
|
||||
bubble up correctly)
|
||||
- spread recursion display (displaying each depth of a recursive
|
||||
zone as if it were a separate zone)
|
|
@ -1,94 +0,0 @@
|
|||
#ifndef Prof_INC_PROF_H
|
||||
#define Prof_INC_PROF_H
|
||||
|
||||
|
||||
//#define Prof_ENABLED
|
||||
|
||||
|
||||
|
||||
#include "prof_gather.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Prof_update
|
||||
*
|
||||
* Pass in true (1) to accumulate history info; pass
|
||||
* in false (0) to throw away the current frame's data
|
||||
*/
|
||||
extern void Prof_update(int record);
|
||||
|
||||
/*
|
||||
* Prof_draw_gl -- display the current report via OpenGL
|
||||
*
|
||||
* You must provide a callable text-printing function.
|
||||
* Put the opengl state into a 2d rendering mode.
|
||||
*
|
||||
* Parameters:
|
||||
* <sx,sy> -- location where top line is drawn
|
||||
* <width, height> -- total size of display (if too small, text will overprint)
|
||||
* line_spacing -- how much to move sy by after each line; use a
|
||||
* negative value if y decreases down the screen
|
||||
* precision -- decimal places of precision for time data, 1..4 (try 2)
|
||||
* print_text -- function to display a line of text starting at the
|
||||
* given coordinate; best if 0,1..9 are fixed-width
|
||||
* text_width -- a function that computes the pixel-width of
|
||||
* a given string before printing. you can fake with a
|
||||
* simple approximation of width('0')*strlen(str)
|
||||
*
|
||||
* to avoid overprinting, you can make print_text truncate long strings
|
||||
*/
|
||||
extern void Prof_draw_gl(float sx, float sy,
|
||||
float width, float height,
|
||||
float line_spacing,
|
||||
int precision,
|
||||
void (*print_text)(float x, float y, char *str),
|
||||
float (*text_width)(char *str));
|
||||
|
||||
/*
|
||||
* Parameters
|
||||
* <sx, sy> -- origin of the graph--location of (0,0)
|
||||
* x_spacing -- screenspace size of each history sample; e.g.
|
||||
* 2.0 pixels
|
||||
* y_spacing -- screenspace size of one millisecond of time;
|
||||
* for an app with max of 20ms in any one zone,
|
||||
* 8.0 would produce a 160-pixel tall display,
|
||||
* assuming screenspace is in pixels
|
||||
*/
|
||||
extern void Prof_draw_graph_gl(float sx, float sy,
|
||||
float x_spacing, float y_spacing);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
Prof_SELF_TIME,
|
||||
Prof_HIERARCHICAL_TIME,
|
||||
Prof_CALL_GRAPH,
|
||||
} Prof_Report_Mode;
|
||||
|
||||
extern void Prof_set_report_mode(Prof_Report_Mode e);
|
||||
extern void Prof_move_cursor(int delta);
|
||||
extern void Prof_select(void);
|
||||
extern void Prof_select_parent(void);
|
||||
extern void Prof_move_frame(int delta);
|
||||
|
||||
extern void Prof_set_smoothing(int smoothing_mode);
|
||||
extern void Prof_set_frame(int frame);
|
||||
extern void Prof_set_cursor(int line);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
Prof_FLATTEN_RECURSION,
|
||||
Prof_SPREAD_RECURSION
|
||||
} Prof_Recursion_Mode;
|
||||
|
||||
extern void Prof_set_recursion(Prof_Recursion_Mode e);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // Prof_INC_PROF_H
|
||||
|
||||
|
|
@ -1,334 +0,0 @@
|
|||
#ifdef WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <gl/gl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "prof.h"
|
||||
#include "prof_internal.h"
|
||||
|
||||
#pragma warning(disable:4305; disable:4244)
|
||||
|
||||
// use factor to compute a glow amount
|
||||
// Translate a "heat" factor into display colors.
//
//   factor          -- heat value; clamped to the range [0,1]
//   text_color_ret  -- receives the RGB text color, blended from "cold"
//                      (green) towards "hot" (near white)
//   glow_color_ret  -- receives the RGB glow color; written only when the
//                      function returns 1
//   glow_alpha_ret  -- receives the glow alpha (0 when there is no glow)
//
// Returns 1 if a glow should be drawn, 0 otherwise.
static int get_colors(float factor,
                      float text_color_ret[3],
                      float glow_color_ret[3],
                      float *glow_alpha_ret)
{
   const float GLOW_RANGE     = 0.5f;   // factor at which the glow begins
   const float GLOW_ALPHA_MAX = 0.5f;   // glow alpha reached at factor == 1

   float hot[3]       = {1, 1.0, 0.9};
   float cold[3]      = {0.15, 0.9, 0.15};
   float glow_cold[3] = {0.5f, 0.5f, 0};
   float glow_hot[3]  = {1.0f, 1.0f, 0};

   float strength;
   int c;

   // clamp the factor to [0,1]
   if (factor < 0) factor = 0;
   if (factor > 1) factor = 1;

   // the text color is always produced
   for (c = 0; c < 3; ++c)
      text_color_ret[c] = cold[c] + (hot[c] - cold[c]) * factor;

   // the glow only starts once the factor passes GLOW_RANGE
   strength = (factor - GLOW_RANGE) / (1 - GLOW_RANGE);
   if (strength < 0) {
      *glow_alpha_ret = 0;
      return 0;
   }

   for (c = 0; c < 3; ++c)
      glow_color_ret[c] = glow_cold[c] + (glow_hot[c] - glow_cold[c]) * factor;

   *glow_alpha_ret = strength * GLOW_ALPHA_MAX;
   return 1;
}
|
||||
|
||||
// Emit the four corners of the axis-aligned rectangle (x0,y0)-(x1,y1) as
// vertices.  Prof_draw_gl calls this between glBegin(GL_QUADS) and glEnd().
static void draw_rectangle(float x0, float y0, float x1, float y1)
{
   float xs[4], ys[4];
   int corner;

   // corner order: (x0,y0) (x1,y0) (x1,y1) (x0,y1)
   // FACE_CULL is disabled by the caller, so winding doesn't matter
   xs[0] = x0;  ys[0] = y0;
   xs[1] = x1;  ys[1] = y0;
   xs[2] = x1;  ys[2] = y1;
   xs[3] = x0;  ys[3] = y1;

   for (corner = 0; corner < 4; ++corner)
      glVertex2f(xs[corner], ys[corner]);
}
|
||||
|
||||
// Placement of the history graph on screen; handed to graph_func through
// its opaque 'data' pointer.
typedef struct
{
   float x0;   // screen x of the graph origin
   float y0;   // screen y of the graph origin
   float sx;   // screen distance per history sample
   float sy;   // screen distance per unit of the plotted value
} GraphLocation;
|
||||
|
||||
static void graph_func(int id, int x0, int x1, float *values, void *data)
|
||||
{
|
||||
GraphLocation *loc = (GraphLocation *) data;
|
||||
int i, r,g,b;
|
||||
|
||||
// trim out values that are under 0.2 ms to accelerate rendering
|
||||
while (x0 < x1 && (*values < 0.0002f)) { ++x0; ++values; }
|
||||
while (x1 > x0 && (values[x1-1-x0] < 0.0002f)) --x1;
|
||||
|
||||
if (id == 0)
|
||||
glColor4f(1,1,1,0.5);
|
||||
else {
|
||||
if (x0 == x1) return;
|
||||
|
||||
id = (id >> 8) + id;
|
||||
r = id * 37;
|
||||
g = id * 59;
|
||||
b = id * 45;
|
||||
#pragma warning(disable:4761)
|
||||
glColor3ub((r & 127) + 80, (g & 127) + 80, (b & 127) + 80);
|
||||
}
|
||||
|
||||
glBegin(GL_LINE_STRIP);
|
||||
if (x0 == x1) {
|
||||
float x,y;
|
||||
x = loc->x0 + x0 * loc->sx;
|
||||
y = loc->y0 + values[0] * loc->sy;
|
||||
glVertex2f(x,loc->y0);
|
||||
glVertex2f(x, y);
|
||||
}
|
||||
for (i=0; i < x1-x0; ++i) {
|
||||
float x,y;
|
||||
x = loc->x0 + (i+x0) * loc->sx;
|
||||
y = loc->y0 + values[i] * loc->sy;
|
||||
glVertex2f(x,y);
|
||||
}
|
||||
glEnd();
|
||||
}
|
||||
|
||||
// Draw the per-zone history graph with its origin at <sx,sy>.
//   x_spacing -- screenspace size of each history sample
//   y_spacing -- screenspace size of one millisecond of time
// Compiles to an empty function unless Prof_ENABLED is defined.
Prof_extern_C void Prof_draw_graph_gl(float sx, float sy, float x_spacing, float y_spacing)
{
#ifdef Prof_ENABLED
   Prof_Begin(iprof_draw_graph)
   GraphLocation where;

   where.x0 = sx;
   where.y0 = sy;
   where.sx = x_spacing;
   // y_spacing is given per millisecond; scale it by 1000 for graph_func
   where.sy = y_spacing * 1000;

   // graph the most recent 128 frames
   Prof_graph(128, graph_func, &where);
   Prof_End
#endif
}
|
||||
|
||||
|
||||
// float to string conversion with sprintf() was
|
||||
// taking up 10-20% of the Prof_draw time, so I
|
||||
// wrote a faster float-to-string converter
|
||||
|
||||
// Lookup tables for the numbers 0..99, filled in by int_to_string_init():
//   int_to_string[i]             "i"      e.g. "7",   "42"
//   int_to_string_decimal[i]     ".ii"    e.g. ".07", ".42"
//   int_to_string_mid_decimal[i] "t.u"    e.g. "0.7", "4.2"
// Every entry needs at most three characters plus the terminator.
static char int_to_string[100][4];
static char int_to_string_decimal[100][4];
static char int_to_string_mid_decimal[100][4];

// Fill the three lookup tables above.  Prof_draw_gl calls this lazily the
// first time it runs (it tests int_to_string[0][0], which is non-zero once
// the tables are built).
static void int_to_string_init(void)
{
   int i;
   for (i=0; i < 100; ++i) {
      sprintf(int_to_string[i], "%d", i);
      sprintf(int_to_string_decimal[i], ".%02d", i);
      sprintf(int_to_string_mid_decimal[i], "%d.%d", i/10, i % 10);
   }
}

// sprintf formats for the slow path, indexed by number of decimal places
static const char *formats[5] =
{
   "%.0f",
   "%.1f",
   "%.2f",
   "%.3f",
   "%.4f",
};

// Clamp a table index into the valid range 0..99.
static int float_to_string_index(int v)
{
   if (v < 0)  return 0;
   if (v > 99) return 99;
   return v;
}

// Format 'num' into 'buf' with 'precision' decimal places.
//
// For precision 2, 3 and 4 a table-driven fast path is used when the value
// lies in [0,100), [0,10) or [0,1) respectively; the fast path truncates
// rather than rounds.  Everything else falls back to sprintf().
//
// The range tests are written as !(lo <= num && num < hi) so that a NaN is
// routed to the sprintf fallback instead of being converted to an int,
// which would be undefined behaviour and index far outside the tables.
// 'precision' values outside 0..4 are clamped before selecting a format.
//
// 'buf' must have room for the result; the caller in this file passes a
// 256-byte buffer.
static void float_to_string(char *buf, float num, int precision)
{
   int x,y;
   switch(precision) {
      case 2:
         if (!(num >= 0 && num < 100))
            break;
         x = float_to_string_index((int) num);
         y = float_to_string_index((int) ((num - x) * 100));
         strcpy(buf, int_to_string[x]);
         strcat(buf, int_to_string_decimal[y]);
         return;
      case 3:
         if (!(num >= 0 && num < 10))
            break;
         num *= 10;
         x = float_to_string_index((int) num);
         y = float_to_string_index((int) ((num - x) * 100));
         strcpy(buf, int_to_string_mid_decimal[x]);
         // skip the '.' of the table entry, the mid-decimal already has one
         strcat(buf, int_to_string_decimal[y]+1);
         return;
      case 4:
         if (!(num >= 0 && num < 1))
            break;
         num *= 100;
         x = float_to_string_index((int) num);
         y = float_to_string_index((int) ((num - x) * 100));
         buf[0] = '0';
         strcpy(buf+1, int_to_string_decimal[x]);
         strcat(buf, int_to_string_decimal[y]+1);
         return;
   }

   // slow path: out-of-range values, NaN, and precisions without a table
   if (precision < 0) precision = 0;
   if (precision > 4) precision = 4;
   sprintf(buf, formats[precision], num);
}
|
||||
|
||||
// Display the current profiling report via OpenGL.
//
//   sx, sy             -- location where the top line is drawn
//   full_width, height -- total size of the display
//   line_spacing       -- how much sy moves after each line; negative if y
//                         decreases down the screen
//   precision          -- decimal places for time values, clamped to 1..4
//   printText          -- draws a string at the given coordinate
//   textWidth          -- returns the width of a string
//
// Face culling is disabled while drawing and restored afterwards;
// GL_TEXTURE_2D is disabled for the row backgrounds and re-enabled at the end
// if it was enabled on entry.  Compiles to an empty function unless
// Prof_ENABLED is defined.
Prof_extern_C void Prof_draw_gl(float sx, float sy,
                        float full_width, float height,
                        float line_spacing, int precision,
     void (*printText)(float x, float y, char *str), float (*textWidth)(char *str))
{
#ifdef Prof_ENABLED
   Prof_Begin(iprof_draw)

   int i,j,n,o;
   GLuint cull, texture;
   float backup_sy;

   float field_width = textWidth("5555.55");
   float name_width  = full_width - field_width * 3;
   float plus_width  = textWidth("+");

   // Magnitude of the line spacing, kept as a float.  The integer abs()
   // would truncate a fractional spacing (and turn any spacing below 1
   // into 0, leading to a division by zero further down).
   float line_height = line_spacing < 0 ? -line_spacing : line_spacing;

   int max_records;

   Prof_Report *pob;

   if (!int_to_string[0][0]) int_to_string_init();

   if (precision < 1) precision = 1;
   if (precision > 4) precision = 4;

   // disable face culling to avoid having to get winding correct
   texture = glIsEnabled(GL_TEXTURE_2D);
   cull = glIsEnabled(GL_CULL_FACE);
   if (cull == GL_TRUE) {
      glDisable(GL_CULL_FACE);
   }

   pob = Prof_create_report();

   // Title bars: a colored background quad with the title text on top.
   for (i=0; i < NUM_TITLE; ++i) {
      if (pob->title[i]) {
         float header_x0 = sx;
         float header_x1 = header_x0 + full_width;

         if (i == 0)
            glColor4f(0.1f, 0.3f, 0, 0.85);
         else
            glColor4f(0.2f, 0.1f, 0.1f, 0.85);

         glBegin(GL_QUADS);
         draw_rectangle(header_x0, sy-2, header_x1, sy-line_spacing+2);
         glEnd();

         if (i == 0)
            glColor4f(0.6, 0.4, 0, 0);
         else
            glColor4f(0.8f, 0.1f, 0.1f, 0);

         printText(sx+2, sy, pob->title[i]);

         // each title consumes one and a half lines
         sy += 1.5*line_spacing;
         height -= line_height*1.5;
      }
   }

   // How many records fit in the remaining height; none if the spacing is
   // zero or the titles already used up all the space.
   if (line_height > 0 && height > 0)
      max_records = (int) (height / line_height);
   else
      max_records = 0;

   // Choose the window [o, o+n) of records so the hilighted one is visible.
   o = 0;
   n = pob->num_record;
   if (n > max_records) n = max_records;
   if (pob->hilight >= o + n) {
      o = pob->hilight - n + 1;
   }

   backup_sy = sy;

   // Draw the background colors for the zone data.
   glDisable(GL_TEXTURE_2D);
   glBegin(GL_QUADS);

   // column header row
   glColor4f(0,0,0,0.85);
   draw_rectangle(sx, sy, sx + full_width, sy - line_spacing);
   sy += line_spacing;

   // alternating row colors, with a distinct color for the hilighted row
   for (i = 0; i < n; i++) {
      float y0, y1;

      if (i & 1) {
         glColor4f(0.1, 0.1f, 0.2, 0.85);
      } else {
         glColor4f(0.1f, 0.1f, 0.3, 0.85);
      }
      if (i+o == pob->hilight)
         glColor4f(0.3f, 0.3f, 0.1f, 0.85);

      y0 = sy;
      y1 = sy - line_spacing;

      draw_rectangle(sx, y0, sx + full_width, y1);
      sy += line_spacing;
   }
   glEnd();

   // Second pass over the same rows, this time drawing the text.
   sy = backup_sy;
   glColor4f(0.7,0.7,0.7,0);

   if (pob->header[0])
      printText(sx+8, sy, pob->header[0]);

   // the remaining headers are centered over their value columns
   for (j=1; j < NUM_HEADER; ++j)
      if (pob->header[j])
         printText(sx + name_width + field_width * (j-1) +
                   field_width/2 - textWidth(pob->header[j])/2, sy, pob->header[j]);

   sy += line_spacing;

   for (i = 0; i < n; i++) {
      char buf[256], *b = buf;
      Prof_Report_Record *r = &pob->record[i+o];
      float text_color[3], glow_color[3];
      float glow_alpha;
      float x = sx + textWidth(" ") * r->indent + plus_width/2;

      // an optional one-character prefix goes in front of the name;
      // records without one are shifted right to stay aligned
      if (r->prefix) {
         buf[0] = r->prefix;
         ++b;
      } else {
         x += plus_width;
      }

      // the name is limited to 200 characters so that prefix, name and
      // count can never overflow buf
      if (r->number)
         sprintf(b, "%.200s (%d)", r->name, r->number);
      else
         sprintf(b, "%.200s", r->name);

      // hot entries get the text drawn twice: glow first, then the text
      if (get_colors(r->heat, text_color, glow_color, &glow_alpha)) {
         glColor4f(glow_color[0], glow_color[1], glow_color[2], glow_alpha);
         //printText(x+2, sy-1, buf);
         printText(x+1, sy, buf);
      }
      glColor3fv(text_color);
      printText(x + 1, sy, buf);

      // right-align each enabled value within its column
      for (j=0; j < NUM_VALUES; ++j) {
         if (r->value_flag & (1 << j)) {
            int pad;
            // the value in column 2 is always shown with two decimals
            float_to_string(buf, r->values[j], j == 2 ? 2 : precision);
            pad = field_width- plus_width - textWidth(buf);
            if (r->indent) pad += plus_width;
            printText(sx + pad + name_width + field_width * j, sy, buf);
         }
      }

      sy += line_spacing;
   }

   Prof_free_report(pob);

   // restore the GL state found on entry
   if (cull == GL_TRUE)
      glEnable(GL_CULL_FACE);
   if (texture == GL_TRUE)
      glEnable(GL_TEXTURE_2D);

   Prof_End
#endif
}
|
||||
|
|
@ -1,166 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "prof.h"
|
||||
#include "prof_internal.h"
|
||||
|
||||
Prof_Define(_global);
|
||||
|
||||
Prof_Zone_Stack Prof_dummy ; // impossible parent
|
||||
Prof_Zone_Stack Prof_dummy2 ;
|
||||
Prof_Zone_Stack *Prof_stack = &Prof_dummy2;
|
||||
|
||||
int Prof_num_zones;
|
||||
Prof_Zone *Prof_zones[];
|
||||
|
||||
#define MAX_HASH_SIZE 65536 // not unlimited, to catch unbalanced BEGIN/END_PROF
|
||||
#define INIT_HASH_SIZE 256 // balance resource usage and avoid initial growth
|
||||
|
||||
static Prof_Zone_Stack *init_hash[] = { &Prof_dummy };
|
||||
static Prof_Zone_Stack **zone_hash = init_hash;
|
||||
static int zone_hash_count = 1;
|
||||
static int zone_hash_max = 1;
|
||||
static int zone_hash_mask = 0;
|
||||
|
||||
// Hash a (zone, parent stack) pair into an int for the zone-stack table.
//
// The two addresses are added as unsigned integers and deliberately
// truncated to int.  Casting a pointer straight to int is rejected by C++
// compilers on 64-bit targets and is an unintended truncation in C; going
// through size_t (from <stdlib.h>, already included) keeps the same value on
// 32-bit targets while remaining valid everywhere.  The final mix is done in
// unsigned arithmetic so that it cannot overflow a signed int.
static int hash(Prof_Zone *z, Prof_Zone_Stack *s)
{
   size_t address_sum = (size_t) z + (size_t) s;
   int n = (int) (unsigned int) address_sum;   // keep the low bits only
   return (int) ((unsigned int) n + (unsigned int) (n >> 8));
}
|
||||
|
||||
// Place an existing stack node into the hash table, probing with the same
// primary/secondary hash sequence that Prof_StackAppend uses for lookups.
// The node must not already be in the table, and the table must have at
// least one free slot (a slot holding &Prof_dummy), otherwise the probe
// loop would not terminate.
static void insert_node(Prof_Zone_Stack *q)
{
   int h    = hash(q->zone, q->parent);
   // secondary hash, forced odd so it is relatively prime to the
   // power-of-two table size
   int step = ((h << 4) + (h >> 4)) | 1;
   int pos;

   // walk the probe sequence until a free slot turns up
   for (pos = h & zone_hash_mask;
        zone_hash[pos] != &Prof_dummy;
        pos = (pos + step) & zone_hash_mask)
      ;

   zone_hash[pos] = q;
   ++zone_hash_count;
}
|
||||
|
||||
// Register a zone in the global Prof_zones list and mark it initialized.
// NOTE(review): no check is made against the capacity of Prof_zones, which
// is sized by its definition in another file -- confirm the zone count can
// never exceed it.
static void init_zone(Prof_Zone *zone)
{
   int index = Prof_num_zones;

   Prof_zones[index] = zone;
   Prof_num_zones = index + 1;

   zone->initialized = 1;
}
|
||||
|
||||
// Count how many entries of the given stack (following parent links until
// a NULL parent is reached) refer to 'zone'.
static int count_recursion_depth(Prof_Zone_Stack *stack, Prof_Zone *zone)
{
   Prof_Zone_Stack *node;
   int depth = 0;

   for (node = stack; node != NULL; node = node->parent)
      if (node->zone == zone)
         depth += 1;

   return depth;
}
|
||||
|
||||
// Allocate and initialize the stack node that represents 'zone' pushed on
// top of 'parent'.  All counters start at zero; recursion_depth is the
// number of times 'zone' already appears in the parent chain.
// The node is heap-allocated and is not freed by anything in this file.
static Prof_Zone_Stack *createStackNode(Prof_Zone *zone, Prof_Zone_Stack *parent)
{
   // create a new node
   Prof_Zone_Stack *z = (Prof_Zone_Stack *) malloc(sizeof(*z));

   // fail loudly in debug builds rather than dereferencing NULL below
   assert(z != NULL);

   z->zone = zone;
   z->parent = parent;
   z->total_entry_count = 0;
   z->total_hier_ticks = 0;
   z->total_self_ticks = 0;
   z->t_self_start = 0;
   z->highlevel = NULL;
   z->recursion_depth = count_recursion_depth(parent, zone);
   return z;
}
|
||||
|
||||
static void init_zone_hash(int size)
|
||||
{
|
||||
int i;
|
||||
assert(size <= MAX_HASH_SIZE);
|
||||
zone_hash_max = size;
|
||||
zone_hash_count = 0;
|
||||
zone_hash = (Prof_Zone_Stack **) malloc(sizeof(*zone_hash) * zone_hash_max);
|
||||
zone_hash_mask = size-1;
|
||||
for (i=0; i < zone_hash_max; ++i)
|
||||
zone_hash[i] = &Prof_dummy;
|
||||
}
|
||||
|
||||
static void Prof_init_lowlevel(void);
|
||||
|
||||
// this code is structured to minimize computation
|
||||
// assuming there's a hit in the very first slot
|
||||
// Return the zone stack that results from pushing 'zone' on the current
// stack (Prof_stack), creating and registering it on first use.
//
// Lookup is an open-addressing probe keyed on (zone, Prof_stack).  On a
// miss, the zone is registered if this is the first time it is seen (which
// also bootstraps the profiler on the very first call), the table is
// doubled when it is more than half full, and a new node is inserted.
Prof_extern_C Prof_Zone_Stack *Prof_StackAppend(Prof_Zone *zone)
{
   int h = hash(zone, Prof_stack), s;
   int x = h & zone_hash_mask;
   Prof_Zone_Stack *z = zone_hash[x];

   // fast path: hit in the very first slot
   if (z->parent == Prof_stack && z->zone == zone) return z;

   if (z != &Prof_dummy) {

      // compute a secondary hash function; force it to be odd
      // so it's relatively prime to the power-of-two table size
      s = ((h << 4) + (h >> 4)) | 1;
      for(;;) {
         x = (x + s) & zone_hash_mask;
         z = zone_hash[x];
         if (z->parent == Prof_stack && z->zone == zone) return z;
         if (z == &Prof_dummy) break;
      }
      // loop is guaranteed to terminate because the hash table is never full
   }

   // Not found; x now indexes the free slot where the probe ended.

   // now's as good a time as any to initialize this zone
   if (!zone->initialized) {
      if (zone_hash_max == 1) {
         // still on the one-entry bootstrap table: first call ever
         Prof_init_lowlevel();
         // the above is reentrant since it initializes _global
         // so now invariants are broken, so start over
         return Prof_StackAppend(zone);
      }
      init_zone(zone);
   }

   // check if we need to grow the table
   // we keep it at most 1/2 full to be very fast
   if (zone_hash_count*2 > zone_hash_max) {
      Prof_Zone_Stack **old_hash = zone_hash, *node;
      int i,n = zone_hash_max;

      init_zone_hash(zone_hash_max*2);

      // move every live entry over to the new table
      for (i=0; i < n; ++i)
         if (old_hash[i] != &Prof_dummy)
            insert_node(old_hash[i]);

      // The old table is no longer referenced; release it so that growth
      // does not leak.  The static bootstrap table must never be freed.
      if (old_hash != init_hash)
         free(old_hash);

      // x refers to the old table, so insert the new node by probing again
      node = createStackNode(zone, Prof_stack);
      insert_node(node);
      return node;
   }

   // insert new entry in hash table
   ++zone_hash_count;
   return zone_hash[x] = createStackNode(zone, Prof_stack);
}
|
||||
|
||||
// Call 'func' once for every zone stack currently stored in the hash table
// (empty slots, marked by &Prof_dummy, are skipped).  Entries are visited in
// table order.
void Prof_traverse(void (*func)(Prof_Zone_Stack *z))
{
   int slot_index = 0;

   while (slot_index < zone_hash_max) {
      if (zone_hash[slot_index] != &Prof_dummy)
         func(zone_hash[slot_index]);
      ++slot_index;
   }
}
|
||||
|
||||
static void Prof_init_lowlevel(void)
|
||||
{
|
||||
init_zone_hash(INIT_HASH_SIZE);
|
||||
|
||||
Prof_init_highlevel();
|
||||
|
||||
// intentionally unbalanced, this wraps everything else
|
||||
{
|
||||
Prof_Region(_global)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,152 +0,0 @@
|
|||
#ifndef INC_PROFILER_LOWLEVEL_H
|
||||
#define INC_PROFILER_LOWLEVEL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define Prof_C "C"
|
||||
#define Prof_extern_C extern "C"
|
||||
#define Prof_dummy_declare
|
||||
#else
|
||||
#define Prof_C
|
||||
#define Prof_extern_C
|
||||
#define Prof_dummy_declare int Prof_dummy_dec =
|
||||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
#include "prof_win32.h"
|
||||
#else
|
||||
#error "need to define Prof_get_timestamp() and Prof_Int64"
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char * name;
|
||||
void * highlevel;
|
||||
char initialized;
|
||||
char visited;
|
||||
char pad0,pad1;
|
||||
} Prof_Zone;
|
||||
|
||||
typedef struct Prof_Zone_Stack
|
||||
{
|
||||
Prof_Int64 t_self_start;
|
||||
|
||||
Prof_Int64 total_self_ticks;
|
||||
Prof_Int64 total_hier_ticks;
|
||||
|
||||
unsigned int total_entry_count;
|
||||
|
||||
struct Prof_Zone_Stack * parent;
|
||||
Prof_Zone * zone;
|
||||
int recursion_depth;
|
||||
|
||||
void * highlevel;
|
||||
} Prof_Zone_Stack;
|
||||
|
||||
|
||||
extern Prof_C Prof_Zone_Stack * Prof_stack; // current Zone stack
|
||||
extern Prof_C Prof_Zone_Stack Prof_dummy; // parent never matches
|
||||
|
||||
extern Prof_C Prof_Zone_Stack * Prof_StackAppend(Prof_Zone *zone);
|
||||
// returns the zone stack created by pushing 'zone' on the current zone stack
|
||||
|
||||
|
||||
#ifdef Prof_ENABLED
|
||||
|
||||
static Prof_Int64 Prof_time;
|
||||
|
||||
#define Prof_Begin_Cache(z) \
|
||||
/* declare a static cache of the zone stack */ \
|
||||
static Prof_Zone_Stack *Prof_cache = &Prof_dummy
|
||||
|
||||
#define Prof_Begin_Raw(z) \
|
||||
Prof_Begin_Cache(z); \
|
||||
Prof_Begin_Code(z)
|
||||
|
||||
#define Prof_Begin_Code(z) \
|
||||
Prof_dummy_declare ( \
|
||||
\
|
||||
/* check the cached Zone_Stack and update if needed */ \
|
||||
(Prof_cache->parent != Prof_stack \
|
||||
? Prof_cache = Prof_StackAppend(&z) \
|
||||
: 0), \
|
||||
\
|
||||
++Prof_cache->total_entry_count, \
|
||||
Prof_get_timestamp(&Prof_time), \
|
||||
\
|
||||
/* stop the timer on the parent zone stack */ \
|
||||
(Prof_stack->total_self_ticks += \
|
||||
Prof_time - Prof_stack->t_self_start), \
|
||||
\
|
||||
/* make cached stack current */ \
|
||||
Prof_stack = Prof_cache, \
|
||||
\
|
||||
/* start the timer on this stack */ \
|
||||
Prof_stack->t_self_start = Prof_time, \
|
||||
0)
|
||||
|
||||
#define Prof_End_Raw() \
|
||||
\
|
||||
(Prof_get_timestamp(&Prof_time), \
|
||||
\
|
||||
/* stop timer for current zone stack */ \
|
||||
Prof_stack->total_self_ticks += \
|
||||
Prof_time - Prof_stack->t_self_start, \
|
||||
\
|
||||
/* make parent chain current */ \
|
||||
Prof_stack = Prof_stack->parent, \
|
||||
\
|
||||
/* start timer for parent zone stack */ \
|
||||
Prof_stack->t_self_start = Prof_time)
|
||||
|
||||
|
||||
#define Prof_Declare(z) Prof_Zone Prof_region_##z
|
||||
#define Prof_Define(z) Prof_Declare(z) = { #z }
|
||||
#define Prof_Region(z) Prof_Begin_Raw(Prof_region_##z);
|
||||
#define Prof_End Prof_End_Raw();
|
||||
|
||||
#define Prof_Begin(z) static Prof_Define(z); Prof_Region(z)
|
||||
#define Prof_Counter(z) Prof_Begin(z) Prof_End
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#define Prof(x) static Prof_Define(x); Prof_Scope(x)
|
||||
|
||||
#define Prof_Scope(x) \
|
||||
Prof_Begin_Cache(x); \
|
||||
Prof_Scope_Var Prof_scope_var(Prof_region_ ## x, Prof_cache)
|
||||
|
||||
struct Prof_Scope_Var {
|
||||
inline Prof_Scope_Var(Prof_Zone &zone, Prof_Zone_Stack * &Prof_cache);
|
||||
inline ~Prof_Scope_Var();
|
||||
};
|
||||
|
||||
inline Prof_Scope_Var::Prof_Scope_Var(Prof_Zone &zone, Prof_Zone_Stack * &Prof_cache) {
|
||||
Prof_Begin_Code(zone);
|
||||
}
|
||||
|
||||
inline Prof_Scope_Var::~Prof_Scope_Var() {
|
||||
Prof_End_Raw();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#else // ifdef Prof_ENABLED
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define Prof(x)
|
||||
#define Prof_Scope(x)
|
||||
#endif
|
||||
|
||||
#define Prof_Define(name)
|
||||
#define Prof_Begin(z)
|
||||
#define Prof_End
|
||||
#define Prof_Region(z)
|
||||
#define Prof_Counter(z)
|
||||
|
||||
#endif
|
||||
|
||||
#endif // INC_PROFILER_LOWLEVEL_H
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
#ifndef Prof_INC_PROF_INTERNAL_H
|
||||
#define Prof_INC_PROF_INTERNAL_H
|
||||
|
||||
// report functions
|
||||
|
||||
#define NUM_VALUES 4
|
||||
#define NUM_TITLE 2
|
||||
#define NUM_HEADER (NUM_VALUES+1)
|
||||
|
||||
typedef struct {
|
||||
int indent;
|
||||
char *name;
|
||||
int number;
|
||||
char prefix;
|
||||
int value_flag;
|
||||
double values[NUM_VALUES];
|
||||
double heat;
|
||||
|
||||
// used internally
|
||||
void *zone;
|
||||
} Prof_Report_Record;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *title[NUM_TITLE];
|
||||
char *header[NUM_HEADER];
|
||||
int num_record;
|
||||
int hilight;
|
||||
Prof_Report_Record *record;
|
||||
} Prof_Report;
|
||||
|
||||
extern void Prof_free_report(Prof_Report *z);
|
||||
extern Prof_Report *Prof_create_report(void);
|
||||
|
||||
|
||||
// really internal functions
|
||||
|
||||
extern void Prof_graph(int num_frames,
|
||||
void (*callback)(int id, int x0, int x1, float *values, void *data),
|
||||
void *data);
|
||||
|
||||
extern void Prof_init_highlevel();
|
||||
|
||||
extern double Prof_get_time(void);
|
||||
|
||||
extern int Prof_num_zones;
|
||||
extern Prof_Zone *Prof_zones[];
|
||||
|
||||
extern Prof_Declare(_global);
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -1,774 +0,0 @@
|
|||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "prof.h"
|
||||
#include "prof_internal.h"
|
||||
|
||||
// whether zone-self-data is kept to allow the history graph
|
||||
#define Prof_ZONE_HISTORY
|
||||
|
||||
// whether full detailed (and large) per-scalar history is kept (History_Scalar.history)
|
||||
#define Prof_CALL_HISTORY
|
||||
|
||||
// number of frames of history to keep
|
||||
#define NUM_FRAME_SLOTS 128
|
||||
|
||||
|
||||
// number of unique zones allowed in the entire application
|
||||
// @TODO: remove MAX_PROFILING_ZONES and make it dynamic
|
||||
#define MAX_PROFILING_ZONES 512
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// the number of moving averages
|
||||
#define NUM_PROFILE_TRACKER_HISTORY_SLOTS 3
|
||||
|
||||
// the number of frames to ignore before starting the moving averages
|
||||
#define NUM_THROWAWAY_UPDATES 3
|
||||
|
||||
// threshold for a moving average of an integer to be at zero
|
||||
#define INT_ZERO_THRESHHOLD 0.25
|
||||
|
||||
Prof_Zone *Prof_zones[MAX_PROFILING_ZONES];
|
||||
|
||||
#ifdef Prof_ZONE_HISTORY
|
||||
static float zone_history[MAX_PROFILING_ZONES][NUM_FRAME_SLOTS]; // 256K
|
||||
#endif
|
||||
|
||||
// these structures are used solely to track data over time
|
||||
typedef struct
|
||||
{
|
||||
double values[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
||||
double variances[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
||||
#ifdef Prof_CALL_HISTORY
|
||||
float history[NUM_FRAME_SLOTS];
|
||||
#endif
|
||||
} History_Scalar;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
History_Scalar self_time;
|
||||
History_Scalar hierarchical_time;
|
||||
History_Scalar entry_count;
|
||||
int max_recursion;
|
||||
} Profile_Tracker_Data_Record;
|
||||
|
||||
static History_Scalar frame_time;
|
||||
|
||||
static double times_to_reach_90_percent[NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
||||
static double precomputed_factors [NUM_PROFILE_TRACKER_HISTORY_SLOTS];
|
||||
|
||||
static int num_active_zones;
|
||||
static int update_index; // 2^31 at 100fps = 280 days
|
||||
static double last_update_time;
|
||||
static Prof_Report_Mode displayed_quantity;
|
||||
|
||||
#define FRAME_TIME_INITIAL 0.001
|
||||
|
||||
static int history_index;
|
||||
static int display_frame;
|
||||
static int slot = 1;
|
||||
|
||||
// Reset every moving average and variance of the scalar to zero.
// The per-frame history array (if compiled in) is left untouched.
static void clear(History_Scalar *s)
{
   int k = NUM_PROFILE_TRACKER_HISTORY_SLOTS;

   while (k-- > 0) {
      s->variances[k] = 0;
      s->values[k]    = 0;
   }
}
|
||||
|
||||
// Fold a new sample into each moving average of the scalar.
//
//   s          -- the scalar to update
//   new_value  -- this frame's sample
//   k_array    -- one blend factor per moving average: the old value is
//                 weighted by k and the new sample by (1 - k)
//
// The same blend is applied to the squared sample, stored in 'variances'.
// With Prof_CALL_HISTORY the raw sample is also recorded in the history slot
// for the current frame.
static void update(History_Scalar *s, double new_value, double *k_array)
{
   const double new_square = new_value * new_value;
   int avg;

   for (avg = 0; avg < NUM_PROFILE_TRACKER_HISTORY_SLOTS; avg++) {
      const double keep  = k_array[avg];
      const double blend = 1 - keep;

      s->values[avg]    = s->values[avg]    * keep + new_value  * blend;
      s->variances[avg] = s->variances[avg] * keep + new_square * blend;
   }

#ifdef Prof_CALL_HISTORY
   s->history[history_index] = (float) new_value;
#endif
}
|
||||
|
||||
// Force every moving average of the scalar to the given sample (and every
// variance slot to its square), as if the value had been observed forever.
// With Prof_CALL_HISTORY the sample is also recorded for the current frame.
static void eternity_set(History_Scalar *s, double new_value)
{
   const double new_square = new_value * new_value;
   int avg = 0;

   while (avg < NUM_PROFILE_TRACKER_HISTORY_SLOTS) {
      s->values[avg]    = new_value;
      s->variances[avg] = new_square;
      ++avg;
   }

#ifdef Prof_CALL_HISTORY
   s->history[history_index] = (float) new_value;
#endif
}
|
||||
|
||||
// Return the value of the scalar that should currently be displayed:
// the recorded sample from 'display_frame' frames ago when a past frame is
// selected (Prof_CALL_HISTORY only), otherwise the moving average selected
// by 'slot'.
static double get_value(History_Scalar *s)
{
#ifdef Prof_CALL_HISTORY
   if (display_frame != 0) {
      // step back from the current history position, wrapping around
      int frame = (history_index - display_frame + NUM_FRAME_SLOTS) % NUM_FRAME_SLOTS;
      return s->history[frame];
   }
#endif
   return s->values[slot];
}
|
||||
|
||||
void Prof_init_highlevel()
|
||||
{
|
||||
int j;
|
||||
|
||||
update_index = 0;
|
||||
last_update_time = 0;
|
||||
|
||||
times_to_reach_90_percent[0] = 0.1f;
|
||||
times_to_reach_90_percent[1] = 0.8f;
|
||||
times_to_reach_90_percent[2] = 2.5f;
|
||||
|
||||
displayed_quantity = Prof_SELF_TIME;
|
||||
|
||||
clear(&frame_time);
|
||||
|
||||
for (j = 0; j < NUM_PROFILE_TRACKER_HISTORY_SLOTS; j++) {
|
||||
frame_time.values[j] = FRAME_TIME_INITIAL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef Prof_ENABLED
|
||||
static Prof_Zone *expand = &Prof_region__global;
|
||||
#else
|
||||
static Prof_Zone *expand = NULL;
|
||||
#endif
|
||||
|
||||
// Select which quantity the report displays (self time, hierarchical time,
// or the call graph).  Only stores the choice in 'displayed_quantity'.
Prof_extern_C void Prof_set_report_mode(Prof_Report_Mode desired)
{
   displayed_quantity = desired;
}
|
||||
|
||||
// visit all Prof_Zone_Stacks
|
||||
extern void Prof_traverse(void (*func)(Prof_Zone_Stack *c));
|
||||
|
||||
// Add the self time of stack 'c' to the hierarchical time of 'c' and of
// each of its ancestors, walking the parent chain until a node without a
// zone is reached.
//
// A zone that occurs several times in the chain (recursion) is credited
// only once, at its first occurrence; the per-zone 'visited' flag tracks
// this during the walk and is cleared again before returning.
static void propogate_stack(Prof_Zone_Stack *c)
{
   Prof_Zone_Stack *node;

   // propagate times up the stack, skipping zones already credited
   for (node = c; node->zone; node = node->parent) {
      if (node->zone->visited)
         continue;
      node->total_hier_ticks += c->total_self_ticks;
      node->zone->visited = 1;
   }

   // second walk: reset the markers for the next call
   for (node = c; node->zone; node = node->parent)
      node->zone->visited = 0;
}
|
||||
|
||||
// Zero the accumulated tick and entry counters of one zone stack.
// t_self_start is left alone.
static void clear_stack(Prof_Zone_Stack *c)
{
   c->total_entry_count = 0;
   c->total_self_ticks  = 0;
   c->total_hier_ticks  = 0;
}
|
||||
|
||||
static double sum;
|
||||
static void sum_times(Prof_Zone_Stack *c)
|
||||
{
|
||||
sum += c->total_self_ticks;
|
||||
}
|
||||
|
||||
static double timestamps_to_seconds;
|
||||
static void update_history(Prof_Zone_Stack *c)
|
||||
{
|
||||
double self_time, hier_time, entry_count;
|
||||
|
||||
Profile_Tracker_Data_Record *record = (Profile_Tracker_Data_Record *) c->highlevel;
|
||||
Prof_Zone *z = c->zone;
|
||||
|
||||
if (record == NULL) {
|
||||
record = (Profile_Tracker_Data_Record *) malloc(sizeof(*record));
|
||||
c->highlevel = (void *) record;
|
||||
clear(&record->entry_count);
|
||||
clear(&record->self_time);
|
||||
clear(&record->hierarchical_time);
|
||||
record->max_recursion = 0;
|
||||
}
|
||||
|
||||
if (c->recursion_depth > record->max_recursion)
|
||||
record->max_recursion = c->recursion_depth;
|
||||
|
||||
self_time = c->total_self_ticks * timestamps_to_seconds;
|
||||
hier_time = c->total_hier_ticks * timestamps_to_seconds;
|
||||
entry_count = c->total_entry_count;
|
||||
|
||||
if (update_index < NUM_THROWAWAY_UPDATES) {
|
||||
eternity_set(&record->entry_count, entry_count);
|
||||
eternity_set(&record->self_time, self_time);
|
||||
eternity_set(&record->hierarchical_time, hier_time);
|
||||
} else {
|
||||
update(&record->self_time, self_time, precomputed_factors);
|
||||
update(&record->hierarchical_time, hier_time, precomputed_factors);
|
||||
update(&record->entry_count, entry_count, precomputed_factors);
|
||||
}
|
||||
|
||||
#ifdef Prof_ZONE_HISTORY
|
||||
* ((float *) z->highlevel) += (float) self_time;
|
||||
#endif
|
||||
}
|
||||
|
||||
const double SPEEDSTEP_DETECTION_RATIO = 0.08;
|
||||
static int speedstep_warning;
|
||||
|
||||
Prof_extern_C void Prof_update(int record_data)
|
||||
{
|
||||
#ifdef Prof_ENABLED
|
||||
Prof_Begin(iprof_update)
|
||||
|
||||
static History_Scalar integer_timestamps_per_second;
|
||||
static Prof_Int64 last_integer_timestamp;
|
||||
static Prof_Int64 current_integer_timestamp;
|
||||
|
||||
int i;
|
||||
double now, dt;
|
||||
Prof_Int64 timestamp_delta;
|
||||
double timestamps_per_second;
|
||||
|
||||
assert(Prof_num_zones <= MAX_PROFILING_ZONES);
|
||||
|
||||
Prof_traverse(propogate_stack);
|
||||
|
||||
// Precompute the time factors
|
||||
|
||||
now = Prof_get_time();
|
||||
|
||||
if (update_index == 0) {
|
||||
dt = FRAME_TIME_INITIAL;
|
||||
} else {
|
||||
dt = now - last_update_time;
|
||||
if (dt == 0) dt = FRAME_TIME_INITIAL;
|
||||
}
|
||||
|
||||
last_update_time = now;
|
||||
|
||||
for (i = 0; i < NUM_PROFILE_TRACKER_HISTORY_SLOTS; i++) {
|
||||
precomputed_factors[i] = pow(0.1f, dt / times_to_reach_90_percent[i]);
|
||||
}
|
||||
|
||||
precomputed_factors[0] = 0; // instantaneous.
|
||||
|
||||
Prof_get_timestamp(¤t_integer_timestamp);
|
||||
if (update_index == 0) {
|
||||
sum = 0;
|
||||
Prof_traverse(sum_times);
|
||||
if (sum == 0) sum = 1;
|
||||
timestamp_delta = (Prof_Int64) sum;
|
||||
} else {
|
||||
timestamp_delta = current_integer_timestamp - last_integer_timestamp;
|
||||
if (timestamp_delta == 0) timestamp_delta = 1;
|
||||
}
|
||||
|
||||
last_integer_timestamp = current_integer_timestamp;
|
||||
timestamps_per_second = (double) timestamp_delta / dt;
|
||||
|
||||
if (update_index < NUM_THROWAWAY_UPDATES) {
|
||||
eternity_set(&integer_timestamps_per_second, timestamps_per_second);
|
||||
} else {
|
||||
update(&integer_timestamps_per_second, timestamps_per_second, precomputed_factors);
|
||||
}
|
||||
|
||||
{
|
||||
const int ss_slot = 1;
|
||||
double ss_val, ss_variance, ss_stdev, ss_ratio;
|
||||
|
||||
ss_val = integer_timestamps_per_second.values[ss_slot];
|
||||
ss_variance = integer_timestamps_per_second.variances[ss_slot] - ss_val*ss_val;
|
||||
ss_stdev = sqrt(fabs(ss_variance));
|
||||
ss_ratio;
|
||||
if (ss_val) {
|
||||
ss_ratio = ss_stdev / fabs(ss_val);
|
||||
} else {
|
||||
ss_ratio = 0;
|
||||
}
|
||||
|
||||
speedstep_warning = (ss_ratio > SPEEDSTEP_DETECTION_RATIO);
|
||||
}
|
||||
|
||||
if (!record_data) {
|
||||
Prof_traverse(clear_stack);
|
||||
Prof_End
|
||||
return;
|
||||
}
|
||||
|
||||
if (timestamps_per_second) {
|
||||
timestamps_to_seconds = 1.0 / timestamps_per_second;
|
||||
} else {
|
||||
timestamps_to_seconds = 0;
|
||||
}
|
||||
|
||||
#ifdef Prof_ZONE_HISTORY
|
||||
for (i=0; i < Prof_num_zones; ++i) {
|
||||
Prof_zones[i]->highlevel = (void *) &zone_history[i][history_index];
|
||||
zone_history[i][history_index] = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
Prof_traverse(update_history);
|
||||
|
||||
update(&frame_time, dt, precomputed_factors);
|
||||
|
||||
++update_index;
|
||||
history_index = (history_index + 1) % NUM_FRAME_SLOTS;
|
||||
|
||||
Prof_traverse(clear_stack);
|
||||
|
||||
Prof_End
|
||||
#endif // Prof_ENABLED
|
||||
}
|
||||
|
||||
static Prof_Report *allocate_buffer(int n)
|
||||
{
|
||||
int i;
|
||||
Prof_Report *pob = (Prof_Report *) malloc(sizeof(*pob));
|
||||
pob->num_record = n;
|
||||
pob->record = (Prof_Report_Record *) malloc(sizeof(*pob->record) * pob->num_record);
|
||||
pob->title[0] = pob->title[1] = NULL;
|
||||
for (i=0; i < NUM_TITLE; ++i)
|
||||
pob->title[i] = NULL;
|
||||
for (i=0; i < NUM_HEADER; ++i)
|
||||
pob->header[i] = NULL;
|
||||
for (i=0; i < n; ++i) {
|
||||
pob->record[i].values[0] = 0;
|
||||
pob->record[i].values[1] = 0;
|
||||
pob->record[i].values[2] = 0;
|
||||
pob->record[i].values[3] = 0;
|
||||
pob->record[i].value_flag = 0;
|
||||
pob->record[i].heat = 0;
|
||||
pob->record[i].indent = 0;
|
||||
pob->record[i].number = 0;
|
||||
}
|
||||
return pob;
|
||||
}
|
||||
|
||||
static int uncounted;
|
||||
static Prof_Recursion_Mode recurse = Prof_FLATTEN_RECURSION;
|
||||
|
||||
static void propogate_to_zone(Prof_Zone_Stack *c)
|
||||
{
|
||||
Prof_Zone *z = c->zone;
|
||||
Profile_Tracker_Data_Record *d = (Profile_Tracker_Data_Record *) c->highlevel;
|
||||
Prof_Report_Record *r;
|
||||
|
||||
#if 1
|
||||
r = (Prof_Report_Record *) z->highlevel;
|
||||
#else
|
||||
if (recurse == Prof_FLATTEN_RECURSION)
|
||||
r = (Prof_Report_Record *) z->highlevel;
|
||||
else
|
||||
r = ((Prof_Report_Record **) z->highlevel)[c->recursion_depth];
|
||||
#endif
|
||||
|
||||
if (d) {
|
||||
double t;
|
||||
|
||||
r->values[0] += 1000 * get_value(&d->self_time);
|
||||
r->values[1] += 1000 * get_value(&d->hierarchical_time);
|
||||
r->values[2] += get_value(&d->entry_count);
|
||||
|
||||
// arbitrary determination for how low a moving average
|
||||
// has to go to reach 0
|
||||
if (get_value(&d->entry_count) > INT_ZERO_THRESHHOLD) {
|
||||
if (d->max_recursion > r->number)
|
||||
r->number = d->max_recursion;
|
||||
if (c->parent->zone)
|
||||
((Prof_Report_Record *) c->parent->zone->highlevel)->prefix = '+';
|
||||
}
|
||||
|
||||
#ifdef Prof_CALL_HISTORY
|
||||
if (display_frame) return; // no variances when examining history
|
||||
#endif
|
||||
if (displayed_quantity == Prof_HIERARCHICAL_TIME) {
|
||||
t = d->hierarchical_time.variances[slot];
|
||||
} else {
|
||||
t = d->self_time.variances[slot];
|
||||
}
|
||||
|
||||
t = 1000 * 1000 * t;
|
||||
|
||||
if (r->heat == 0)
|
||||
r->heat = t;
|
||||
else
|
||||
r->heat = r->heat + t + 2 * sqrt(r->heat * t);
|
||||
} else {
|
||||
++uncounted;
|
||||
}
|
||||
}
|
||||
|
||||
// Prof_traverse callback for the call-graph report.
//
// Every zone owns three consecutive report records (set up by
// Prof_create_report):
//   [0]  the zone as a callee of the expanded zone,
//   [1]  the zone as a caller of the expanded zone,
//   [2]  the zone as the expanded zone itself.
//
// This stack's numbers are added to whichever of those roles apply.
// The parent's record is kept in its own variable: when the expanded
// zone calls itself directly, both the "is expanded" and the "parent is
// expanded" branches run, and the second must still use this stack's
// numbers rather than the parent's.
static void propogate_expanded(Prof_Zone_Stack *c)
{
   Profile_Tracker_Data_Record *d = (Profile_Tracker_Data_Record *) c->highlevel;
   Prof_Report_Record *r;
   double self_ms, hier_ms, calls;
   int active;

   if (d == NULL) {
      ++uncounted;
      return;
   }

   self_ms = 1000 * get_value(&d->self_time);
   hier_ms = 1000 * get_value(&d->hierarchical_time);
   calls   = get_value(&d->entry_count);
   active  = calls > INT_ZERO_THRESHHOLD;

   // mark every row of the caller as having children
   if (c->parent->zone && active) {
      r = (Prof_Report_Record *) c->parent->zone->highlevel;
      r[0].prefix = '+';
      r[1].prefix = '+';
      r[2].prefix = '+';
   }

   if (c->zone == expand) {
      // accumulate this time to ourselves
      r = (Prof_Report_Record *) expand->highlevel;
      r[2].values[0] += self_ms;
      r[2].values[1] += hier_ms;
      r[2].values[2] += calls;
      if (active && d->max_recursion > r[2].number)
         r[2].number = d->max_recursion;

      // propogate it to the parents
      if (c->parent->zone) {
         Profile_Tracker_Data_Record *pd =
            (Profile_Tracker_Data_Record *) c->parent->highlevel;

         r = (Prof_Report_Record *) c->parent->zone->highlevel;
         r[1].values[0] += self_ms;
         r[1].values[1] += hier_ms;
         r[1].values[2] += calls;
         // the caller row shows the caller's own recursion depth
         if (pd != NULL && pd->max_recursion > r[1].number
                        && get_value(&pd->entry_count) > INT_ZERO_THRESHHOLD)
            r[1].number = pd->max_recursion;
      }
   }

   if (c->parent->zone == expand) {
      r = (Prof_Report_Record *) c->zone->highlevel;
      r[0].values[0] += self_ms;
      r[0].values[1] += hier_ms;
      r[0].values[2] += calls;
      if (active && d->max_recursion > r[0].number)
         r[0].number = d->max_recursion;
   }
}
|
||||
|
||||
static double compute_heat(double variance, double value)
|
||||
{
|
||||
double factor, stdev;
|
||||
double fabs_value = fabs(value);
|
||||
const float VARIANCE_TOLERANCE_FACTOR = 0.5f;
|
||||
|
||||
variance = variance - value*value;
|
||||
if (variance < 0) variance = 0;
|
||||
stdev = sqrt(variance);
|
||||
|
||||
if (fabs_value < 0.000001) {
|
||||
return 0;
|
||||
} else {
|
||||
factor = (stdev / fabs_value) * (1.0f / VARIANCE_TOLERANCE_FACTOR);
|
||||
}
|
||||
|
||||
if (factor < 0) return 0;
|
||||
if (factor > 1) return 1;
|
||||
return factor;
|
||||
}
|
||||
|
||||
static int pob_compare(const void *p, const void *q)
|
||||
{
|
||||
double a = ((Prof_Report_Record *) p)->values[0];
|
||||
double b = ((Prof_Report_Record *) q)->values[0];
|
||||
|
||||
return (b < a) ? -1 : (b > a);
|
||||
}
|
||||
|
||||
static int pob_expand_compare(const void *p, const void *q)
|
||||
{
|
||||
Prof_Report_Record * a = (Prof_Report_Record *) p;
|
||||
Prof_Report_Record * b = (Prof_Report_Record *) q;
|
||||
|
||||
if (a->indent != b->indent) {
|
||||
if (a->indent == 5) return -1;
|
||||
if (b->indent == 5) return 1;
|
||||
if (a->indent == 3) return 1;
|
||||
if (b->indent == 3) return -1;
|
||||
return 0;
|
||||
}
|
||||
if (a->values[1] == b->values[1])
|
||||
return 0;
|
||||
|
||||
if (a->values[1] < b->values[1]) {
|
||||
if (a->indent == 5) return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (a->indent == 5) return 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int cursor;
|
||||
static int update_cursor;
|
||||
|
||||
Prof_Report *Prof_create_report(void)
|
||||
{
|
||||
double avg_frame_time,fps;
|
||||
char *displayed_quantity_name;
|
||||
int i,s;
|
||||
Prof_Report *pob;
|
||||
|
||||
if (displayed_quantity == Prof_CALL_GRAPH)
|
||||
s = 3;
|
||||
else
|
||||
s = 1;
|
||||
|
||||
pob = allocate_buffer(Prof_num_zones * s);
|
||||
for (i=0; i < Prof_num_zones; ++i) {
|
||||
Prof_Zone *z = Prof_zones[i];
|
||||
Prof_Report_Record *r = &pob->record[i*s];
|
||||
z->highlevel = (void *) r;
|
||||
if (displayed_quantity == Prof_CALL_GRAPH) {
|
||||
r[0].name = r[1].name = r[2].name = z->name;
|
||||
r[0].value_flag = 1 | 2 | 4;
|
||||
r[1].value_flag = 1 | 2 | 4;
|
||||
r[2].value_flag = 1 | 2 | 4;
|
||||
r[0].indent = 3;
|
||||
r[1].indent = 5;
|
||||
r[2].indent = 0;
|
||||
r[0].zone = r[1].zone = r[2].zone = (void *) z;
|
||||
r[0].prefix = r[1].prefix = r[2].prefix = 0;
|
||||
} else {
|
||||
r->value_flag = 1 | 2 | 4;
|
||||
r->name = z->name;
|
||||
r->zone = (void *) z;
|
||||
r->indent = 0;
|
||||
r->prefix = 0;
|
||||
}
|
||||
}
|
||||
|
||||
avg_frame_time = frame_time.values[slot];
|
||||
if (avg_frame_time == 0) avg_frame_time = 0.01f;
|
||||
fps = 1.0f / avg_frame_time;
|
||||
|
||||
displayed_quantity_name = "*error*";
|
||||
switch (displayed_quantity) {
|
||||
case Prof_SELF_TIME:
|
||||
displayed_quantity_name = "sort self";
|
||||
break;
|
||||
case Prof_HIERARCHICAL_TIME:
|
||||
displayed_quantity_name = "sort hier";
|
||||
break;
|
||||
case Prof_CALL_GRAPH:
|
||||
displayed_quantity_name = "sort hier";
|
||||
break;
|
||||
}
|
||||
|
||||
pob->title[0] = (char *) malloc(BUFSIZ);
|
||||
sprintf(pob->title[0],
|
||||
"%3.3lf ms/frame (fps: %3.2lf) %s",
|
||||
avg_frame_time * 1000, fps, displayed_quantity_name);
|
||||
|
||||
#ifdef Prof_CALL_HISTORY
|
||||
if (display_frame) {
|
||||
sprintf(pob->title[0] + strlen(pob->title[0]), " - %d frame%s ago",
|
||||
display_frame, display_frame == 1 ? "" : "s");
|
||||
} else {
|
||||
strcat(pob->title[0], " - current frame");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (speedstep_warning)
|
||||
pob->title[1] = _strdup("WARNING: SpeedStep-like timer inconsistencies detected. Results are unreliable!");
|
||||
|
||||
if (displayed_quantity == Prof_CALL_GRAPH) {
|
||||
Prof_Report_Record *r = (Prof_Report_Record *) expand->highlevel;
|
||||
int j=0;
|
||||
|
||||
Prof_traverse(propogate_expanded);
|
||||
|
||||
r[2].prefix = '-';
|
||||
|
||||
for (i=0; i < pob->num_record; ++i) {
|
||||
if (pob->record[i].values[0] || pob->record[i].values[1] || pob->record[i].values[2]) {
|
||||
pob->record[j] = pob->record[i];
|
||||
++j;
|
||||
}
|
||||
}
|
||||
pob->num_record = j;
|
||||
|
||||
qsort(pob->record, pob->num_record, sizeof(pob->record[0]), pob_expand_compare);
|
||||
|
||||
for (i=0; i < pob->num_record; ++i)
|
||||
if (pob->record[i].indent == 5)
|
||||
pob->record[i].indent = 3;
|
||||
} else {
|
||||
|
||||
uncounted = 0;
|
||||
Prof_traverse(propogate_to_zone);
|
||||
|
||||
for (i=0; i < Prof_num_zones; ++i) {
|
||||
if (displayed_quantity == Prof_HIERARCHICAL_TIME) {
|
||||
double t = pob->record[i].values[0];
|
||||
pob->record[i].values[0] = pob->record[i].values[1];
|
||||
pob->record[i].values[1] = t;
|
||||
}
|
||||
|
||||
pob->record[i].heat = compute_heat(pob->record[i].heat, pob->record[i].values[0]);
|
||||
}
|
||||
|
||||
qsort(pob->record, pob->num_record, sizeof(pob->record[0]), pob_compare);
|
||||
|
||||
}
|
||||
|
||||
if (update_cursor) {
|
||||
for (i=0; i < pob->num_record; ++i) {
|
||||
if (pob->record[i].zone == expand) {
|
||||
cursor = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
update_cursor = 0;
|
||||
}
|
||||
|
||||
pob->header[0] = _strdup("zone");
|
||||
if (displayed_quantity == Prof_HIERARCHICAL_TIME) {
|
||||
pob->header[1] = _strdup("hier");
|
||||
pob->header[2] = _strdup("self");
|
||||
} else {
|
||||
pob->header[1] = _strdup("self");
|
||||
pob->header[2] = _strdup("hier");
|
||||
}
|
||||
pob->header[3] = _strdup("count");
|
||||
|
||||
if (cursor < 0) cursor = 0;
|
||||
if (cursor >= pob->num_record) cursor = pob->num_record-1;
|
||||
pob->hilight = cursor;
|
||||
|
||||
return pob;
|
||||
}
|
||||
|
||||
// Release a report returned by Prof_create_report(): every title and
// header string, the record array, and the report itself.  Title and
// header entries may be NULL.
void Prof_free_report(Prof_Report *z)
{
   int k;

   // free(NULL) is a no-op, so unset entries need no test
   for (k = 0; k < NUM_TITLE; ++k)
      free(z->title[k]);
   for (k = 0; k < NUM_HEADER; ++k)
      free(z->header[k]);

   free(z->record);
   free(z);
}
|
||||
|
||||
Prof_extern_C void Prof_move_cursor(int num)
|
||||
{
|
||||
cursor += num;
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_set_cursor(int num)
|
||||
{
|
||||
cursor = num;
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_select(void)
|
||||
{
|
||||
Prof_Report *b = Prof_create_report();
|
||||
if (b->hilight >= 0) {
|
||||
void *z = b->record[b->hilight].zone;
|
||||
if (z != NULL) {
|
||||
expand = (Prof_Zone *) z;
|
||||
displayed_quantity = Prof_CALL_GRAPH;
|
||||
}
|
||||
}
|
||||
Prof_free_report(b);
|
||||
update_cursor = 1;
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_select_parent(void)
|
||||
{
|
||||
int i;
|
||||
void *old = (void *) expand;
|
||||
Prof_Report *b = Prof_create_report();
|
||||
for (i=0; i < b->num_record; ++i) {
|
||||
if (b->record[i].indent == 0) break;
|
||||
if (b->record[i].zone == old) continue;
|
||||
expand = (Prof_Zone *) b->record[i].zone;
|
||||
}
|
||||
Prof_free_report(b);
|
||||
update_cursor = 1;
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_set_frame(int num)
|
||||
{
|
||||
if (num < 0) num = 0;
|
||||
if (num >= NUM_FRAME_SLOTS) num = NUM_FRAME_SLOTS-1;
|
||||
|
||||
display_frame = num;
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_move_frame(int delta)
|
||||
{
|
||||
// convert so negative delta = "into the past"
|
||||
Prof_set_frame(display_frame - delta);
|
||||
}
|
||||
|
||||
Prof_extern_C void Prof_set_smoothing(int x)
|
||||
{
|
||||
if (x <= 0) x = 0;
|
||||
if (x >= NUM_PROFILE_TRACKER_HISTORY_SLOTS)
|
||||
x = NUM_PROFILE_TRACKER_HISTORY_SLOTS-1;
|
||||
|
||||
slot = x;
|
||||
}
|
||||
|
||||
// currently does nothing
|
||||
Prof_extern_C void Prof_set_recursion(Prof_Recursion_Mode e)
|
||||
{
|
||||
recurse = e;
|
||||
}
|
||||
|
||||
// Derive an integer id for a zone by hashing its name, so that the id
// is consistent from run to run (the zone's address is not).
// Hash: start from 0x55555555, and for each character rotate left by
// five bits (written as a shift pair combined with +) and add the char.
// @TODO: only compute this at zone init time?
static int id(Prof_Zone *z)
{
   unsigned int hash = 0x55555555;
   char *p;

   for (p = z->name; *p; ++p)
      hash = (hash << 5) + (hash >> 27) + *p;

   return hash;
}
|
||||
|
||||
// Feed the per-zone frame history to a drawing callback.
//
// num_frames: how many of the most recent frames to report; values
//             above NUM_FRAME_SLOTS are reduced to NUM_FRAME_SLOTS.
// callback:   called as callback(id, x0, x1, values, data), with
//             `values` pointing into the zone's history array.
// data:       passed through to the callback untouched.
//
// The history is a ring buffer.  When the requested window wraps past
// its start, each zone is reported in two calls: first the part at the
// front of the buffer (the newer frames, x range [num_frames-h,
// num_frames)), then the part at its end (the older frames, x range
// [0, num_frames-h)).  If a past frame is selected, one extra call with
// id 0 marks its position.
//
// Does nothing unless Prof_ZONE_HISTORY is defined.
void Prof_graph(int num_frames, void (*callback)(int id, int x0, int x1, float *values, void *data), void *data)
{
#ifdef Prof_ZONE_HISTORY
   int zone, newest = history_index;

   if (num_frames > NUM_FRAME_SLOTS)
      num_frames = NUM_FRAME_SLOTS;

   for (zone = 0; zone < Prof_num_zones; ++zone) {
      int zone_id = id(Prof_zones[zone]);

      if (newest < num_frames) {
         // window wraps around the start of the ring buffer
         int wrapped = num_frames - newest;

         callback(zone_id, wrapped, num_frames, &zone_history[zone][0], data);
         callback(zone_id, 0, wrapped, &zone_history[zone][NUM_FRAME_SLOTS-wrapped], data);
      } else {
         callback(zone_id, 0, num_frames, &zone_history[zone][newest-num_frames], data);
      }
   }

   // display frame "cursor"
   if (display_frame != 0) {
      float value[2] = { 2.0, 0 };
      int x = NUM_FRAME_SLOTS-1-display_frame;

      callback(0, x, x, value, data);
   }
#endif
}
|
||||
|
||||
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
#define WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_EXTRA_LEAN
|
||||
#include <windows.h>
|
||||
#include <assert.h>
|
||||
|
||||
double Prof_get_time(void)
|
||||
{
|
||||
LARGE_INTEGER freq;
|
||||
LARGE_INTEGER time;
|
||||
|
||||
BOOL ok = QueryPerformanceFrequency(&freq);
|
||||
assert(ok == TRUE);
|
||||
|
||||
freq.QuadPart = freq.QuadPart;
|
||||
|
||||
ok = QueryPerformanceCounter(&time);
|
||||
assert(ok == TRUE);
|
||||
|
||||
return time.QuadPart / (double) freq.QuadPart;
|
||||
}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
#ifndef Prof_INC_PROF_WIN32_H
|
||||
#define Prof_INC_PROF_WIN32_H
|
||||
|
||||
/* 64-bit integer type that receives raw timestamps (see Prof_get_timestamp). */
typedef __int64 Prof_Int64;
|
||||
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#elif _MSC_VER >= 1200
|
||||
__forceinline
|
||||
#else
|
||||
static
|
||||
#endif
|
||||
void Prof_get_timestamp(Prof_Int64 *result)
|
||||
{
|
||||
__asm {
|
||||
rdtsc;
|
||||
mov ebx, result
|
||||
mov [ebx], eax
|
||||
mov [ebx+4], edx
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in a new issue