Profiler threading support, flame graph
Changed the profiler into a node-based system for better data access. This has more overhead than the simple struct + depth info, but it can hold more detail and needs less post-processing of the data. The profiler now also profiles threads. Added some test profile tags. The profiler window now also has a flame graph.
This commit is contained in:
+94
-62
@@ -1,134 +1,166 @@
|
||||
using System.Diagnostics;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Nerfed.Runtime;
|
||||
|
||||
public struct ProfilerScope : IDisposable
|
||||
{
|
||||
public ProfilerScope(string label) {
|
||||
public ProfilerScope(string label)
|
||||
{
|
||||
Profiler.BeginSample(label);
|
||||
}
|
||||
|
||||
public void Dispose() {
|
||||
public void Dispose()
|
||||
{
|
||||
Profiler.EndSample();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Profiler
|
||||
{
|
||||
public struct ProfileRecord
|
||||
public class Frame(uint frameCount)
|
||||
{
|
||||
public string label;
|
||||
public long startTime;
|
||||
public long endTime;
|
||||
public int depth;
|
||||
public uint FrameCount { get; } = frameCount;
|
||||
public long StartTime { get; } = Stopwatch.GetTimestamp();
|
||||
public long EndTime { get; private set; }
|
||||
|
||||
public readonly double ElapsedMilliseconds()
|
||||
// Use a concurrent bag to collect the root node of every thread for this frame.
|
||||
public ConcurrentBag<ScopeNode> RootNodes = new ConcurrentBag<ScopeNode>();
|
||||
|
||||
internal void End()
|
||||
{
|
||||
long elapsedTicks = endTime - startTime;
|
||||
return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency;
|
||||
}
|
||||
}
|
||||
|
||||
public class FrameData
|
||||
{
|
||||
public uint frame;
|
||||
public readonly List<ProfileRecord> records = new List<ProfileRecord>();
|
||||
public long startTime;
|
||||
public long endTime;
|
||||
|
||||
public FrameData(uint frame, long startTime)
|
||||
{
|
||||
this.frame = frame;
|
||||
this.startTime = startTime;
|
||||
EndTime = Stopwatch.GetTimestamp();
|
||||
}
|
||||
|
||||
public double ElapsedMilliseconds()
|
||||
{
|
||||
long elapsedTicks = endTime - startTime;
|
||||
long elapsedTicks = EndTime - StartTime;
|
||||
return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// One timed scope in the profiler tree. The start timestamp and the managed
/// thread id are captured when the node is constructed; nested scopes are kept
/// in <see cref="Children"/>.
/// </summary>
public class ScopeNode(string label)
{
    /// <summary>Label supplied when the scope was created.</summary>
    public string Label { get; } = label;

    /// <summary>Value of <see cref="Stopwatch.GetTimestamp"/> taken at construction (ticks).</summary>
    public long StartTime { get; private set; } = Stopwatch.GetTimestamp();

    /// <summary>Value of <see cref="Stopwatch.GetTimestamp"/> taken by <see cref="End"/>; stays 0 until then.</summary>
    public long EndTime { get; private set; }

    /// <summary>Managed id of the thread that constructed this node.</summary>
    public int ManagedThreadId { get; } = Environment.CurrentManagedThreadId;

    /// <summary>Nested scopes, in the order they were added through <see cref="AddChild"/>.</summary>
    public List<ScopeNode> Children { get; } = new List<ScopeNode>();

    /// <summary>Records the end timestamp of this scope.</summary>
    internal void End()
    {
        EndTime = Stopwatch.GetTimestamp();
    }

    /// <summary>
    /// Duration of this scope in milliseconds. For a node that has not been
    /// ended yet, the time elapsed so far is returned.
    /// </summary>
    /// <returns>Elapsed time in milliseconds.</returns>
    public double ElapsedMilliseconds()
    {
        // EndTime is still 0 until End() runs; subtracting StartTime from it
        // would yield a large negative duration, so measure up to "now" instead.
        long endTicks = EndTime != 0 ? EndTime : Stopwatch.GetTimestamp();

        // Convert stopwatch ticks to milliseconds.
        return ((double)(endTicks - StartTime)) * 1000 / Stopwatch.Frequency;
    }

    /// <summary>Creates a node for a nested scope and appends it to <see cref="Children"/>.</summary>
    /// <param name="label">Label of the nested scope.</param>
    /// <returns>The newly created child node.</returns>
    internal ScopeNode AddChild(string label)
    {
        ScopeNode child = new ScopeNode(label);
        Children.Add(child);
        return child;
    }
}
|
||||
|
||||
private const int maxFrames = 128;
|
||||
|
||||
public static readonly BoundedQueue<FrameData> frames = new(maxFrames);
|
||||
public static bool recording = true;
|
||||
public static bool IsRecording { get; private set; } = true;
|
||||
|
||||
private static readonly Stopwatch stopwatch = new Stopwatch();
|
||||
private static FrameData currentFrame = null;
|
||||
private static uint currentFrameIndex = 0;
|
||||
private static int currentDepth = 0;
|
||||
// Store only the last x amount of frames in memory.
|
||||
public static readonly BoundedQueue<Frame> Frames = new(maxFrames);
|
||||
|
||||
static Profiler()
|
||||
// Use ThreadLocal to store a stack of ScopeNodes per thread and enable tracking of thread-local values.
|
||||
private static readonly ThreadLocal<Stack<ScopeNode>> threadLocalScopes = new ThreadLocal<Stack<ScopeNode>>(() => new Stack<ScopeNode>(), true);
|
||||
|
||||
private static Frame currentFrame = null;
|
||||
private static uint frameCount = 0;
|
||||
|
||||
public static void SetActive(bool isRecording)
|
||||
{
|
||||
stopwatch.Start();
|
||||
IsRecording = isRecording;
|
||||
}
|
||||
|
||||
[Conditional("PROFILING")]
|
||||
public static void BeginFrame()
|
||||
{
|
||||
if (!recording)
|
||||
if (!IsRecording)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
currentFrame = new FrameData(currentFrameIndex, stopwatch.ElapsedTicks);
|
||||
currentDepth = 0;
|
||||
currentFrameIndex++;
|
||||
currentFrame = new Frame(frameCount);
|
||||
}
|
||||
|
||||
[Conditional("PROFILING")]
|
||||
public static void EndFrame()
|
||||
{
|
||||
if (!recording)
|
||||
if (!IsRecording)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
currentFrame.endTime = stopwatch.ElapsedTicks;
|
||||
frames.Enqueue(currentFrame);
|
||||
foreach (Stack<ScopeNode> scopes in threadLocalScopes.Values)
|
||||
{
|
||||
if (scopes.Count > 0)
|
||||
{
|
||||
// Pop the left over root nodes.
|
||||
ScopeNode currentScope = scopes.Pop();
|
||||
currentScope.End();
|
||||
}
|
||||
|
||||
// Clean up the thread-local stack to ensure it's empty for the next frame.
|
||||
scopes.Clear();
|
||||
}
|
||||
|
||||
currentFrame.End();
|
||||
Frames.Enqueue(currentFrame);
|
||||
frameCount++;
|
||||
}
|
||||
|
||||
[Conditional("PROFILING")]
|
||||
public static void BeginSample(string label)
|
||||
{
|
||||
if (!recording)
|
||||
if (!IsRecording)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
ProfileRecord record = new ProfileRecord
|
||||
Stack<ScopeNode> scopes = threadLocalScopes.Value; // Get the stack for the current thread
|
||||
|
||||
if (scopes.Count == 0)
|
||||
{
|
||||
label = label,
|
||||
startTime = stopwatch.ElapsedTicks,
|
||||
depth = currentDepth,
|
||||
};
|
||||
currentFrame.records.Add(record);
|
||||
//Log.Info($"{record.label} {record.depth} | {record.startTime}");
|
||||
currentDepth++; // Increase depth for nested scopes
|
||||
// First scope for this thread (new root for this thread)
|
||||
ScopeNode rootScopeNode = new ScopeNode($"Thread-{Environment.CurrentManagedThreadId}");
|
||||
scopes.Push(rootScopeNode);
|
||||
currentFrame.RootNodes.Add(rootScopeNode); // Add root node to the frame list
|
||||
}
|
||||
|
||||
// Create a new child under the current top of the stack
|
||||
ScopeNode newScope = scopes.Peek().AddChild(label);
|
||||
|
||||
scopes.Push(newScope); // Push new scope to the thread's stack
|
||||
}
|
||||
|
||||
[Conditional("PROFILING")]
|
||||
public static void EndSample()
|
||||
{
|
||||
if (!recording)
|
||||
if (!IsRecording)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
currentDepth--; // Decrease depth when exiting a scope
|
||||
Stack<ScopeNode> scopes = threadLocalScopes.Value;
|
||||
|
||||
// Find the last uncompleted record at the current depth and set the end time
|
||||
for (int i = currentFrame.records.Count - 1; i >= 0; i--)
|
||||
if (scopes.Count > 0)
|
||||
{
|
||||
if (currentFrame.records[i].endTime == 0)
|
||||
{
|
||||
ProfileRecord record = currentFrame.records[i];
|
||||
record.endTime = stopwatch.ElapsedTicks;
|
||||
currentFrame.records[i] = record; // Assign back to the list
|
||||
//Log.Info($"{record.label} | {record.depth} | {record.endTime}");
|
||||
break;
|
||||
}
|
||||
// Only pop if this is not the root node.
|
||||
//ScopeNode currentScope = scopes.Count > 1 ? scopes.Pop() : scopes.Peek();
|
||||
ScopeNode currentScope = scopes.Pop();
|
||||
currentScope.End();
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user