Profiler threading support, flame graph

Changed the profiler into a node based system for better data access, more overhead than the simple struct+depth info but can hold more detail and less post processing of data
Profiler now also profiles threads
Added some test profile tags
The profiler window now also has a FlameGraph
This commit is contained in:
max 2024-10-20 23:17:41 +02:00
parent 2c84e650d6
commit 567714a52d
4 changed files with 371 additions and 101 deletions

View File

@ -1,7 +1,6 @@
using ImGuiNET;
using MoonTools.ECS;
using Nerfed.Runtime;
using System;
namespace Nerfed.Editor.Systems
{
@ -11,7 +10,9 @@ internal class EditorProfilerWindow : MoonTools.ECS.System
const ImGuiTreeNodeFlags treeNodeFlags = ImGuiTreeNodeFlags.SpanAllColumns;
const ImGuiTreeNodeFlags treeNodeLeafFlags = ImGuiTreeNodeFlags.SpanAllColumns | ImGuiTreeNodeFlags.Leaf | ImGuiTreeNodeFlags.NoTreePushOnOpen;
int frame = 0;
private int selectedFrame = 0;
private int previousSelectedFrame = -1;
private IOrderedEnumerable<KeyValuePair<string, (double ms, uint calls)>> orderedCombinedData = null;
public EditorProfilerWindow(World world) : base(world)
{
@ -19,7 +20,7 @@ public EditorProfilerWindow(World world) : base(world)
public override void Update(TimeSpan delta)
{
if (Profiler.frames.Count <= 0)
if (Profiler.Frames.Count <= 0)
{
return;
}
@ -27,70 +28,116 @@ public override void Update(TimeSpan delta)
ImGui.Begin("Profiler");
ImGui.BeginChild("Toolbar", new System.Numerics.Vector2(0, 0), ImGuiChildFlags.AutoResizeY);
if (ImGui.RadioButton("Recording", Profiler.recording))
if (ImGui.RadioButton("Recording", Profiler.IsRecording))
{
Profiler.recording = !Profiler.recording;
Profiler.SetActive(!Profiler.IsRecording);
}
ImGui.SameLine();
if (Profiler.recording)
if (Profiler.IsRecording)
{
frame = Profiler.frames.Count - 1;
// Select last frame when recording to see latest frame data.
selectedFrame = Profiler.Frames.Count - 1;
}
if (ImGui.SliderInt(string.Empty, ref frame, 0, Profiler.frames.Count - 1))
if (ImGui.SliderInt(string.Empty, ref selectedFrame, 0, Profiler.Frames.Count - 1))
{
Profiler.recording = false;
// Stop recording when browsing frames.
Profiler.SetActive(false);
}
Profiler.FrameData frameData = Profiler.frames.ElementAt(frame);
double ms = frameData.ElapsedMilliseconds();
Profiler.Frame frame = Profiler.Frames.ElementAt(selectedFrame);
double ms = frame.ElapsedMilliseconds();
double s = 1000;
ImGui.Text($"Frame: {frameData.frame} ({ms:0.000} ms | {(s / ms):0} fps)");
ImGui.Text($"Frame: {frame.FrameCount} ({ms:0.000} ms | {(s / ms):0} fps)");
ImGui.EndChild();
if (!Profiler.IsRecording) {
if (previousSelectedFrame != selectedFrame)
{
previousSelectedFrame = selectedFrame;
orderedCombinedData = CalculateCombinedData(frame);
}
DrawFlameGraph(frame);
DrawHierachy(frame);
ImGui.SameLine();
DrawCombined(orderedCombinedData);
}
ImGui.End();
}
private static void DrawHierachy(Profiler.Frame frame)
{
if(frame == null)
{
return;
}
ImGui.BeginChild("Hierachy", new System.Numerics.Vector2(150, 0), ImGuiChildFlags.ResizeX);
if (ImGui.BeginTable("ProfilerData", 2, tableFlags, new System.Numerics.Vector2(0, 0)))
if (ImGui.BeginTable("ProfilerData", 3, tableFlags, new System.Numerics.Vector2(0, 0)))
{
ImGui.TableSetupColumn("name", ImGuiTableColumnFlags.WidthStretch, 0.8f, 0);
ImGui.TableSetupColumn("thread", ImGuiTableColumnFlags.WidthStretch, 0.2f, 1);
ImGui.TableSetupColumn("ms", ImGuiTableColumnFlags.WidthStretch, 0.2f, 1);
ImGui.TableSetupScrollFreeze(0, 1); // Make row always visible
ImGui.TableHeadersRow();
foreach (Profiler.ProfileRecord record in frameData.records)
foreach (Profiler.ScopeNode node in frame.RootNodes)
{
ImGui.TableNextRow();
ImGui.TableNextColumn();
string indentation = new string(' ', record.depth); // Indentation based on depth
ImGui.Text($"{indentation}{record.label}");
ImGui.TableNextColumn();
ImGui.Text($"{record.ElapsedMilliseconds():0.000}");
DrawHierachyNode(node);
}
ImGui.EndTable();
}
ImGui.EndChild();
}
ImGui.SameLine();
ImGui.BeginChild("Combined", new System.Numerics.Vector2(0, 0));
// Gather combined data.
Dictionary<string, (double ms, uint calls)> combinedRecordData = new Dictionary<string, (double ms, uint calls)>(128);
foreach (Profiler.ProfileRecord record in frameData.records)
private static void DrawHierachyNode(Profiler.ScopeNode node)
{
if (combinedRecordData.TryGetValue(record.label, out (double ms, uint calls) combined))
{
combinedRecordData[record.label] = (combined.ms + record.ElapsedMilliseconds(), combined.calls + 1);
ImGui.TableNextRow();
ImGui.TableNextColumn();
bool isOpen = false;
bool isLeaf = node.Children.Count == 0;
if (isLeaf) {
ImGui.TreeNodeEx(node.Label, treeNodeLeafFlags);
}
else
{
combinedRecordData.Add(record.label, (record.ElapsedMilliseconds(), 1));
isOpen = ImGui.TreeNodeEx(node.Label, treeNodeFlags);
}
ImGui.TableNextColumn();
ImGui.Text($"{node.ManagedThreadId}");
ImGui.TableNextColumn();
ImGui.Text($"{node.ElapsedMilliseconds():0.000}");
if (isOpen)
{
for (int i = 0; i < node.Children.Count; i++)
{
DrawHierachyNode(node.Children[i]);
}
ImGui.TreePop();
}
}
IOrderedEnumerable<KeyValuePair<string, (double ms, uint calls)>> orderedCombinedData = combinedRecordData.OrderByDescending(x => x.Value.ms);
private static void DrawCombined(in IOrderedEnumerable<KeyValuePair<string, (double ms, uint calls)>> orderedCombinedData)
{
if(orderedCombinedData == null)
{
return;
}
ImGui.BeginChild("Combined", new System.Numerics.Vector2(0, 0));
if (ImGui.BeginTable("ProfilerCombinedData", 3, tableFlags, new System.Numerics.Vector2(0, 0)))
{
@ -115,12 +162,43 @@ public override void Update(TimeSpan delta)
}
ImGui.EndChild();
ImGui.End();
}
private void Draw()
private static IOrderedEnumerable<KeyValuePair<string, (double ms, uint calls)>> CalculateCombinedData(Profiler.Frame frame)
{
Dictionary<string, (double ms, uint calls)> combinedRecordData = new Dictionary<string, (double ms, uint calls)>(128);
foreach (Profiler.ScopeNode node in frame.RootNodes)
{
CalculateCombinedData(node, in combinedRecordData);
}
return combinedRecordData.OrderByDescending(x => x.Value.ms);
}
private static void CalculateCombinedData(Profiler.ScopeNode node, in Dictionary<string, (double ms, uint calls)> combinedRecordData)
{
if (combinedRecordData.TryGetValue(node.Label, out (double ms, uint calls) combined))
{
combinedRecordData[node.Label] = (combined.ms + node.ElapsedMilliseconds(), combined.calls + 1);
}
else
{
combinedRecordData.Add(node.Label, (node.ElapsedMilliseconds(), 1));
}
for (int i = 0; i < node.Children.Count; i++)
{
CalculateCombinedData(node.Children[i], combinedRecordData);
}
}
private static void DrawFlameGraph(Profiler.Frame frame)
{
if (frame == null)
{
return;
}
ProfilerVisualizer.RenderFlameGraph(frame);
}
}
}

View File

@ -1,134 +1,166 @@
using System.Diagnostics;
using System.Collections.Concurrent;
using System.Diagnostics;
namespace Nerfed.Runtime;
public struct ProfilerScope : IDisposable
{
public ProfilerScope(string label) {
public ProfilerScope(string label)
{
Profiler.BeginSample(label);
}
public void Dispose() {
public void Dispose()
{
Profiler.EndSample();
}
}
public static class Profiler
{
public struct ProfileRecord
public class Frame(uint frameCount)
{
public string label;
public long startTime;
public long endTime;
public int depth;
public uint FrameCount { get; } = frameCount;
public long StartTime { get; } = Stopwatch.GetTimestamp();
public long EndTime { get; private set; }
public readonly double ElapsedMilliseconds()
{
long elapsedTicks = endTime - startTime;
return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency;
}
}
// Use a concurrent list to collect all thread root nodes per frame.
public ConcurrentBag<ScopeNode> RootNodes = new ConcurrentBag<ScopeNode>();
public class FrameData
internal void End()
{
public uint frame;
public readonly List<ProfileRecord> records = new List<ProfileRecord>();
public long startTime;
public long endTime;
public FrameData(uint frame, long startTime)
{
this.frame = frame;
this.startTime = startTime;
EndTime = Stopwatch.GetTimestamp();
}
public double ElapsedMilliseconds()
{
long elapsedTicks = endTime - startTime;
long elapsedTicks = EndTime - StartTime;
return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency;
}
}
public class ScopeNode(string label)
{
public string Label { get; } = label;
public long StartTime { get; private set; } = Stopwatch.GetTimestamp(); // Start time in ticks
public long EndTime { get; private set; }
public int ManagedThreadId { get; } = Environment.CurrentManagedThreadId;
public List<ScopeNode> Children { get; } = new List<ScopeNode>();
internal void End()
{
EndTime = Stopwatch.GetTimestamp(); // End time in ticks
}
public double ElapsedMilliseconds()
{
return ((double)(EndTime - StartTime)) * 1000 / Stopwatch.Frequency; // Convert ticks to ms
}
// Add a child node (used for nested scopes)
internal ScopeNode AddChild(string label)
{
ScopeNode child = new ScopeNode(label);
Children.Add(child);
return child;
}
}
private const int maxFrames = 128;
public static readonly BoundedQueue<FrameData> frames = new(maxFrames);
public static bool recording = true;
public static bool IsRecording { get; private set; } = true;
private static readonly Stopwatch stopwatch = new Stopwatch();
private static FrameData currentFrame = null;
private static uint currentFrameIndex = 0;
private static int currentDepth = 0;
// Store only the last x amount of frames in memory.
public static readonly BoundedQueue<Frame> Frames = new(maxFrames);
static Profiler()
// Use ThreadLocal to store a stack of ScopeNodes per thread and enable tracking of thread-local values.
private static readonly ThreadLocal<Stack<ScopeNode>> threadLocalScopes = new ThreadLocal<Stack<ScopeNode>>(() => new Stack<ScopeNode>(), true);
private static Frame currentFrame = null;
private static uint frameCount = 0;
public static void SetActive(bool isRecording)
{
stopwatch.Start();
IsRecording = isRecording;
}
[Conditional("PROFILING")]
public static void BeginFrame()
{
if (!recording)
if (!IsRecording)
{
return;
}
currentFrame = new FrameData(currentFrameIndex, stopwatch.ElapsedTicks);
currentDepth = 0;
currentFrameIndex++;
currentFrame = new Frame(frameCount);
}
[Conditional("PROFILING")]
public static void EndFrame()
{
if (!recording)
if (!IsRecording)
{
return;
}
currentFrame.endTime = stopwatch.ElapsedTicks;
frames.Enqueue(currentFrame);
foreach (Stack<ScopeNode> scopes in threadLocalScopes.Values)
{
if (scopes.Count > 0)
{
// Pop the left over root nodes.
ScopeNode currentScope = scopes.Pop();
currentScope.End();
}
// Clean up the thread-local stack to ensure it's empty for the next frame.
scopes.Clear();
}
currentFrame.End();
Frames.Enqueue(currentFrame);
frameCount++;
}
[Conditional("PROFILING")]
public static void BeginSample(string label)
{
if (!recording)
if (!IsRecording)
{
return;
}
ProfileRecord record = new ProfileRecord
Stack<ScopeNode> scopes = threadLocalScopes.Value; // Get the stack for the current thread
if (scopes.Count == 0)
{
label = label,
startTime = stopwatch.ElapsedTicks,
depth = currentDepth,
};
currentFrame.records.Add(record);
//Log.Info($"{record.label} {record.depth} | {record.startTime}");
currentDepth++; // Increase depth for nested scopes
// First scope for this thread (new root for this thread)
ScopeNode rootScopeNode = new ScopeNode($"Thread-{Environment.CurrentManagedThreadId}");
scopes.Push(rootScopeNode);
currentFrame.RootNodes.Add(rootScopeNode); // Add root node to the frame list
}
// Create a new child under the current top of the stack
ScopeNode newScope = scopes.Peek().AddChild(label);
scopes.Push(newScope); // Push new scope to the thread's stack
}
[Conditional("PROFILING")]
public static void EndSample()
{
if (!recording)
if (!IsRecording)
{
return;
}
currentDepth--; // Decrease depth when exiting a scope
Stack<ScopeNode> scopes = threadLocalScopes.Value;
// Find the last uncompleted record at the current depth and set the end time
for (int i = currentFrame.records.Count - 1; i >= 0; i--)
if (scopes.Count > 0)
{
if (currentFrame.records[i].endTime == 0)
{
ProfileRecord record = currentFrame.records[i];
record.endTime = stopwatch.ElapsedTicks;
currentFrame.records[i] = record; // Assign back to the list
//Log.Info($"{record.label} | {record.depth} | {record.endTime}");
break;
}
// Only pop if this is not the root node.
//ScopeNode currentScope = scopes.Count > 1 ? scopes.Pop() : scopes.Peek();
ScopeNode currentScope = scopes.Pop();
currentScope.End();
}
}
}

View File

@ -0,0 +1,156 @@
using ImGuiNET;
using System.Numerics;
namespace Nerfed.Runtime;
public static class ProfilerVisualizer
{
private const float barHeight = 20f;
private const float barPadding = 2f;
// Render the flame graph across multiple threads
public static void RenderFlameGraph(Profiler.Frame frame)
{
if (frame == null) return;
if (frame.RootNodes == null) return;
// Calculate the total timeline duration (max end time across all nodes)
double totalDuration = frame.EndTime - frame.StartTime;
double startTime = frame.StartTime;
// Precompute the maximum depth for each thread's call stack
Dictionary<int, int> threadMaxDepths = new Dictionary<int, int>();
foreach (IGrouping<int, Profiler.ScopeNode> threadGroup in frame.RootNodes.GroupBy(node => node.ManagedThreadId))
{
int maxDepth = 0;
foreach (Profiler.ScopeNode rootNode in threadGroup)
{
maxDepth = Math.Max(maxDepth, GetMaxDepth(rootNode, 0));
}
threadMaxDepths[threadGroup.Key] = maxDepth;
}
// Start a child window to support scrolling
ImGui.BeginChild("FlameGraph", new Vector2(0, 64), ImGuiChildFlags.Border | ImGuiChildFlags.ResizeY, ImGuiWindowFlags.HorizontalScrollbar | ImGuiWindowFlags.AlwaysVerticalScrollbar);
ImDrawListPtr drawList = ImGui.GetWindowDrawList();
Vector2 windowPos = ImGui.GetCursorScreenPos();
// Sort nodes by ThreadID, ensuring main thread (Thread ID 1) is on top
IOrderedEnumerable<IGrouping<int, Profiler.ScopeNode>> threadGroups = frame.RootNodes.GroupBy(node => node.ManagedThreadId).OrderBy(g => g.Key);
// Initial Y position for drawing
float baseY = windowPos.Y;
bool alternate = false;
float contentWidth = ImGui.GetContentRegionAvail().X;
// Draw each thread's flame graph row by row
foreach (IGrouping<int, Profiler.ScopeNode> threadGroup in threadGroups)
{
int threadId = threadGroup.Key;
// Compute the base Y position for this thread
float threadBaseY = baseY;
// Calculate the maximum height for this thread's flame graph
float threadHeight = (threadMaxDepths[threadId] + 1) * (barHeight + barPadding);
// Draw the alternating background for each thread row
uint backgroundColor = ImGui.ColorConvertFloat4ToU32(alternate ? new Vector4(0.2f, 0.2f, 0.2f, 1f) : new Vector4(0.1f, 0.1f, 0.1f, 1f));
drawList.AddRectFilled(new Vector2(windowPos.X, threadBaseY), new Vector2(windowPos.X + contentWidth, threadBaseY + threadHeight), backgroundColor);
alternate = !alternate;
// Draw each root node in the group (one per thread)
foreach (Profiler.ScopeNode rootNode in threadGroup)
{
RenderNode(drawList, rootNode, startTime, totalDuration, windowPos.X, threadBaseY, 0, contentWidth, false);
}
// Move to the next thread's row (max depth * height per level)
baseY += (threadMaxDepths[threadId] + 1) * (barHeight + barPadding);
}
// Ensure that ImGui knows the size of the content.
ImGui.Dummy(new Vector2(contentWidth, baseY));
ImGui.EndChild();
}
private static void RenderNode(ImDrawListPtr drawList, Profiler.ScopeNode node, double startTime, double totalDuration, float startX, float baseY, int depth, float contentWidth, bool alternate)
{
if (node == null) return;
double nodeStartTime = node.StartTime - startTime;
double nodeEndTime = node.EndTime - startTime;
double nodeDuration = nodeEndTime - nodeStartTime;
// Calculate the position and width of the bar based on time
float xPos = (float)(startX + (nodeStartTime / totalDuration) * contentWidth);
float width = (float)((nodeDuration / totalDuration) * contentWidth);
// Calculate the Y position based on depth
float yPos = baseY + (depth * (barHeight + barPadding)) + (barPadding * 0.5f);
// Define the rectangle bounds for the node
Vector2 min = new Vector2(xPos, yPos);
Vector2 max = new Vector2(xPos + width, yPos + barHeight);
// Define color.
Vector4 barColor = alternate ? new Vector4(0.4f, 0.6f, 0.9f, 1f) : new Vector4(0.4f, 0.5f, 0.8f, 1f);
Vector4 textColor = new Vector4(1f, 1f, 1f, 1f);
if (depth != 0)
{
// Draw the bar for the node (colored based on thread depth)
drawList.AddRectFilled(min, max, ImGui.ColorConvertFloat4ToU32(barColor));
// Draw the label if it fits inside the bar
string label = $"{node.Label} ({node.ElapsedMilliseconds():0.000} ms)";
if (width > ImGui.CalcTextSize(label).X)
{
drawList.AddText(new Vector2(xPos + barPadding, yPos + barPadding), ImGui.ColorConvertFloat4ToU32(textColor), label);
}
// Add tooltip on hover
if (ImGui.IsMouseHoveringRect(min, max))
{
// Show tooltip when hovering over the node
ImGui.BeginTooltip();
ImGui.Text($"{node.Label}");
ImGui.Text($"{node.ElapsedMilliseconds():0.000} ms");
ImGui.Text($"{node.ManagedThreadId}");
ImGui.EndTooltip();
}
}
else
{
// Aka root node.
string label = $"{node.Label}";
drawList.AddText(new Vector2(startX + barPadding, yPos + barPadding), ImGui.ColorConvertFloat4ToU32(textColor), label);
}
// Draw each child node under this node
foreach (Profiler.ScopeNode child in node.Children)
{
alternate = !alternate;
RenderNode(drawList, child, startTime, totalDuration, startX, baseY, depth + 1, contentWidth, alternate);
}
}
// Recursive function to calculate the maximum depth of the node tree
private static int GetMaxDepth(Profiler.ScopeNode node, int currentDepth)
{
if (node.Children == null || node.Children.Count == 0)
{
return currentDepth;
}
int maxDepth = currentDepth;
foreach (Profiler.ScopeNode child in node.Children)
{
maxDepth = Math.Max(maxDepth, GetMaxDepth(child, currentDepth + 1));
}
return maxDepth;
}
}

View File

@ -15,7 +15,7 @@ namespace Nerfed.Runtime.Systems
{
public class LocalToWorldSystem : MoonTools.ECS.System
{
private readonly bool useParallelFor = false; // When having a low amount of transforms or when in debug mode this might be slower.
private readonly bool useParallelFor = true; // When having a low amount of transforms or when in debug mode this might be slower.
private readonly Filter rootEntitiesFilter;
private readonly Filter transformEntitiesFilter;
private readonly Action<int> updateWorldTransform;
@ -62,15 +62,19 @@ public override void Update(TimeSpan delta)
{
foreach (Entity entity in rootEntitiesFilter.Entities)
{
Profiler.BeginSample("UpdateWorldTransform");
UpdateWorldTransform(entity, Matrix4x4.Identity);
Profiler.EndSample();
}
}
}
private void UpdateWorldTransformByIndex(int entityFilterIndex)
{
Profiler.BeginSample("UpdateWorldTransformByIndex");
Entity entity = rootEntitiesFilter.NthEntity(entityFilterIndex);
UpdateWorldTransform(entity, Matrix4x4.Identity);
Profiler.EndSample();
}
private void UpdateWorldTransform(in Entity entity, Matrix4x4 localToWorldMatrix)