From 567714a52d169a2786254c9d10dc6ae2b9bef4b1 Mon Sep 17 00:00:00 2001 From: max Date: Sun, 20 Oct 2024 23:17:41 +0200 Subject: [PATCH] Profiler threading support, flame graph Changed the profiler into a node based system for better data access, more overhead than the simple struct+depth info but can hold more detail and less post processing of data Profiler now also profiles threads Added some test profile tags The profiler window now also has a FlameGraph --- Nerfed.Editor/Systems/EditorProfilerWindow.cs | 154 ++++++++++++----- Nerfed.Runtime/Profiler.cs | 156 +++++++++++------- Nerfed.Runtime/ProfilerVisualizer.cs | 156 ++++++++++++++++++ Nerfed.Runtime/Systems/LocalToWorldSystem.cs | 6 +- 4 files changed, 371 insertions(+), 101 deletions(-) create mode 100644 Nerfed.Runtime/ProfilerVisualizer.cs diff --git a/Nerfed.Editor/Systems/EditorProfilerWindow.cs b/Nerfed.Editor/Systems/EditorProfilerWindow.cs index 70c20d0..c0a8896 100644 --- a/Nerfed.Editor/Systems/EditorProfilerWindow.cs +++ b/Nerfed.Editor/Systems/EditorProfilerWindow.cs @@ -1,7 +1,6 @@ using ImGuiNET; using MoonTools.ECS; using Nerfed.Runtime; -using System; namespace Nerfed.Editor.Systems { @@ -11,7 +10,9 @@ internal class EditorProfilerWindow : MoonTools.ECS.System const ImGuiTreeNodeFlags treeNodeFlags = ImGuiTreeNodeFlags.SpanAllColumns; const ImGuiTreeNodeFlags treeNodeLeafFlags = ImGuiTreeNodeFlags.SpanAllColumns | ImGuiTreeNodeFlags.Leaf | ImGuiTreeNodeFlags.NoTreePushOnOpen; - int frame = 0; + private int selectedFrame = 0; + private int previousSelectedFrame = -1; + private IOrderedEnumerable> orderedCombinedData = null; public EditorProfilerWindow(World world) : base(world) { @@ -19,7 +20,7 @@ public EditorProfilerWindow(World world) : base(world) public override void Update(TimeSpan delta) { - if (Profiler.frames.Count <= 0) + if (Profiler.Frames.Count <= 0) { return; } @@ -27,71 +28,117 @@ public override void Update(TimeSpan delta) ImGui.Begin("Profiler"); ImGui.BeginChild("Toolbar", new System.Numerics.Vector2(0, 0), ImGuiChildFlags.AutoResizeY); - if (ImGui.RadioButton("Recording", Profiler.recording)) + if (ImGui.RadioButton("Recording", Profiler.IsRecording)) { - Profiler.recording = !Profiler.recording; + Profiler.SetActive(!Profiler.IsRecording); } ImGui.SameLine(); - if (Profiler.recording) + if (Profiler.IsRecording) { - frame = Profiler.frames.Count - 1; + // Select last frame when recording to see latest frame data. + selectedFrame = Profiler.Frames.Count - 1; } - if (ImGui.SliderInt(string.Empty, ref frame, 0, Profiler.frames.Count - 1)) + if (ImGui.SliderInt(string.Empty, ref selectedFrame, 0, Profiler.Frames.Count - 1)) { - Profiler.recording = false; + // Stop recording when browsing frames. + Profiler.SetActive(false); } - Profiler.FrameData frameData = Profiler.frames.ElementAt(frame); - double ms = frameData.ElapsedMilliseconds(); + Profiler.Frame frame = Profiler.Frames.ElementAt(selectedFrame); + double ms = frame.ElapsedMilliseconds(); double s = 1000; - ImGui.Text($"Frame: {frameData.frame} ({ms:0.000} ms | {(s / ms):0} fps)"); + ImGui.Text($"Frame: {frame.FrameCount} ({ms:0.000} ms | {(s / ms):0} fps)"); ImGui.EndChild(); + if (!Profiler.IsRecording) { + if (previousSelectedFrame != selectedFrame) + { + previousSelectedFrame = selectedFrame; + orderedCombinedData = CalculateCombinedData(frame); + } + + DrawFlameGraph(frame); + + DrawHierachy(frame); + + ImGui.SameLine(); + + DrawCombined(orderedCombinedData); + } + + ImGui.End(); + } + + private static void DrawHierachy(Profiler.Frame frame) + { + if(frame == null) + { + return; + } + ImGui.BeginChild("Hierachy", new System.Numerics.Vector2(150, 0), ImGuiChildFlags.ResizeX); - if (ImGui.BeginTable("ProfilerData", 2, tableFlags, new System.Numerics.Vector2(0, 0))) + if (ImGui.BeginTable("ProfilerData", 3, tableFlags, new System.Numerics.Vector2(0, 0))) { ImGui.TableSetupColumn("name", ImGuiTableColumnFlags.WidthStretch, 0.8f, 0); + ImGui.TableSetupColumn("thread", ImGuiTableColumnFlags.WidthStretch, 0.2f, 1); ImGui.TableSetupColumn("ms", ImGuiTableColumnFlags.WidthStretch, 0.2f, 1); ImGui.TableSetupScrollFreeze(0, 1); // Make row always visible ImGui.TableHeadersRow(); - foreach (Profiler.ProfileRecord record in frameData.records) + foreach (Profiler.ScopeNode node in frame.RootNodes) { - ImGui.TableNextRow(); - ImGui.TableNextColumn(); - string indentation = new string(' ', record.depth); // Indentation based on depth - ImGui.Text($"{indentation}{record.label}"); - ImGui.TableNextColumn(); - ImGui.Text($"{record.ElapsedMilliseconds():0.000}"); + DrawHierachyNode(node); } ImGui.EndTable(); } ImGui.EndChild(); + } - ImGui.SameLine(); + private static void DrawHierachyNode(Profiler.ScopeNode node) + { + ImGui.TableNextRow(); + ImGui.TableNextColumn(); + + bool isOpen = false; + bool isLeaf = node.Children.Count == 0; + + if (isLeaf) { + ImGui.TreeNodeEx(node.Label, treeNodeLeafFlags); + } + else + { + isOpen = ImGui.TreeNodeEx(node.Label, treeNodeFlags); + } + + ImGui.TableNextColumn(); + ImGui.Text($"{node.ManagedThreadId}"); + ImGui.TableNextColumn(); + ImGui.Text($"{node.ElapsedMilliseconds():0.000}"); + + if (isOpen) + { + for (int i = 0; i < node.Children.Count; i++) + { + DrawHierachyNode(node.Children[i]); + } + ImGui.TreePop(); + } + } + + private static void DrawCombined(in IOrderedEnumerable> orderedCombinedData) + { + if(orderedCombinedData == null) + { + return; + } ImGui.BeginChild("Combined", new System.Numerics.Vector2(0, 0)); - // Gather combined data. - Dictionary combinedRecordData = new Dictionary(128); - foreach (Profiler.ProfileRecord record in frameData.records) - { - if (combinedRecordData.TryGetValue(record.label, out (double ms, uint calls) combined)) - { - combinedRecordData[record.label] = (combined.ms + record.ElapsedMilliseconds(), combined.calls + 1); - } - else - { - combinedRecordData.Add(record.label, (record.ElapsedMilliseconds(), 1)); - } - } - IOrderedEnumerable> orderedCombinedData = combinedRecordData.OrderByDescending(x => x.Value.ms); - if (ImGui.BeginTable("ProfilerCombinedData", 3, tableFlags, new System.Numerics.Vector2(0, 0))) { ImGui.TableSetupColumn("name", ImGuiTableColumnFlags.WidthStretch, 0.6f, 0); @@ -115,12 +162,43 @@ public override void Update(TimeSpan delta) } ImGui.EndChild(); - ImGui.End(); } - private void Draw() + private static IOrderedEnumerable> CalculateCombinedData(Profiler.Frame frame) { + Dictionary combinedRecordData = new Dictionary(128); + foreach (Profiler.ScopeNode node in frame.RootNodes) + { + CalculateCombinedData(node, in combinedRecordData); + } + return combinedRecordData.OrderByDescending(x => x.Value.ms); + } + private static void CalculateCombinedData(Profiler.ScopeNode node, in Dictionary combinedRecordData) + { + if (combinedRecordData.TryGetValue(node.Label, out (double ms, uint calls) combined)) + { + combinedRecordData[node.Label] = (combined.ms + node.ElapsedMilliseconds(), combined.calls + 1); + } + else + { + combinedRecordData.Add(node.Label, (node.ElapsedMilliseconds(), 1)); + } + + for (int i = 0; i < node.Children.Count; i++) + { + CalculateCombinedData(node.Children[i], combinedRecordData); + } + } + + private static void DrawFlameGraph(Profiler.Frame frame) + { + if (frame == null) + { + return; + } + + ProfilerVisualizer.RenderFlameGraph(frame); } } } diff --git a/Nerfed.Runtime/Profiler.cs b/Nerfed.Runtime/Profiler.cs index 07fad5a..d968b3f 100644 --- a/Nerfed.Runtime/Profiler.cs +++ b/Nerfed.Runtime/Profiler.cs @@ -1,134 +1,166 @@ -using System.Diagnostics; +using System.Collections.Concurrent; +using System.Diagnostics; namespace Nerfed.Runtime; public struct ProfilerScope : IDisposable { - public ProfilerScope(string label) { + public ProfilerScope(string label) + { Profiler.BeginSample(label); } - public void Dispose() { + public void Dispose() + { Profiler.EndSample(); } } public static class Profiler { - public struct ProfileRecord + public class Frame(uint frameCount) { - public string label; - public long startTime; - public long endTime; - public int depth; + public uint FrameCount { get; } = frameCount; + public long StartTime { get; } = Stopwatch.GetTimestamp(); + public long EndTime { get; private set; } - public readonly double ElapsedMilliseconds() + // Use a concurrent list to collect all thread root nodes per frame. + public ConcurrentBag RootNodes = new ConcurrentBag(); + + internal void End() { - long elapsedTicks = endTime - startTime; - return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency; - } - } - - public class FrameData - { - public uint frame; - public readonly List records = new List(); - public long startTime; - public long endTime; - - public FrameData(uint frame, long startTime) - { - this.frame = frame; - this.startTime = startTime; + EndTime = Stopwatch.GetTimestamp(); } public double ElapsedMilliseconds() { - long elapsedTicks = endTime - startTime; + long elapsedTicks = EndTime - StartTime; return ((double)(elapsedTicks * 1000)) / Stopwatch.Frequency; } } + public class ScopeNode(string label) + { + public string Label { get; } = label; + public long StartTime { get; private set; } = Stopwatch.GetTimestamp(); // Start time in ticks + public long EndTime { get; private set; } + public int ManagedThreadId { get; } = Environment.CurrentManagedThreadId; + public List Children { get; } = new List(); + + internal void End() + { + EndTime = Stopwatch.GetTimestamp(); // End time in ticks + } + + public double ElapsedMilliseconds() + { + return ((double)(EndTime - StartTime)) * 1000 / Stopwatch.Frequency; // Convert ticks to ms + } + + // Add a child node (used for nested scopes) + internal ScopeNode AddChild(string label) + { + ScopeNode child = new ScopeNode(label); + Children.Add(child); + return child; + } + } + private const int maxFrames = 128; - public static readonly BoundedQueue frames = new(maxFrames); - public static bool recording = true; + public static bool IsRecording { get; private set; } = true; - private static readonly Stopwatch stopwatch = new Stopwatch(); - private static FrameData currentFrame = null; - private static uint currentFrameIndex = 0; - private static int currentDepth = 0; + // Store only the last x amount of frames in memory. + public static readonly BoundedQueue Frames = new(maxFrames); - static Profiler() + // Use ThreadLocal to store a stack of ScopeNodes per thread and enable tracking of thread-local values. + private static readonly ThreadLocal> threadLocalScopes = new ThreadLocal>(() => new Stack(), true); + + private static Frame currentFrame = null; + private static uint frameCount = 0; + + public static void SetActive(bool isRecording) { - stopwatch.Start(); + IsRecording = isRecording; } [Conditional("PROFILING")] public static void BeginFrame() { - if (!recording) + if (!IsRecording) { return; } - currentFrame = new FrameData(currentFrameIndex, stopwatch.ElapsedTicks); - currentDepth = 0; - currentFrameIndex++; + currentFrame = new Frame(frameCount); } [Conditional("PROFILING")] public static void EndFrame() { - if (!recording) + if (!IsRecording) { return; } - currentFrame.endTime = stopwatch.ElapsedTicks; - frames.Enqueue(currentFrame); + foreach (Stack scopes in threadLocalScopes.Values) + { + if (scopes.Count > 0) + { + // Pop the left over root nodes. + ScopeNode currentScope = scopes.Pop(); + currentScope.End(); + } + + // Clean up the thread-local stack to ensure it's empty for the next frame. + scopes.Clear(); + } + + currentFrame.End(); + Frames.Enqueue(currentFrame); + frameCount++; } [Conditional("PROFILING")] public static void BeginSample(string label) { - if (!recording) + if (!IsRecording) { return; } - ProfileRecord record = new ProfileRecord + Stack scopes = threadLocalScopes.Value; // Get the stack for the current thread + + if (scopes.Count == 0) { - label = label, - startTime = stopwatch.ElapsedTicks, - depth = currentDepth, - }; - currentFrame.records.Add(record); - //Log.Info($"{record.label} {record.depth} | {record.startTime}"); - currentDepth++; // Increase depth for nested scopes + // First scope for this thread (new root for this thread) + ScopeNode rootScopeNode = new ScopeNode($"Thread-{Environment.CurrentManagedThreadId}"); + scopes.Push(rootScopeNode); + currentFrame.RootNodes.Add(rootScopeNode); // Add root node to the frame list + } + + // Create a new child under the current top of the stack + ScopeNode newScope = scopes.Peek().AddChild(label); + + scopes.Push(newScope); // Push new scope to the thread's stack } [Conditional("PROFILING")] public static void EndSample() { - if (!recording) + if (!IsRecording) { return; } - currentDepth--; // Decrease depth when exiting a scope + Stack scopes = threadLocalScopes.Value; - // Find the last uncompleted record at the current depth and set the end time - for (int i = currentFrame.records.Count - 1; i >= 0; i--) + if (scopes.Count > 0) { - if (currentFrame.records[i].endTime == 0) - { - ProfileRecord record = currentFrame.records[i]; - record.endTime = stopwatch.ElapsedTicks; - currentFrame.records[i] = record; // Assign back to the list - //Log.Info($"{record.label} | {record.depth} | {record.endTime}"); - break; - } + // Only pop if this is not the root node. + //ScopeNode currentScope = scopes.Count > 1 ? scopes.Pop() : scopes.Peek(); + ScopeNode currentScope = scopes.Pop(); + currentScope.End(); } } } \ No newline at end of file diff --git a/Nerfed.Runtime/ProfilerVisualizer.cs b/Nerfed.Runtime/ProfilerVisualizer.cs new file mode 100644 index 0000000..116be63 --- /dev/null +++ b/Nerfed.Runtime/ProfilerVisualizer.cs @@ -0,0 +1,156 @@ +using ImGuiNET; +using System.Numerics; + +namespace Nerfed.Runtime; + +public static class ProfilerVisualizer +{ + private const float barHeight = 20f; + private const float barPadding = 2f; + + // Render the flame graph across multiple threads + public static void RenderFlameGraph(Profiler.Frame frame) + { + if (frame == null) return; + if (frame.RootNodes == null) return; + + // Calculate the total timeline duration (max end time across all nodes) + double totalDuration = frame.EndTime - frame.StartTime; + double startTime = frame.StartTime; + + // Precompute the maximum depth for each thread's call stack + Dictionary threadMaxDepths = new Dictionary(); + foreach (IGrouping threadGroup in frame.RootNodes.GroupBy(node => node.ManagedThreadId)) + { + int maxDepth = 0; + foreach (Profiler.ScopeNode rootNode in threadGroup) + { + maxDepth = Math.Max(maxDepth, GetMaxDepth(rootNode, 0)); + } + threadMaxDepths[threadGroup.Key] = maxDepth; + } + + // Start a child window to support scrolling + ImGui.BeginChild("FlameGraph", new Vector2(0, 64), ImGuiChildFlags.Border | ImGuiChildFlags.ResizeY, ImGuiWindowFlags.HorizontalScrollbar | ImGuiWindowFlags.AlwaysVerticalScrollbar); + + ImDrawListPtr drawList = ImGui.GetWindowDrawList(); + Vector2 windowPos = ImGui.GetCursorScreenPos(); + + // Sort nodes by ThreadID, ensuring main thread (Thread ID 1) is on top + IOrderedEnumerable> threadGroups = frame.RootNodes.GroupBy(node => node.ManagedThreadId).OrderBy(g => g.Key); + + // Initial Y position for drawing + float baseY = windowPos.Y; + bool alternate = false; + float contentWidth = ImGui.GetContentRegionAvail().X; + + // Draw each thread's flame graph row by row + foreach (IGrouping threadGroup in threadGroups) + { + int threadId = threadGroup.Key; + + // Compute the base Y position for this thread + float threadBaseY = baseY; + + // Calculate the maximum height for this thread's flame graph + float threadHeight = (threadMaxDepths[threadId] + 1) * (barHeight + barPadding); + + // Draw the alternating background for each thread row + uint backgroundColor = ImGui.ColorConvertFloat4ToU32(alternate ? new Vector4(0.2f, 0.2f, 0.2f, 1f) : new Vector4(0.1f, 0.1f, 0.1f, 1f)); + drawList.AddRectFilled(new Vector2(windowPos.X, threadBaseY), new Vector2(windowPos.X + contentWidth, threadBaseY + threadHeight), backgroundColor); + + alternate = !alternate; + + // Draw each root node in the group (one per thread) + foreach (Profiler.ScopeNode rootNode in threadGroup) + { + RenderNode(drawList, rootNode, startTime, totalDuration, windowPos.X, threadBaseY, 0, contentWidth, false); + } + + // Move to the next thread's row (max depth * height per level) + baseY += (threadMaxDepths[threadId] + 1) * (barHeight + barPadding); + } + + // Ensure that ImGui knows the size of the content. + ImGui.Dummy(new Vector2(contentWidth, baseY)); + + ImGui.EndChild(); + } + + private static void RenderNode(ImDrawListPtr drawList, Profiler.ScopeNode node, double startTime, double totalDuration, float startX, float baseY, int depth, float contentWidth, bool alternate) + { + if (node == null) return; + + double nodeStartTime = node.StartTime - startTime; + double nodeEndTime = node.EndTime - startTime; + double nodeDuration = nodeEndTime - nodeStartTime; + + // Calculate the position and width of the bar based on time + float xPos = (float)(startX + (nodeStartTime / totalDuration) * contentWidth); + float width = (float)((nodeDuration / totalDuration) * contentWidth); + + // Calculate the Y position based on depth + float yPos = baseY + (depth * (barHeight + barPadding)) + (barPadding * 0.5f); + + // Define the rectangle bounds for the node + Vector2 min = new Vector2(xPos, yPos); + Vector2 max = new Vector2(xPos + width, yPos + barHeight); + + // Define color. + Vector4 barColor = alternate ? new Vector4(0.4f, 0.6f, 0.9f, 1f) : new Vector4(0.4f, 0.5f, 0.8f, 1f); + Vector4 textColor = new Vector4(1f, 1f, 1f, 1f); + + if (depth != 0) + { + // Draw the bar for the node (colored based on thread depth) + drawList.AddRectFilled(min, max, ImGui.ColorConvertFloat4ToU32(barColor)); + + // Draw the label if it fits inside the bar + string label = $"{node.Label} ({node.ElapsedMilliseconds():0.000} ms)"; + if (width > ImGui.CalcTextSize(label).X) + { + drawList.AddText(new Vector2(xPos + barPadding, yPos + barPadding), ImGui.ColorConvertFloat4ToU32(textColor), label); + } + + // Add tooltip on hover + if (ImGui.IsMouseHoveringRect(min, max)) + { + // Show tooltip when hovering over the node + ImGui.BeginTooltip(); + ImGui.Text($"{node.Label}"); + ImGui.Text($"{node.ElapsedMilliseconds():0.000} ms"); + ImGui.Text($"{node.ManagedThreadId}"); + ImGui.EndTooltip(); + } + } + else + { + // Aka root node. + string label = $"{node.Label}"; + drawList.AddText(new Vector2(startX + barPadding, yPos + barPadding), ImGui.ColorConvertFloat4ToU32(textColor), label); + } + + // Draw each child node under this node + foreach (Profiler.ScopeNode child in node.Children) + { + alternate = !alternate; + RenderNode(drawList, child, startTime, totalDuration, startX, baseY, depth + 1, contentWidth, alternate); + } + } + + // Recursive function to calculate the maximum depth of the node tree + private static int GetMaxDepth(Profiler.ScopeNode node, int currentDepth) + { + if (node.Children == null || node.Children.Count == 0) + { + return currentDepth; + } + + int maxDepth = currentDepth; + foreach (Profiler.ScopeNode child in node.Children) + { + maxDepth = Math.Max(maxDepth, GetMaxDepth(child, currentDepth + 1)); + } + return maxDepth; + } +} \ No newline at end of file diff --git a/Nerfed.Runtime/Systems/LocalToWorldSystem.cs b/Nerfed.Runtime/Systems/LocalToWorldSystem.cs index cf460b2..56947a4 100644 --- a/Nerfed.Runtime/Systems/LocalToWorldSystem.cs +++ b/Nerfed.Runtime/Systems/LocalToWorldSystem.cs @@ -15,7 +15,7 @@ namespace Nerfed.Runtime.Systems { public class LocalToWorldSystem : MoonTools.ECS.System { - private readonly bool useParallelFor = false; // When having a low amount of transforms or when in debug mode this might be slower. + private readonly bool useParallelFor = true; // When having a low amount of transforms or when in debug mode this might be slower. private readonly Filter rootEntitiesFilter; private readonly Filter transformEntitiesFilter; private readonly Action updateWorldTransform; @@ -62,15 +62,19 @@ public override void Update(TimeSpan delta) { foreach (Entity entity in rootEntitiesFilter.Entities) { + Profiler.BeginSample("UpdateWorldTransform"); UpdateWorldTransform(entity, Matrix4x4.Identity); + Profiler.EndSample(); } } } private void UpdateWorldTransformByIndex(int entityFilterIndex) { + Profiler.BeginSample("UpdateWorldTransformByIndex"); Entity entity = rootEntitiesFilter.NthEntity(entityFilterIndex); UpdateWorldTransform(entity, Matrix4x4.Identity); + Profiler.EndSample(); } private void UpdateWorldTransform(in Entity entity, Matrix4x4 localToWorldMatrix)