Skip to content

Commit 7e515fe

Browse files
committed
Feature: Added completion states on command buffers, so callers know when they finished executing on the GPU
- Also updated the GPU profiler so individual views can be profiled independently (profiling entire frames was causing issues with queries across multiple command buffers)
1 parent b467466 commit 7e515fe

19 files changed

+373
-129
lines changed

Source/Foundation/bsfCore/Profiling/BsProfilerGPU.cpp

Lines changed: 115 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ namespace bs
1919
{
2020
while (!mUnresolvedFrames.empty())
2121
{
22-
ProfiledSample& frameSample = mUnresolvedFrames.front();
22+
ProfiledFrame& frame = mUnresolvedFrames.front();
2323

24-
freeSample(frameSample);
24+
// Free the frame at the front of the queue (the one popped right below), not mActiveFrame;
// otherwise the samples owned by the queued frames are never released.
freeFrame(frame);
2525
mUnresolvedFrames.pop();
2626
}
2727

@@ -36,11 +36,9 @@ namespace bs
3636
return;
3737
}
3838

39-
mFrameSample = ProfiledSample();
40-
mFrameSample.name = "Frame";
41-
beginSampleInternal(mFrameSample, true);
42-
4339
mIsFrameActive = true;
40+
mActiveFrame.uncategorizedSamples.clear();
41+
mActiveFrame.viewSamples.clear();
4442
}
4543

4644
void ProfilerGPU::endFrame(bool discard)
@@ -51,19 +49,61 @@ namespace bs
5149
return;
5250
}
5351

54-
if (!mIsFrameActive)
52+
if (mIsViewActive)
53+
{
54+
BS_LOG(Error, Profiler, "Attempting to end a frame while a view is active.");
5555
return;
56+
}
5657

57-
endSampleInternal(mFrameSample);
58+
if (!mIsFrameActive)
59+
return;
5860

5961
if(!discard)
60-
mUnresolvedFrames.push(mFrameSample);
62+
mUnresolvedFrames.push(mActiveFrame);
6163
else
62-
freeSample(mFrameSample);
64+
freeFrame(mActiveFrame);
6365

6466
mIsFrameActive = false;
6567
}
6668

69+
/**
 * Begins profiling of a single view. Requires an active frame and no other active view; if either
 * condition is not met an error is logged and the call is ignored.
 *
 * @param[in]	id		Identifier stored on the view sample.
 * @param[in]	title	Name assigned to the view sample.
 */
void ProfilerGPU::beginView(UINT64 id, ProfilerString title)
{
	if (!mIsFrameActive)
	{
		BS_LOG(Error, Profiler, "Cannot begin a view because no frame is active.");
		return;
	}

	if (mIsViewActive)
	{
		BS_LOG(Error, Profiler, "Cannot begin a view because another view is active.");
		return;
	}

	auto sample = mViewSamplePool.construct<ProfiledViewSample>();
	sample->viewId = id;

	// The title was previously ignored, leaving the view sample without a name. Assign it before
	// the sample is started, same as beginSample() does for regular samples.
	sample->name = std::move(title);

	// The active frame keeps track of the sample so it can later be resolved and freed
	mActiveFrame.viewSamples.push_back(sample);

	beginSampleInternal(*sample, true);
	mIsViewActive = true;
}
91+
92+
void ProfilerGPU::endView()
93+
{
94+
if (!mActiveSamples.empty())
95+
{
96+
BS_LOG(Error, Profiler, "Attempting to end a view while a sample is active.");
97+
return;
98+
}
99+
100+
if (!mIsViewActive)
101+
return;
102+
103+
endSampleInternal(*mActiveFrame.viewSamples.back());
104+
mIsViewActive = false;
105+
}
106+
67107
void ProfilerGPU::beginSample(ProfilerString name)
68108
{
69109
if (!mIsFrameActive)
@@ -76,8 +116,13 @@ namespace bs
76116
sample->name = std::move(name);
77117
beginSampleInternal(*sample, false);
78118

79-
if(mActiveSamples.empty())
80-
mFrameSample.children.push_back(sample);
119+
if (mActiveSamples.empty())
120+
{
121+
if (mIsViewActive)
122+
mActiveFrame.viewSamples.back()->children.push_back(sample);
123+
else
124+
mActiveFrame.uncategorizedSamples.push_back(sample);
125+
}
81126
else
82127
{
83128
ProfiledSample* parent = mActiveSamples.top();
@@ -133,29 +178,53 @@ namespace bs
133178
{
134179
while (!mUnresolvedFrames.empty())
135180
{
136-
ProfiledSample& frameSample = mUnresolvedFrames.front();
181+
ProfiledFrame& frame = mUnresolvedFrames.front();
137182

138-
// Frame sample timer query is the last query we issued
139-
// so if it is complete, we may assume all queries are complete.
140-
if (frameSample.activeTimeQuery->isReady())
183+
// Make sure all the top-level queries have finished. If they have that implies
184+
// all their children have finished as well
185+
bool isReady = true;
186+
for(auto& entry : frame.viewSamples)
141187
{
142-
GPUProfilerReport report;
143-
resolveSample(frameSample, report.frameSample);
144-
145-
freeSample(frameSample);
146-
mUnresolvedFrames.pop();
188+
if (!entry->activeTimeQuery->isReady())
189+
{
190+
isReady = false;
191+
break;
192+
}
193+
}
147194

195+
for(auto& entry : frame.uncategorizedSamples)
196+
{
197+
if (!entry->activeTimeQuery->isReady())
148198
{
149-
Lock lock(mMutex);
150-
mReadyReports[(mReportHeadPos + mReportCount) % MAX_QUEUE_ELEMENTS] = report;
151-
if (mReportCount == MAX_QUEUE_ELEMENTS)
152-
mReportHeadPos = (mReportHeadPos + 1) % MAX_QUEUE_ELEMENTS;
153-
else
154-
mReportCount++;
199+
isReady = false;
200+
break;
155201
}
156202
}
157-
else
203+
204+
if (!isReady)
158205
break;
206+
207+
GPUProfilerReport report;
208+
report.viewSamples.resize(frame.viewSamples.size());
209+
report.uncategorizedSamples.resize(frame.uncategorizedSamples.size());
210+
211+
for (size_t i = 0; i < frame.viewSamples.size(); i++)
212+
resolveSample(*frame.viewSamples[i], report.viewSamples[i]);
213+
214+
for (size_t i = 0; i < frame.uncategorizedSamples.size(); i++)
215+
resolveSample(*frame.uncategorizedSamples[i], report.uncategorizedSamples[i]);
216+
217+
freeFrame(mActiveFrame);
218+
mUnresolvedFrames.pop();
219+
220+
{
221+
Lock lock(mMutex);
222+
mReadyReports[(mReportHeadPos + mReportCount) % MAX_QUEUE_ELEMENTS] = report;
223+
if (mReportCount == MAX_QUEUE_ELEMENTS)
224+
mReportHeadPos = (mReportHeadPos + 1) % MAX_QUEUE_ELEMENTS;
225+
else
226+
mReportCount++;
227+
}
159228
}
160229
}
161230

@@ -175,6 +244,24 @@ namespace bs
175244
mFreeOcclusionQueries.push(sample.activeOcclusionQuery);
176245
}
177246

247+
/**
 * Releases every sample referenced by the frame: each sample is passed to freeSample() and then
 * destructed through the pool it was allocated from. Both sample lists are empty afterwards.
 */
void ProfilerGPU::freeFrame(ProfiledFrame& frame)
{
	// View samples come from the dedicated view sample pool
	for (auto viewSample : frame.viewSamples)
	{
		freeSample(*viewSample);
		mViewSamplePool.destruct(viewSample);
	}

	// Samples recorded outside of any view come from the regular sample pool
	for (auto looseSample : frame.uncategorizedSamples)
	{
		freeSample(*looseSample);
		mSamplePool.destruct(looseSample);
	}

	frame.viewSamples.clear();
	frame.uncategorizedSamples.clear();
}
264+
178265
void ProfilerGPU::resolveSample(const ProfiledSample& sample, GPUProfileSample& reportSample)
179266
{
180267
reportSample.name.assign(sample.name.data(), sample.name.size());

Source/Foundation/bsfCore/Profiling/BsProfilerGPU.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,17 @@ namespace bs
4343
Vector<GPUProfileSample> children;
4444
};
4545

46+
/** Contains various profiler statistics for a particular view. */
47+
struct GPUProfileViewSample : GPUProfileSample
48+
{
49+
UINT64 viewId;
50+
};
51+
4652
/** Profiler report containing information about GPU sampling data from a single frame. */
4753
struct GPUProfilerReport
4854
{
49-
GPUProfileSample frameSample; /**< Sample containing data for entire frame. */
55+
Vector<GPUProfileViewSample> viewSamples; /**< Profiler samples belonging to a particular view. */
56+
Vector<GPUProfileSample> uncategorizedSamples; /**< Profiler samples not grouped under a particular view. */
5057
};
5158

5259
/**
@@ -68,6 +75,17 @@ namespace bs
6875
Vector<ProfiledSample*> children;
6976
};
7077

78+
struct ProfiledViewSample : ProfiledSample
79+
{
80+
UINT64 viewId;
81+
};
82+
83+
struct ProfiledFrame
84+
{
85+
Vector<ProfiledViewSample*> viewSamples;
86+
Vector<ProfiledSample*> uncategorizedSamples;
87+
};
88+
7189
public:
7290
ProfilerGPU();
7391
~ProfilerGPU();
@@ -87,6 +105,19 @@ namespace bs
87105
*/
88106
void endFrame(bool discard = false);
89107

108+
/**
109+
* Signals that all following sample calls are used for rendering a particular view represented with the provided
110+
* id. A top-level timing and occlusion query is issued for the entire view and all following samples will
111+
* be grouped under the view in the output report. Must be followed by endView() when done sampling.
112+
*
113+
* @param[in] id Identifier that can be used to uniquely identify the view.
114+
* @param[in] title Title describing the view.
115+
*/
116+
void beginView(UINT64 id, ProfilerString title);
117+
118+
/** Signals the end of rendering for a particular view. Must match the corresponding beginView() call. */
119+
void endView();
120+
90121
/**
91122
* Begins sample measurement. Must be followed by endSample().
92123
*
@@ -155,21 +186,26 @@ namespace bs
155186
/** Frees the memory used by all the child samples. */
156187
void freeSample(ProfiledSample& sample);
157188

189+
/** Frees the memory used by all the samples in the frame. */
190+
void freeFrame(ProfiledFrame& frame);
191+
158192
/** Resolves an active sample and converts it to report sample. */
159193
void resolveSample(const ProfiledSample& sample, GPUProfileSample& reportSample);
160194

161195
private:
162-
ProfiledSample mFrameSample;
163196
bool mIsFrameActive = false;
197+
bool mIsViewActive = false;
164198
Stack<ProfiledSample*> mActiveSamples;
199+
ProfiledFrame mActiveFrame;
165200

166-
Queue<ProfiledSample> mUnresolvedFrames;
201+
Queue<ProfiledFrame> mUnresolvedFrames;
167202
GPUProfilerReport* mReadyReports = nullptr;
168203

169204
static const UINT32 MAX_QUEUE_ELEMENTS;
170205
UINT32 mReportHeadPos = 0;
171206
UINT32 mReportCount = 0;
172207

208+
PoolAlloc<sizeof(ProfiledViewSample), 16> mViewSamplePool;
173209
PoolAlloc<sizeof(ProfiledSample), 256> mSamplePool;
174210

175211
mutable Stack<SPtr<ct::TimerQuery>> mFreeTimerQueries;

Source/Foundation/bsfCore/RenderAPI/BsCommandBuffer.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,25 @@ namespace bs { namespace ct
3636
UINT32 mMask = 0;
3737
};
3838

39+
/** Describes where in its lifecycle a CommandBuffer currently is. */
enum class CommandBufferState
{
	/** Nothing has been recorded into the buffer and it has not been queued for execution. */
	Empty,

	/** At least one command has been recorded, but the buffer has not been queued for execution. */
	Recording,

	/**
	 * The buffer has been queued for execution and has not finished executing yet. While in this state
	 * the buffer cannot be modified or re-submitted for execution.
	 */
	Executing,

	/** The buffer has been queued for execution and that execution has finished. */
	Done
};
57+
3958
/**
4059
* Contains a list of render API commands that can be queued for execution on the GPU. User is allowed to populate the
4160
* command buffer from any thread, ensuring render API command generation can be multi-threaded. Command buffers
@@ -79,6 +98,15 @@ namespace bs { namespace ct
7998
/** Returns the device index this buffer will execute on. */
8099
UINT32 getDeviceIdx() const { return mDeviceIdx; }
81100

101+
/** Returns the current state of the command buffer. */
102+
virtual CommandBufferState getState() const = 0;
103+
104+
/**
105+
* Resets the command buffer back into initial state. Must only be used if the command buffer is
106+
* not in the executing state.
107+
*/
108+
virtual void reset() = 0;
109+
82110
protected:
83111
CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
84112

Source/Foundation/bsfEngine/GUI/BsProfilerOverlay.cpp

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,11 @@ namespace bs
538538

539539
if (ProfilerGPU::instance().getNumAvailableReports() > 0)
540540
{
541-
updateGPUSampleContents(ProfilerGPU::instance().getNextReport());
541+
GPUProfilerReport report = ProfilerGPU::instance().getNextReport();
542+
543+
// TODO - Currently displaying just the first view. I need to add a way to toggle between views
544+
if(!report.viewSamples.empty())
545+
updateGPUSampleContents(report.viewSamples[0]);
542546
}
543547
}
544548

@@ -705,26 +709,26 @@ namespace bs
705709
}
706710
}
707711

708-
void ProfilerOverlay::updateGPUSampleContents(const GPUProfilerReport& gpuReport)
712+
void ProfilerOverlay::updateGPUSampleContents(const GPUProfileSample& frameSample)
709713
{
710714
mGPUFrameNumStr.setParameter(0, toString((UINT64)gTime().getFrameIdx()));
711-
mGPUTimeStr.setParameter(0, toString(gpuReport.frameSample.timeMs));
712-
mGPUDrawCallsStr.setParameter(0, toString(gpuReport.frameSample.numDrawCalls));
713-
mGPURenTargetChangesStr.setParameter(0, toString(gpuReport.frameSample.numRenderTargetChanges));
714-
mGPUPresentsStr.setParameter(0, toString(gpuReport.frameSample.numPresents));
715-
mGPUClearsStr.setParameter(0, toString(gpuReport.frameSample.numClears));
716-
mGPUVerticesStr.setParameter(0, toString(gpuReport.frameSample.numVertices));
717-
mGPUPrimitivesStr.setParameter(0, toString(gpuReport.frameSample.numPrimitives));
718-
mGPUSamplesStr.setParameter(0, toString(gpuReport.frameSample.numDrawnSamples));
719-
mGPUPipelineStateChangesStr.setParameter(0, toString(gpuReport.frameSample.numPipelineStateChanges));
720-
721-
mGPUObjectsCreatedStr.setParameter(0, toString(gpuReport.frameSample.numObjectsCreated));
722-
mGPUObjectsDestroyedStr.setParameter(0, toString(gpuReport.frameSample.numObjectsDestroyed));
723-
mGPUResourceWritesStr.setParameter(0, toString(gpuReport.frameSample.numResourceWrites));
724-
mGPUResourceReadsStr.setParameter(0, toString(gpuReport.frameSample.numResourceReads));
725-
mGPUParamBindsStr.setParameter(0, toString(gpuReport.frameSample.numGpuParamBinds));
726-
mGPUVertexBufferBindsStr.setParameter(0, toString(gpuReport.frameSample.numVertexBufferBinds));
727-
mGPUIndexBufferBindsStr.setParameter(0, toString(gpuReport.frameSample.numIndexBufferBinds));
715+
mGPUTimeStr.setParameter(0, toString(frameSample.timeMs));
716+
mGPUDrawCallsStr.setParameter(0, toString(frameSample.numDrawCalls));
717+
mGPURenTargetChangesStr.setParameter(0, toString(frameSample.numRenderTargetChanges));
718+
mGPUPresentsStr.setParameter(0, toString(frameSample.numPresents));
719+
mGPUClearsStr.setParameter(0, toString(frameSample.numClears));
720+
mGPUVerticesStr.setParameter(0, toString(frameSample.numVertices));
721+
mGPUPrimitivesStr.setParameter(0, toString(frameSample.numPrimitives));
722+
mGPUSamplesStr.setParameter(0, toString(frameSample.numDrawnSamples));
723+
mGPUPipelineStateChangesStr.setParameter(0, toString(frameSample.numPipelineStateChanges));
724+
725+
mGPUObjectsCreatedStr.setParameter(0, toString(frameSample.numObjectsCreated));
726+
mGPUObjectsDestroyedStr.setParameter(0, toString(frameSample.numObjectsDestroyed));
727+
mGPUResourceWritesStr.setParameter(0, toString(frameSample.numResourceWrites));
728+
mGPUResourceReadsStr.setParameter(0, toString(frameSample.numResourceReads));
729+
mGPUParamBindsStr.setParameter(0, toString(frameSample.numGpuParamBinds));
730+
mGPUVertexBufferBindsStr.setParameter(0, toString(frameSample.numVertexBufferBinds));
731+
mGPUIndexBufferBindsStr.setParameter(0, toString(frameSample.numIndexBufferBinds));
728732

729733
mGPUFrameNumLbl->setContent(mGPUFrameNumStr);
730734
mGPUTimeLbl->setContent(mGPUTimeStr);
@@ -766,7 +770,7 @@ namespace bs
766770
UINT32 currentCount = 0;
767771

768772
Stack<Todo> todo;
769-
todo.push(Todo(gpuReport.frameSample, 0));
773+
todo.push(Todo(frameSample, 0));
770774

771775
while (!todo.empty())
772776
{

0 commit comments

Comments
 (0)