Vulkan: Add descriptor set allocation counters

Add descriptor set allocation counters for the following:
- ContextVk
  - Driver uniform allocations for graphics and compute pipelines.
- ProgramExecutableVk
  - ANGLE driver uniforms
  - Uniforms
  - Textures
  - Other shader resources
- UtilsVk
  - All of the UtilsVk::Function types increment the same counter

Each object's counters live within the object itself and the cumulative
total is output as part of that object's destruction. On Present, all of
the descriptor set counts are collected into a single total which is
used to update the overlay each frame.

In order to see the cumulative total output for each object, the
following GN args must be enabled:
is_debug = true
angle_enable_perf_counter_output = true

To see the descriptor set allocation overlay:
ANGLE_OVERLAY=VulkanDescriptorSetAllocations

Bug: angleproject:5067
Test: Manual verification with angle_perftests
Change-Id: Ie45fda56ade3e68bfba7bf6da9554eb05a02c6b6
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2429487
Commit-Queue: Tim Van Patten <timvp@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Charlie Lao <cclao@google.com>
This commit is contained in:
Tim Van Patten 2020-09-24 11:39:49 -06:00 коммит произвёл Commit Bot
Родитель 626a4185ea
Коммит ee4e0866cb
17 изменённых файлов: 276 добавлений и 16 удалений

Просмотреть файл

@ -606,6 +606,9 @@ config("libANGLE_config") {
if (angle_enable_overlay) {
defines += [ "ANGLE_ENABLE_OVERLAY=1" ]
}
if (angle_enable_perf_counter_output) {
defines += [ "ANGLE_ENABLE_PERF_COUNTER_OUTPUT=1" ]
}
}
angle_source_set("libANGLE_headers") {

Просмотреть файл

@ -131,6 +131,9 @@ declare_args() {
# Disable overlay by default
angle_enable_overlay = false
# Disable performance counter output by default
angle_enable_perf_counter_output = false
}
if (!defined(angle_zlib_compression_utils_dir)) {

Просмотреть файл

@ -1,10 +1,10 @@
{
"src/libANGLE/Overlay_autogen.cpp":
"6c9c7df59562390505222145c1f32277",
"849f447a220cb0ce00a41f99db179a6b",
"src/libANGLE/Overlay_autogen.h":
"03ac72d8286f1f933696fa3dabb75eb1",
"4f29dd0e9c2030b98b396fdf03eaeb29",
"src/libANGLE/gen_overlay_widgets.py":
"f4395481db010c82af2e2981353e8592",
"src/libANGLE/overlay_widgets.json":
"dd9d2a72035e754bbc5f614410e76df1"
"93205f3d9585228428bc62463f478dc6"
}

Просмотреть файл

@ -2798,7 +2798,7 @@ void CaptureMidExecutionSetup(const gl::Context *context,
const gl::ResourceMap<gl::Shader, gl::ShaderProgramID> &shaders =
shadersAndPrograms.getShadersForCapture();
const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programs =
shadersAndPrograms.getProgramsForCapture();
shadersAndPrograms.getProgramsForCaptureAndPerf();
// Capture Program binary state. Use max ID as a temporary shader ID.
gl::ShaderProgramID tempShaderID = {resourceTracker->getMaxShaderPrograms()};

Просмотреть файл

@ -300,10 +300,10 @@ void AppendWidgetDataHelper::AppendRunningHistogramCommon(const overlay::Widget
OverlayWidgetCounts *widgetCounts,
FormatHistogramTitleFunc formatFunc)
{
const overlay::RunningHistogram *secondaryCommandBufferPoolWaste =
const overlay::RunningHistogram *runningHistogram =
static_cast<const overlay::RunningHistogram *>(widget);
std::vector<size_t> histogram = CreateHistogram(secondaryCommandBufferPoolWaste->runningValues);
std::vector<size_t> histogram = CreateHistogram(runningHistogram->runningValues);
auto peakRangeIt = std::max_element(histogram.rbegin(), histogram.rend());
const size_t peakRangeValue = *peakRangeIt;
const int32_t graphHeight = std::abs(widget->coords[3] - widget->coords[1]);
@ -320,8 +320,8 @@ void AppendWidgetDataHelper::AppendRunningHistogramCommon(const overlay::Widget
size_t maxValueRange = std::distance(maxValueIter, histogram.rend() - 1);
std::string text = formatFunc(peakRange, maxValueRange, histogram.size());
AppendTextCommon(&secondaryCommandBufferPoolWaste->description, imageExtent, text,
textWidget, widgetCounts);
AppendTextCommon(&runningHistogram->description, imageExtent, text, textWidget,
widgetCounts);
}
}
@ -431,6 +431,21 @@ void AppendWidgetDataHelper::AppendVulkanWriteDescriptorSetCount(const overlay::
AppendRunningGraphCommon(widget, imageExtent, textWidget, graphWidget, widgetCounts, format);
}
// Appends the draw data for the "Descriptor Set Allocations" running graph widget. All of the
// work is delegated to AppendRunningGraphCommon(); this function only supplies the formatter used
// to build the widget's text label from the value the helper passes in (labelled "Max").
void AppendWidgetDataHelper::AppendVulkanDescriptorSetAllocations(const overlay::Widget *widget,
                                                                  const gl::Extents &imageExtent,
                                                                  TextWidgetData *textWidget,
                                                                  GraphWidgetData *graphWidget,
                                                                  OverlayWidgetCounts *widgetCounts)
{
    // Kept captureless, like the formatters of the other running graph widgets.
    auto describeMax = [](size_t peak) -> std::string {
        std::ostringstream label;
        label << "Descriptor Set Allocations (Max: ";
        label << peak;
        label << ")";
        return label.str();
    };

    AppendRunningGraphCommon(widget, imageExtent, textWidget, graphWidget, widgetCounts,
                             describeMax);
}
std::ostream &AppendWidgetDataHelper::OutputPerSecond(std::ostream &out,
const overlay::PerSecond *perSecond)
{

Просмотреть файл

@ -274,6 +274,49 @@ void Overlay::initOverlayWidgets()
widget->description.color[3] = 1.0f;
}
}
// Sets up the VulkanDescriptorSetAllocations widget: a running graph with 60 values plus the text
// description attached to it.
// NOTE(review): the literal values below match the "VulkanDescriptorSetAllocations" entry of
// overlay_widgets.json, so this looks like generated code - confirm before editing by hand.
{
RunningGraph *widget = new RunningGraph(60);
{
// Graph geometry: the right edge sits at offsetX and the graph extends to the left by 6 units
// per stored running value; it is 100 units tall starting at offsetY.
const int32_t fontSize = GetFontSize(0, kLargeFont);
const int32_t offsetX = -50;
const int32_t offsetY = 250;
const int32_t width = 6 * static_cast<uint32_t>(widget->runningValues.size());
const int32_t height = 100;
widget->type = WidgetType::RunningGraph;
widget->fontSize = fontSize;
widget->coords[0] = offsetX - width;
widget->coords[1] = offsetY;
widget->coords[2] = offsetX;
widget->coords[3] = offsetY + height;
// Normalized color components: 0.294... is 75/255 and 0.784... is 200/255.
widget->color[0] = 1.0f;
widget->color[1] = 0.0f;
widget->color[2] = 0.294117647059f;
widget->color[3] = 0.78431372549f;
}
// Hand the raw pointer over to the widget table (presumably an owning smart pointer, given the
// reset() call). `widget` is still used below to fill in the description.
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations].reset(widget);
{
// Description text: left-aligned with the graph (coords[0]) and placed so that its bottom edge
// is the graph's coords[1]; sized for 40 glyphs of the selected font.
const int32_t fontSize = GetFontSize(kFontLayerSmall, kLargeFont);
const int32_t offsetX =
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations]->coords[0];
const int32_t offsetY =
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations]->coords[1];
const int32_t width = 40 * kFontGlyphWidths[fontSize];
const int32_t height = kFontGlyphHeights[fontSize];
widget->description.type = WidgetType::Text;
widget->description.fontSize = fontSize;
widget->description.coords[0] = offsetX;
// The two clamps keep coords[1] at 1 or above and coords[2] at -1 or below.
// NOTE(review): presumably this stops the text from crossing the edge the coordinates are
// measured from - confirm against the overlay coordinate convention.
widget->description.coords[1] = std::max(offsetY - height, 1);
widget->description.coords[2] = std::min(offsetX + width, -1);
widget->description.coords[3] = offsetY;
// Same hue as the graph, but fully opaque.
widget->description.color[0] = 1.0f;
widget->description.color[1] = 0.0f;
widget->description.color[2] = 0.294117647059f;
widget->description.color[3] = 1.0f;
}
}
}
} // namespace gl

Просмотреть файл

@ -26,6 +26,8 @@ enum class WidgetId
VulkanSecondaryCommandBufferPoolWaste,
// Number of Descriptor Set writes in a frame (Count).
VulkanWriteDescriptorSetCount,
// Descriptor Set Allocations.
VulkanDescriptorSetAllocations,
InvalidEnum,
EnumCount = InvalidEnum,
@ -39,6 +41,7 @@ enum class WidgetId
PROC(VulkanRenderPassCount) \
PROC(VulkanRenderPassBufferCount) \
PROC(VulkanSecondaryCommandBufferPoolWaste) \
PROC(VulkanWriteDescriptorSetCount)
PROC(VulkanWriteDescriptorSetCount) \
PROC(VulkanDescriptorSetAllocations)
} // namespace gl

Просмотреть файл

@ -164,9 +164,12 @@ class ShaderProgramManager : public ResourceManagerBase
return mPrograms.query(handle);
}
// For capture only.
// For capture and performance counters only.
const ResourceMap<Shader, ShaderProgramID> &getShadersForCapture() const { return mShaders; }
const ResourceMap<Program, ShaderProgramID> &getProgramsForCapture() const { return mPrograms; }
const ResourceMap<Program, ShaderProgramID> &getProgramsForCaptureAndPerf() const
{
return mPrograms;
}
protected:
~ShaderProgramManager() override;

Просмотреть файл

@ -991,7 +991,7 @@ Result SerializeContext(gl::BinaryOutputStream *bos, const gl::Context *context)
SerializeShader(bos, shaderPtr);
}
const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programManager =
shaderProgramManager.getProgramsForCapture();
shaderProgramManager.getProgramsForCaptureAndPerf();
for (const auto &program : programManager)
{
gl::Program *programPtr = program.second;

Просмотреть файл

@ -116,6 +116,22 @@
"font": "small",
"length": 40
}
},
{
"name": "VulkanDescriptorSetAllocations",
"comment": "Descriptor Set Allocations.",
"type": "RunningGraph(60)",
"color": [255, 0, 75, 200],
"coords": [-50, 250],
"bar_width": 6,
"height": 100,
"description": {
"color": [255, 0, 75, 255],
"coords": ["VulkanDescriptorSetAllocations.left.align",
"VulkanDescriptorSetAllocations.top.adjacent"],
"font": "small",
"length": 40
}
}
]
}

Просмотреть файл

@ -702,6 +702,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
mPerfCounters{},
mObjectPerfCounters{},
mContextPriority(renderer->getDriverPriority(GetContextPriority(state))),
mCurrentIndirectBuffer(nullptr),
mShareGroupVk(vk::GetImpl(state.getShareGroup()))
@ -784,12 +785,16 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
mObjectPerfCounters.descriptorSetsAllocated.fill(0);
}
ContextVk::~ContextVk() = default;
void ContextVk::onDestroy(const gl::Context *context)
{
outputCumulativePerfCounters();
// Remove context from the share group
mShareGroupVk->getShareContextSet()->erase(this);
@ -1778,6 +1783,42 @@ void ContextVk::updateOverlayOnPresent()
mPerfCounters.writeDescriptorSets = 0;
}
{
    // Descriptor set allocations: sum the cumulative counters of every object that tracks them,
    // then feed the overlay graph the increase since the previous present.
    uint32_t descriptorSetAllocations = 0;

    // ContextVk's descriptor set allocations
    for (const uint32_t count : mObjectPerfCounters.descriptorSetsAllocated)
    {
        descriptorSetAllocations += count;
    }

    // UtilsVk's descriptor set allocations
    descriptorSetAllocations += mUtils.getObjectPerfCounters().descriptorSetsAllocated;

    // ProgramExecutableVk's descriptor set allocations
    const gl::State &state = getState();
    const gl::ShaderProgramManager &shadersAndPrograms =
        state.getShaderProgramManagerForCapture();
    const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programs =
        shadersAndPrograms.getProgramsForCaptureAndPerf();
    for (const std::pair<GLuint, gl::Program *> &resource : programs)
    {
        ProgramVk *programVk = vk::GetImpl(resource.second);
        // Bind by const reference: works whether the accessor returns by value or by reference
        // and avoids an extra copy of the counter array for every program.
        const ProgramExecutableVk::PerfCounters &progPerfCounters =
            programVk->getExecutable().getObjectPerfCounters();

        for (const uint32_t count : progPerfCounters.descriptorSetsAllocated)
        {
            descriptorSetAllocations += count;
        }
    }

    // The total only covers programs that are currently in the program map. When a program goes
    // away its counters drop out of the sum, so the total can be lower than the one recorded at
    // the previous present. Clamp the per-frame delta to zero so the unsigned subtraction cannot
    // wrap around and push a huge bogus value into the graph.
    const uint32_t previousAllocations = mPerfCounters.descriptorSetAllocations;
    const uint32_t newAllocations      = (descriptorSetAllocations > previousAllocations)
                                             ? descriptorSetAllocations - previousAllocations
                                             : 0;

    gl::RunningGraphWidget *descriptorSetAllocationCount =
        overlay->getRunningGraphWidget(gl::WidgetId::VulkanDescriptorSetAllocations);
    descriptorSetAllocationCount->add(newAllocations);
    descriptorSetAllocationCount->next();

    // Remember the current total as the baseline for the next present.
    mPerfCounters.descriptorSetAllocations = descriptorSetAllocations;
}
}
void ContextVk::addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffer)
@ -4077,6 +4118,7 @@ angle::Result ContextVk::updateDriverUniformsDescriptorSet(
ANGLE_TRY(mDriverUniformsDescriptorPools[pipelineType].allocateSetsAndGetInfo(
this, driverUniforms->descriptorSetLayout.get().ptr(), 1,
&driverUniforms->descriptorPoolBinding, &driverUniforms->descriptorSet, &newPoolAllocated));
mObjectPerfCounters.descriptorSetsAllocated[ToUnderlying(pipelineType)]++;
// Clear descriptor set cache. It may no longer be valid.
if (newPoolAllocated)
@ -5340,4 +5382,27 @@ bool ContextVk::shouldSwitchToReadOnlyDepthFeedbackLoopMode(const gl::Context *c
texture->isBoundToFramebuffer(mDrawFramebuffer->getState().getFramebufferSerial()) &&
!mDrawFramebuffer->isReadOnlyDepthFeedbackLoopMode();
}
// Logs the cumulative descriptor set allocation counters owned by this context: a header line
// followed by one line per pipeline type whose count is non-zero. Nothing is logged unless
// vk::kOutputCumulativePerfCounters is set. The output is only visible when trace is enabled,
// which is supported with is_debug=true.
void ContextVk::outputCumulativePerfCounters()
{
    if (vk::kOutputCumulativePerfCounters)
    {
        INFO() << "Context Descriptor Set Allocations: ";

        // The position within the array is the numeric value of the pipeline type.
        size_t pipelineType = 0;
        for (const uint32_t allocated : mObjectPerfCounters.descriptorSetsAllocated)
        {
            if (allocated != 0)
            {
                INFO() << " PipelineType " << pipelineType << ": " << allocated;
            }
            ++pipelineType;
        }
    }
}
} // namespace rx

Просмотреть файл

@ -768,6 +768,14 @@ class ContextVk : public ContextImpl, public vk::Context
double cpuTimestampS;
};
// Performance Counters specific to this object type
// One uint32_t slot per ContextVk::PipelineType value; the array is indexed with
// ToUnderlying(pipelineType).
using DescriptorSetList =
std::array<uint32_t, ToUnderlying(ContextVk::PipelineType::EnumCount)>;
struct PerfCounters
{
// Cumulative number of descriptor sets allocated by this context, per pipeline type.
DescriptorSetList descriptorSetsAllocated;
};
class ScopedDescriptorSetUpdates;
angle::Result setupDraw(const gl::Context *context,
@ -989,6 +997,8 @@ class ContextVk : public ContextImpl, public vk::Context
bool shouldSwitchToReadOnlyDepthFeedbackLoopMode(const gl::Context *context,
gl::Texture *texture) const;
void outputCumulativePerfCounters();
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers;
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers;
@ -1157,6 +1167,7 @@ class ContextVk : public ContextImpl, public vk::Context
// A mix of per-frame and per-run counters.
vk::PerfCounters mPerfCounters;
PerfCounters mObjectPerfCounters;
gl::State::DirtyBits mPipelineDirtyBitsMask;

Просмотреть файл

@ -182,10 +182,14 @@ ProgramExecutableVk::ProgramExecutableVk()
mNumDefaultUniformDescriptors(0),
mDynamicBufferOffsets{},
mProgram(nullptr),
mProgramPipeline(nullptr)
mProgramPipeline(nullptr),
mObjectPerfCounters{}
{}
ProgramExecutableVk::~ProgramExecutableVk() = default;
ProgramExecutableVk::~ProgramExecutableVk()
{
outputCumulativePerfCounters();
}
void ProgramExecutableVk::reset(ContextVk *contextVk)
{
@ -424,6 +428,8 @@ angle::Result ProgramExecutableVk::allocateDescriptorSetAndGetInfo(
&mDescriptorSets[ToUnderlying(descriptorSetIndex)], newPoolAllocatedOut));
mEmptyDescriptorSets[ToUnderlying(descriptorSetIndex)] = VK_NULL_HANDLE;
++mObjectPerfCounters.descriptorSetsAllocated[ToUnderlying(descriptorSetIndex)];
return angle::Result::Continue;
}
@ -1561,6 +1567,8 @@ angle::Result ProgramExecutableVk::updateDescriptorSets(ContextVk *contextVk,
contextVk, descriptorSetLayout.ptr(), 1,
&mDescriptorPoolBindings[descriptorSetIndex],
&mEmptyDescriptorSets[descriptorSetIndex]));
++mObjectPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
}
descSet = mEmptyDescriptorSets[descriptorSetIndex];
}
@ -1582,4 +1590,44 @@ angle::Result ProgramExecutableVk::updateDescriptorSets(ContextVk *contextVk,
return angle::Result::Continue;
}
// Logs the cumulative descriptor set allocation counters of this executable: a header line with
// the object's address followed by one line per descriptor set index whose count is non-zero.
// Executables that never allocated a descriptor set produce no output at all, and nothing is
// logged unless vk::kOutputCumulativePerfCounters is set. The output is only visible when trace
// is enabled, which is supported with is_debug=true.
void ProgramExecutableVk::outputCumulativePerfCounters()
{
    if (!vk::kOutputCumulativePerfCounters)
    {
        return;
    }

    const auto &allocated = mObjectPerfCounters.descriptorSetsAllocated;

    // Only output information for programs that allocated descriptor sets.
    bool anyAllocated = false;
    for (const uint32_t count : allocated)
    {
        if (count > 0)
        {
            anyAllocated = true;
            break;
        }
    }
    if (!anyAllocated)
    {
        return;
    }

    INFO() << "ProgramExecutable: " << this << ":";

    // Each counter gets its own INFO() statement so that every line carries the logging prefix.
    size_t descriptorSetIndex = 0;
    for (const uint32_t count : allocated)
    {
        if (count > 0)
        {
            INFO() << " DescriptorSetIndex " << descriptorSetIndex << ": " << count;
        }
        ++descriptorSetIndex;
    }
}
} // namespace rx

Просмотреть файл

@ -168,6 +168,15 @@ class ProgramExecutableVk
mProgramPipeline = pipeline;
}
// One uint32_t slot per descriptor set index (DescriptorSetIndex::EnumCount entries).
using DescriptorSetCountList = std::array<uint32_t, DescriptorSetIndex::EnumCount>;
// Performance and resource counters.
struct PerfCounters
{
// Cumulative number of descriptor sets allocated by this executable, per descriptor set index.
DescriptorSetCountList descriptorSetsAllocated;
};
// Read-only access to this executable's cumulative counters. Returned by const reference so
// callers that only read the counters do not copy the whole array; the reference is valid for as
// long as this object is alive. Callers that assign the result to a PerfCounters value still get
// their own copy.
const PerfCounters &getObjectPerfCounters() const { return mObjectPerfCounters; }
private:
friend class ProgramVk;
friend class ProgramPipelineVk;
@ -224,6 +233,8 @@ class ProgramExecutableVk
DescriptorSetIndex descriptorSetIndex,
VkDescriptorSetLayout descriptorSetLayout);
void outputCumulativePerfCounters();
// Descriptor sets for uniform blocks and textures for this program.
vk::DescriptorSetLayoutArray<VkDescriptorSet> mDescriptorSets;
vk::DescriptorSetLayoutArray<VkDescriptorSet> mEmptyDescriptorSets;
@ -260,6 +271,8 @@ class ProgramExecutableVk
ProgramVk *mProgram;
ProgramPipelineVk *mProgramPipeline;
PerfCounters mObjectPerfCounters;
};
} // namespace rx

Просмотреть файл

@ -15,6 +15,7 @@
#include "libANGLE/renderer/vulkan/GlslangWrapperVk.h"
#include "libANGLE/renderer/vulkan/RenderTargetVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
namespace rx
{
@ -530,7 +531,7 @@ uint32_t UtilsVk::GetGenerateMipmapMaxLevels(ContextVk *contextVk)
: kGenerateMipmapMaxLevels;
}
UtilsVk::UtilsVk() = default;
UtilsVk::UtilsVk() : mObjectPerfCounters{} {}
UtilsVk::~UtilsVk() = default;
@ -538,6 +539,8 @@ void UtilsVk::destroy(RendererVk *renderer)
{
VkDevice device = renderer->getDevice();
outputCumulativePerfCounters();
for (Function f : angle::AllEnums<Function>())
{
for (auto &descriptorSetLayout : mDescriptorSetLayouts[f])
@ -2632,6 +2635,9 @@ angle::Result UtilsVk::allocateDescriptorSet(ContextVk *contextVk,
.ptr(),
1, bindingOut, descriptorSetOut));
bindingOut->get().updateSerial(contextVk->getCurrentQueueSerial());
mObjectPerfCounters.descriptorSetsAllocated++;
return angle::Result::Continue;
}
@ -2647,4 +2653,15 @@ UtilsVk::ClearFramebufferParameters::ClearFramebufferParameters()
depthStencilClearValue{}
{}
// Logs the cumulative number of descriptor sets allocated through this UtilsVk (a single counter
// shared by all UtilsVk functions). Nothing is logged unless vk::kOutputCumulativePerfCounters
// is set. The output is only visible when trace is enabled, which is supported with
// is_debug=true.
void UtilsVk::outputCumulativePerfCounters()
{
    if (vk::kOutputCumulativePerfCounters)
    {
        const uint32_t totalAllocated = mObjectPerfCounters.descriptorSetsAllocated;
        INFO() << "Utils Descriptor Set Allocations: " << totalAllocated;
    }
}
} // namespace rx

Просмотреть файл

@ -166,6 +166,12 @@ class UtilsVk : angle::NonCopyable
bool unresolveStencil;
};
// Performance counters specific to UtilsVk.
struct PerfCounters
{
// Total descriptor set allocations for all UtilsVk::Functions
// (a single cumulative counter; it is not broken down per function).
uint32_t descriptorSetsAllocated;
};
// Based on the maximum number of levels in GenerateMipmap.comp.
static constexpr uint32_t kGenerateMipmapMaxLevels = 6;
static uint32_t GetGenerateMipmapMaxLevels(ContextVk *contextVk);
@ -262,6 +268,8 @@ class UtilsVk : angle::NonCopyable
const vk::ImageView *destView,
const OverlayDrawParameters &params);
// Read-only access to the cumulative UtilsVk counters. Returned by const reference rather than as
// a const-qualified value; the reference is valid for as long as this object is alive.
const PerfCounters &getObjectPerfCounters() const { return mObjectPerfCounters; }
private:
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
@ -502,6 +510,8 @@ class UtilsVk : angle::NonCopyable
vk::RefCountedDescriptorPoolBinding *bindingOut,
VkDescriptorSet *descriptorSetOut);
void outputCumulativePerfCounters();
angle::PackedEnumMap<Function, vk::DescriptorSetLayoutPointerArray> mDescriptorSetLayouts;
angle::PackedEnumMap<Function, vk::BindingPointer<vk::PipelineLayout>> mPipelineLayouts;
angle::PackedEnumMap<Function, vk::DynamicDescriptorPool> mDescriptorPools;
@ -531,6 +541,8 @@ class UtilsVk : angle::NonCopyable
vk::Sampler mPointSampler;
vk::Sampler mLinearSampler;
PerfCounters mObjectPerfCounters;
};
} // namespace rx

Просмотреть файл

@ -70,6 +70,7 @@ namespace rx
{
class DisplayVk;
class ImageVk;
class ProgramExecutableVk;
class RenderTargetVk;
class RendererVk;
class RenderPassCache;
@ -781,6 +782,12 @@ class ResourceSerialFactory final : angle::NonCopyable
std::atomic<uint32_t> mCurrentUniqueSerial;
};
// Compile-time switch for logging cumulative per-object performance counters. It takes the value
// of ANGLE_ENABLE_PERF_COUNTER_OUTPUT when the build defines that macro and is false otherwise.
#if !defined(ANGLE_ENABLE_PERF_COUNTER_OUTPUT)
constexpr bool kOutputCumulativePerfCounters = false;
#else
constexpr bool kOutputCumulativePerfCounters = ANGLE_ENABLE_PERF_COUNTER_OUTPUT;
#endif
// Performance and resource counters.
struct RenderPassPerfCounters
{
@ -823,6 +830,7 @@ struct PerfCounters
uint32_t depthAttachmentResolves;
uint32_t stencilAttachmentResolves;
uint32_t readOnlyDepthStencilRenderPasses;
uint32_t descriptorSetAllocations;
};
// A Vulkan image level index.