WIP: Enhance shader compilation performance and control

This commit adds new settings and optimizations for shader compilation:

- Add new settings:
  - use_enhanced_shader_building: Enable enhanced shader compilation
  - shader_compilation_priority: Control shader compilation priority

- Improve shader compilation performance:
  - Optimize worker thread allocation based on CPU cores
  - Add smarter async shader compilation heuristics
  - Prioritize vertex and fragment shader compilation
  - Add performance tracking and logging

- Add performance monitoring:
  - Track shader compilation times
  - Log slow shader compilations
  - Monitor async shader compilation statistics

This is a work in progress commit. Further optimizations and refinements
will be needed based on testing and feedback.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
Zephyron 2025-03-27 20:56:23 +10:00 committed by MrPurple666
parent bc86307ad6
commit c57a5fef92
8 changed files with 249 additions and 33 deletions

View file

@ -1,10 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <string>
#include <vector>
#include <chrono>
#include <functional>
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "common/thread_worker.h"
@ -234,26 +237,68 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv),
shader_notify, backend, in_parallel,
force_context_flush](ShaderContext::Context*) mutable {
// Track time for shader compilation for possible performance tuning
const auto start_time = std::chrono::high_resolution_clock::now();
// Prepare compilation steps for all shader stages
std::vector<std::function<void()>> compilation_steps;
compilation_steps.reserve(5); // Maximum number of shader stages
// Prepare all compilation steps first to better distribute work
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::Glsl:
if (!sources_[stage].empty()) {
source_programs[stage] = CreateProgram(sources_[stage], Stage(stage));
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
source_programs[stage] = CreateProgram(source, Stage(stage));
});
}
break;
case Settings::ShaderBackend::Glasm:
if (!sources_[stage].empty()) {
assembly_programs[stage] =
CompileProgram(sources_[stage], AssemblyStage(stage));
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage));
});
}
break;
case Settings::ShaderBackend::SpirV:
if (!sources_spirv_[stage].empty()) {
source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage));
compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() {
source_programs[stage] = CreateProgram(source, Stage(stage));
});
}
break;
}
}
// If we're running in parallel, use high-priority execution for vertex and fragment shaders
// as these are typically needed first by the renderer
if (in_parallel && compilation_steps.size() > 1) {
// Execute vertex (0) and fragment (4) shaders first if they exist
for (size_t priority_stage : {0, 4}) {
for (size_t i = 0; i < compilation_steps.size(); ++i) {
if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) {
compilation_steps[i]();
compilation_steps[i] = [](){}; // Mark as executed
}
}
}
}
// Execute all remaining compilation steps
for (auto& step : compilation_steps) {
step(); // Will do nothing for already executed steps
}
// Performance measurement for possible logging or optimization
const auto end_time = std::chrono::high_resolution_clock::now();
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time).count();
if (compilation_time > 50) { // Only log slow compilations
LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time);
}
if (force_context_flush || in_parallel) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
@ -623,15 +668,41 @@ void GraphicsPipeline::WaitForBuild() {
is_built = true;
}
bool GraphicsPipeline::IsBuilt() noexcept {
bool GraphicsPipeline::IsBuilt() const noexcept {
if (is_built) {
return true;
}
if (built_fence.handle == 0) {
if (!built_fence.handle) {
return false;
}
is_built = built_fence.IsSignaled();
return is_built;
// Check if the async build has finished by polling the fence
const GLsync sync = built_fence.handle;
const GLuint result = glClientWaitSync(sync, 0, 0);
if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) {
// Mark this as mutable even though we're in a const method - this is
// essentially a cached value update which is acceptable
const_cast<GraphicsPipeline*>(this)->is_built = true;
return true;
}
// For better performance tracking, capture time spent waiting for shaders
static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log;
static thread_local u32 shader_wait_count = 0;
auto now = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
now - last_shader_wait_log).count();
// Log shader compilation status periodically to help diagnose performance issues
if (elapsed >= 5) { // Log every 5 seconds
shader_wait_count++;
LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})",
shader_wait_count);
last_shader_wait_log = now;
}
return false;
}
} // namespace OpenGL

View file

@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -102,7 +103,7 @@ public:
return uses_local_memory;
}
[[nodiscard]] bool IsBuilt() noexcept;
[[nodiscard]] bool IsBuilt() const noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {

View file

@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
@ -608,9 +609,33 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
"GlShaderBuilder",
[this] { return Context{emu_window}; });
// Calculate optimal number of workers based on available CPU cores
// Leave at least 1 core for main thread and other operations
// Use more cores for more parallelism in shader compilation
const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U);
const u32 optimal_workers = num_worker_threads <= 3 ?
num_worker_threads - 1 : // On dual/quad core, leave 1 core free
num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks
auto worker = std::make_unique<ShaderWorker>(
optimal_workers,
"GlShaderBuilder",
[this] {
auto context = Context{emu_window};
// Apply thread priority based on settings
// This allows users to control how aggressive shader compilation is
const int priority = Settings::values.shader_compilation_priority.GetValue();
if (priority != 0) {
Common::SetCurrentThreadPriority(
priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low);
}
return context;
}
);
return worker;
}
} // namespace OpenGL