mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2025-10-19 21:47:54 +00:00
WIP: Enhance shader compilation performance and control
This commit adds new settings and optimizations for shader compilation:

- Add new settings:
  - use_enhanced_shader_building: Enable enhanced shader compilation
  - shader_compilation_priority: Control shader compilation priority
- Improve shader compilation performance:
  - Optimize worker thread allocation based on CPU cores
  - Add smarter async shader compilation heuristics
  - Prioritize vertex and fragment shader compilation
  - Add performance tracking and logging
- Add performance monitoring:
  - Track shader compilation times
  - Log slow shader compilations
  - Monitor async shader compilation statistics

This is a work-in-progress commit. Further optimizations and refinements will be needed based on testing and feedback.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
parent
bc86307ad6
commit
c57a5fef92
8 changed files with 249 additions and 33 deletions
|
@ -1,10 +1,13 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
|
||||
#include "common/settings.h" // for enum class Settings::ShaderBackend
|
||||
#include "common/thread_worker.h"
|
||||
|
@ -234,26 +237,68 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
|
|||
auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv),
|
||||
shader_notify, backend, in_parallel,
|
||||
force_context_flush](ShaderContext::Context*) mutable {
|
||||
// Track time for shader compilation for possible performance tuning
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Prepare compilation steps for all shader stages
|
||||
std::vector<std::function<void()>> compilation_steps;
|
||||
compilation_steps.reserve(5); // Maximum number of shader stages
|
||||
|
||||
// Prepare all compilation steps first to better distribute work
|
||||
for (size_t stage = 0; stage < 5; ++stage) {
|
||||
switch (backend) {
|
||||
case Settings::ShaderBackend::Glsl:
|
||||
if (!sources_[stage].empty()) {
|
||||
source_programs[stage] = CreateProgram(sources_[stage], Stage(stage));
|
||||
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
|
||||
source_programs[stage] = CreateProgram(source, Stage(stage));
|
||||
});
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::Glasm:
|
||||
if (!sources_[stage].empty()) {
|
||||
assembly_programs[stage] =
|
||||
CompileProgram(sources_[stage], AssemblyStage(stage));
|
||||
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
|
||||
assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage));
|
||||
});
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::SpirV:
|
||||
if (!sources_spirv_[stage].empty()) {
|
||||
source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage));
|
||||
compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() {
|
||||
source_programs[stage] = CreateProgram(source, Stage(stage));
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're running in parallel, use high-priority execution for vertex and fragment shaders
|
||||
// as these are typically needed first by the renderer
|
||||
if (in_parallel && compilation_steps.size() > 1) {
|
||||
// Execute vertex (0) and fragment (4) shaders first if they exist
|
||||
for (size_t priority_stage : {0, 4}) {
|
||||
for (size_t i = 0; i < compilation_steps.size(); ++i) {
|
||||
if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) {
|
||||
compilation_steps[i]();
|
||||
compilation_steps[i] = [](){}; // Mark as executed
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute all remaining compilation steps
|
||||
for (auto& step : compilation_steps) {
|
||||
step(); // Will do nothing for already executed steps
|
||||
}
|
||||
|
||||
// Performance measurement for possible logging or optimization
|
||||
const auto end_time = std::chrono::high_resolution_clock::now();
|
||||
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
end_time - start_time).count();
|
||||
|
||||
if (compilation_time > 50) { // Only log slow compilations
|
||||
LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time);
|
||||
}
|
||||
|
||||
if (force_context_flush || in_parallel) {
|
||||
std::scoped_lock lock{built_mutex};
|
||||
built_fence.Create();
|
||||
|
@ -623,15 +668,41 @@ void GraphicsPipeline::WaitForBuild() {
|
|||
is_built = true;
|
||||
}
|
||||
|
||||
bool GraphicsPipeline::IsBuilt() noexcept {
|
||||
bool GraphicsPipeline::IsBuilt() const noexcept {
|
||||
if (is_built) {
|
||||
return true;
|
||||
}
|
||||
if (built_fence.handle == 0) {
|
||||
if (!built_fence.handle) {
|
||||
return false;
|
||||
}
|
||||
is_built = built_fence.IsSignaled();
|
||||
return is_built;
|
||||
|
||||
// Check if the async build has finished by polling the fence
|
||||
const GLsync sync = built_fence.handle;
|
||||
const GLuint result = glClientWaitSync(sync, 0, 0);
|
||||
if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) {
|
||||
// Mark this as mutable even though we're in a const method - this is
|
||||
// essentially a cached value update which is acceptable
|
||||
const_cast<GraphicsPipeline*>(this)->is_built = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// For better performance tracking, capture time spent waiting for shaders
|
||||
static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log;
|
||||
static thread_local u32 shader_wait_count = 0;
|
||||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
now - last_shader_wait_log).count();
|
||||
|
||||
// Log shader compilation status periodically to help diagnose performance issues
|
||||
if (elapsed >= 5) { // Log every 5 seconds
|
||||
shader_wait_count++;
|
||||
LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})",
|
||||
shader_wait_count);
|
||||
last_shader_wait_log = now;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -102,7 +103,7 @@ public:
|
|||
return uses_local_memory;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsBuilt() noexcept;
|
||||
[[nodiscard]] bool IsBuilt() const noexcept;
|
||||
|
||||
template <typename Spec>
|
||||
static auto MakeConfigureSpecFunc() {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <atomic>
|
||||
|
@ -608,9 +609,33 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
|||
}
|
||||
|
||||
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
|
||||
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
|
||||
"GlShaderBuilder",
|
||||
[this] { return Context{emu_window}; });
|
||||
// Calculate optimal number of workers based on available CPU cores
|
||||
// Leave at least 1 core for main thread and other operations
|
||||
// Use more cores for more parallelism in shader compilation
|
||||
const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U);
|
||||
const u32 optimal_workers = num_worker_threads <= 3 ?
|
||||
num_worker_threads - 1 : // On dual/quad core, leave 1 core free
|
||||
num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks
|
||||
|
||||
auto worker = std::make_unique<ShaderWorker>(
|
||||
optimal_workers,
|
||||
"GlShaderBuilder",
|
||||
[this] {
|
||||
auto context = Context{emu_window};
|
||||
|
||||
// Apply thread priority based on settings
|
||||
// This allows users to control how aggressive shader compilation is
|
||||
const int priority = Settings::values.shader_compilation_priority.GetValue();
|
||||
if (priority != 0) {
|
||||
Common::SetCurrentThreadPriority(
|
||||
priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low);
|
||||
}
|
||||
|
||||
return context;
|
||||
}
|
||||
);
|
||||
|
||||
return worker;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue