Compare commits

...

5 commits

Author SHA1 Message Date
wildcard
8f11ce89e3 Update src/video_core/renderer_vulkan/vk_buffer_cache.cpp 2025-10-10 02:09:07 +02:00
wildcard
6544cdd446 [Vulkan] Introduce Ring buffers for UB
Create three ring buffers and rotate between them each frame to avoid GPU/CPU conflicts.
BindMappedUniformBuffer first tries to allocate from the ring buffer and falls back to the staging pool only if the allocation is too large (a minimal sketch of this scheme follows the commit list below).
Note to testers: please test performance, since this is primarily a performance optimization, and also watch for visual bugs.
2025-10-10 02:09:07 +02:00
CamilleLaVey
b6241e4148
revert [vk] StreamBuffer Changes (#2684) (#2707)
revert [vk] StreamBuffer Changes (#2684)

The StreamBuffer changes broke things in other games that were outside the scope of our testing; we are going to study these changes in the future for better graphics stability.

Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2684
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Ribbit <ribbit@eden-emu.dev>
Co-committed-by: Ribbit <ribbit@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2707
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
2025-10-10 01:55:43 +02:00
MaranBr
bfffafe68b
[common] Change web offline applet default setting to HLE (#2705)
This prevents some games from ignoring the "Disable Web Applet" setting.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2705
Co-authored-by: MaranBr <maranbr@outlook.com>
Co-committed-by: MaranBr <maranbr@outlook.com>
2025-10-10 01:36:55 +02:00
CamilleLaVey
3c6ef765af
revert [vk] Fast UBO: fix tracking, resize heuristics, add debug guard (#2695) (#2706)
revert [vk] Fast UBO: fix tracking, resize heuristics, add debug guard (#2695)

Issues showed up after the testing phase: this change broke SMO and some mods after being merged directly into master. We will keep studying why this happens and add better handling later.

Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2695
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Ribbit <ribbit@eden-emu.dev>
Co-committed-by: Ribbit <ribbit@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2706
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
2025-10-09 21:37:27 +02:00
10 changed files with 113 additions and 122 deletions
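Commit 6544cdd446 above describes its allocation scheme only in prose, so here is a minimal, Vulkan-free sketch of that idea: three per-frame buffers, aligned sub-allocation from a write head, and an empty-span result that signals "fall back to the staging pool". Class and member names such as UniformRingSketch are hypothetical stand-ins for illustration; the emulator's actual UniformRing implementation appears in the vk_buffer_cache diffs further down.

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <span>
#include <vector>

// Sketch of a per-frame uniform ring: three host-visible buffers, one per
// in-flight frame, so the CPU never writes into memory the GPU may still read.
class UniformRingSketch {
public:
    static constexpr std::size_t kNumFrames = 3;

    UniformRingSketch(std::uint64_t bytes, std::uint32_t alignment)
        : size_{bytes}, align_{alignment ? alignment : 256} {
        for (auto& backing : backings_) {
            backing.resize(bytes); // stand-in for a mapped host-visible Vulkan buffer
        }
    }

    // Rotate to the next buffer at the start of each frame and reset the write head.
    void BeginFrame() {
        current_frame_ = (current_frame_ + 1) % kNumFrames;
        head_ = 0;
    }

    // Sub-allocate from the current frame's buffer; an empty span means the
    // request does not fit and the caller should use the staging pool instead.
    std::span<std::uint8_t> Alloc(std::uint32_t bytes, std::uint32_t& out_offset) {
        const std::uint64_t aligned = (head_ + align_ - 1) / align_ * align_;
        const std::uint64_t end = aligned + bytes;
        if (end > size_) {
            return {};
        }
        out_offset = static_cast<std::uint32_t>(aligned);
        head_ = end;
        return {backings_[current_frame_].data() + aligned, bytes};
    }

private:
    std::array<std::vector<std::uint8_t>, kNumFrames> backings_{};
    std::uint64_t size_;
    std::uint64_t head_ = 0;
    std::uint32_t align_;
    std::size_t current_frame_ = 0;
};

int main() {
    UniformRingSketch ring(8 * 1024 * 1024, 256); // 8 MiB per frame, 256-byte UBO alignment
    ring.BeginFrame();
    std::uint32_t offset = 0;
    auto span = ring.Alloc(192, offset); // small UBO lands at an aligned offset
    std::printf("allocated %zu bytes at offset %u\n", span.size(), offset);
    auto huge = ring.Alloc(16 * 1024 * 1024, offset); // larger than the ring: empty span
    std::printf("oversized request span size: %zu (fall back to staging pool)\n", huge.size());
}

Rotating across three backing buffers means a given frame's uniform data is only overwritten after the GPU has had two further frames to consume it, which is how the commit avoids the GPU/CPU conflicts it mentions.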

View file

@@ -161,7 +161,7 @@ struct Values {
Category::LibraryApplet};
Setting<AppletMode> photo_viewer_applet_mode{
linkage, AppletMode::LLE, "photo_viewer_applet_mode", Category::LibraryApplet};
Setting<AppletMode> offline_web_applet_mode{linkage, AppletMode::LLE, "offline_web_applet_mode",
Setting<AppletMode> offline_web_applet_mode{linkage, AppletMode::HLE, "offline_web_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> login_share_applet_mode{linkage, AppletMode::HLE, "login_share_applet_mode",
Category::LibraryApplet};

View file

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -112,9 +109,6 @@ public:
void ReadBlock(DAddr address, void* dest_pointer, size_t size);
void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
#ifdef YUZU_DEBUG
bool ReadBlockFastChecked(DAddr address, void* dest_pointer, size_t size);
#endif
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);

View file

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -470,29 +467,6 @@ void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_poin
});
}
#ifdef YUZU_DEBUG
template <typename Traits>
bool DeviceMemoryManager<Traits>::ReadBlockFastChecked(DAddr address, void* dest_pointer,
size_t size) {
bool success = true;
WalkBlock(
address, size,
[&](size_t copy_amount, DAddr current_vaddr) {
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", current_vaddr,
size);
std::memset(dest_pointer, 0, copy_amount);
success = false;
},
[&](size_t copy_amount, const u8* const src_ptr) {
std::memcpy(dest_pointer, src_ptr, copy_amount);
},
[&](const std::size_t copy_amount) {
dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
});
return success;
}
#endif
template <typename Traits>
void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
size_t size) {

View file

@@ -386,10 +386,11 @@ void BufferCache<P>::BindHostComputeBuffers() {
template <class P>
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
const UniformBufferSizes* sizes) {
const bool mask_changed = channel_state->enabled_uniform_buffer_masks != mask;
if (mask_changed) {
channel_state->fast_bound_uniform_buffers.fill(0);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
if (channel_state->enabled_uniform_buffer_masks != mask) {
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers.fill(0);
}
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
}
@@ -805,7 +806,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
@@ -814,22 +815,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
}
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
}
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
#ifdef YUZU_DEBUG
ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
ASSERT(span.size() >= size && "UBO stream span too small");
if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
ASSERT(false);
return;
}
#else
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
#endif
return;
}
// Classic cached path
@@ -838,8 +830,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
needs_bind |= was_fast_bound;
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
}
@@ -848,6 +839,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Fast buffer will be unbound
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@@ -861,7 +855,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
} else {
runtime.BindUniformBuffer(buffer, offset, size);
}
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
}
template <class P>
@@ -1796,7 +1789,12 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
template <class P>
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;
if constexpr (IS_OPENGL) {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
} else {
// Only OpenGL has fast uniform buffers
return false;
}
}
template <class P>

View file

@@ -53,7 +53,6 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
constexpr u32 NUM_STAGES = 5;
static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
@@ -138,8 +137,8 @@ public:
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_hits{};
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_shots{};
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;

View file

@@ -337,6 +337,11 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue);
}
const u32 ubo_align = static_cast<u32>(
device.GetUniformBufferAlignment() //check if the device has it
);
// add the ability to change the size in settings in future
uniform_ring.Init(device, memory_allocator, 8 * 1024 * 1024 /* 8 MiB */, ubo_align ? ubo_align : 256);
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
scheduler_, staging_pool_);
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@@ -355,6 +360,42 @@ void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_pool.FreeDeferred(ref);
}
void BufferCacheRuntime::UniformRing::Init(const Device& device,
MemoryAllocator& alloc,
u64 bytes, u32 alignment) {
for (size_t i = 0; i < NUM_FRAMES; ++i) {
VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = bytes,
.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
buffers[i] = alloc.CreateBuffer(ci, MemoryUsage::Upload);
mapped[i] = buffers[i].Mapped().data();
}
size = bytes;
align = alignment ? alignment : 256;
head = 0;
current_frame = 0;
}
std::span<u8> BufferCacheRuntime::UniformRing::Alloc(u32 bytes, u32& out_offset) {
const u64 aligned = Common::AlignUp(head, static_cast<u64>(align));
u64 end = aligned + bytes;
if (end > size) {
return {}; // Fallback to staging pool
}
out_offset = static_cast<u32>(aligned);
head = end;
return {mapped[current_frame] + out_offset, bytes};
}
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
@@ -375,6 +416,7 @@ void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noe
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
uniform_ring.BeginFrame();
}
void BufferCacheRuntime::Finish() {

View file

@@ -124,8 +124,15 @@ public:
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
[[maybe_unused]] u32 binding_index, u32 size) {
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
[[maybe_unused]] u32 /*binding_index*/,
u32 size) {
u32 offset = 0;
if (auto span = uniform_ring.Alloc(size, offset); !span.empty()) {
BindBuffer(*uniform_ring.buffers[uniform_ring.current_frame], offset, size);
return span;
}
// Fallback for giant requests
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
return ref.mapped_span;
@@ -153,6 +160,24 @@ private:
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
struct UniformRing {
static constexpr size_t NUM_FRAMES = 3;
std::array<vk::Buffer, NUM_FRAMES> buffers{};
std::array<u8*, NUM_FRAMES> mapped{};
u64 size = 0;
u64 head = 0;
u32 align = 256;
size_t current_frame = 0;
void Init(const Device& device, MemoryAllocator& alloc, u64 bytes, u32 alignment);
void BeginFrame() {
current_frame = (current_frame + 1) % NUM_FRAMES;
head = 0;
}
std::span<u8> Alloc(u32 bytes, u32& out_offset);
};
UniformRing uniform_ring;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;

View file

@@ -25,48 +25,35 @@ namespace {
using namespace Common::Literals;
// Minimum alignment we want to enforce for the streaming ring
constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Stream buffer size in bytes
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
size_t GetStreamBufferSize(const Device& device) {
VkDeviceSize size{0};
if (device.HasDebuggingToolAttached()) {
bool found_heap = false;
ForEachDeviceLocalHostVisibleHeap(device, [&size, &found_heap](size_t /*index*/, VkMemoryHeap& heap) {
ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
size = (std::max)(size, heap.size);
found_heap = true;
});
// If no suitable heap was found fall back to the default cap to avoid creating a zero-sized stream buffer.
if (!found_heap) {
size = MAX_STREAM_BUFFER_SIZE;
} else if (size <= 256_MiB) {
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
// as the heap will be much larger.
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
// as the heap will be much larger.
if (size <= 256_MiB) {
size = size * 40 / 100;
}
} else {
size = MAX_STREAM_BUFFER_SIZE;
}
// Clamp to the configured maximum, align up for safety, and ensure a sane minimum so
// region_size (stream_buffer_size / NUM_SYNCS) never becomes zero.
const VkDeviceSize aligned =
(std::min)(Common::AlignUp(size, alignment), MAX_STREAM_BUFFER_SIZE);
const VkDeviceSize min_size = alignment * StagingBufferPool::NUM_SYNCS;
return static_cast<size_t>((std::max)(aligned, min_size));
return (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
stream_alignment{std::max<VkDeviceSize>(device_.GetUniformBufferAlignment(),
MIN_STREAM_ALIGNMENT)},
stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -119,54 +106,31 @@ void StagingBufferPool::TickFrame() {
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
const size_t alignment = static_cast<size_t>(stream_alignment);
const size_t aligned_size = Common::AlignUp(size, alignment);
const bool wraps = iterator + size >= stream_buffer_size;
const size_t new_iterator =
wraps ? aligned_size : Common::AlignUp(iterator + size, alignment);
const size_t begin_region = wraps ? 0 : Region(iterator);
const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
const size_t guard_begin = (std::min)(Region(free_iterator) + 1, NUM_SYNCS);
if (!wraps) {
if (guard_begin < end_region && AreRegionsActive(guard_begin, end_region)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
} else if (guard_begin < NUM_SYNCS && AreRegionsActive(guard_begin, NUM_SYNCS)) {
if (AreRegionsActive(Region(free_iterator) + 1,
(std::min)(Region(iterator + size) + 1, NUM_SYNCS))) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
const u64 current_tick = scheduler.CurrentTick();
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
current_tick);
used_iterator = iterator;
free_iterator = (std::max)(free_iterator, iterator + size);
if (wraps) {
if (iterator + size >= stream_buffer_size) {
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
current_tick);
used_iterator = 0;
iterator = 0;
free_iterator = aligned_size;
const size_t head_last_byte = aligned_size == 0 ? 0 : aligned_size - 1;
const size_t head_end_region = (std::min)(Region(head_last_byte) + 1, NUM_SYNCS);
if (AreRegionsActive(0, head_end_region)) {
free_iterator = size;
if (AreRegionsActive(0, Region(size) + 1)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
}
std::fill(sync_ticks.begin() + begin_region, sync_ticks.begin() + end_region, current_tick);
const size_t offset = wraps ? 0 : iterator;
iterator = new_iterator;
if (!wraps) {
free_iterator = (std::max)(free_iterator, offset + aligned_size);
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),

View file

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -105,7 +102,6 @@ private:
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
VkDeviceSize stream_alignment;
vk::Buffer stream_buffer;
std::span<u8> stream_pointer;
VkDeviceSize stream_buffer_size;

View file

@@ -83,8 +83,7 @@ void ConfigureDebug::SetConfiguration() {
#ifdef YUZU_USE_QT_WEB_ENGINE
ui->disable_web_applet->setChecked(UISettings::values.disable_web_applet.GetValue());
#else
ui->disable_web_applet->setEnabled(false);
ui->disable_web_applet->setText(tr("Web applet not compiled"));
ui->disable_web_applet->setVisible(false);
#endif
}