From 8dba6a2cb46baf14c8c8f9c44a24f94fa26ecfe0 Mon Sep 17 00:00:00 2001 From: SDK-Chan Date: Sun, 31 Aug 2025 07:32:54 +0200 Subject: [PATCH] [gpu/NVDRV] Finalize, improve AllocObjCtx (#333) Improves object allocation per channel, allowing a maximum of 6 object contexts per channel. Previously, objects were stored in a heap-allocated vector, which is sub-optimal for performance reasons. The new implementation instead uses a stack-based array with an O(1) approach. This should boost performance in games which heavily rely on object context creation. Co-authored-by: MaranBr Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/333 Reviewed-by: crueter Reviewed-by: CamilleLaVey Co-authored-by: SDK-Chan Co-committed-by: SDK-Chan --- .../hle/service/nvdrv/devices/nvhost_gpu.cpp | 57 ++++++++++++++----- .../hle/service/nvdrv/devices/nvhost_gpu.h | 5 +- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 95bf18dbf7..5f754650d9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -219,28 +219,55 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(IoctlAllocGpfifoEx& params, DeviceFD fd) { return NvResult::Success; } -NvResult nvhost_gpu::AllocateObjectContext(IoctlAllocObjCtx& params) { - LOG_DEBUG(Service_NVDRV, "called, class_num={:X}, flags={:X}, obj_id={:X}", params.class_num, - params.flags, params.obj_id); +s32_le nvhost_gpu::GetObjectContextClassNumberIndex(CtxClasses class_number) { + constexpr s32_le invalid_class_number_index = -1; + switch (class_number) { + case CtxClasses::Ctx2D: return 0; + case CtxClasses::Ctx3D: return 1; + case CtxClasses::CtxCompute: return 2; + case CtxClasses::CtxKepler: return 3; + case CtxClasses::CtxDMA: return 4; + case CtxClasses::CtxChannelGPFIFO: return 5; + default: return invalid_class_number_index; + } +} - if (!channel_state->initialized) { 
+NvResult nvhost_gpu::AllocateObjectContext(IoctlAllocObjCtx& params) { + LOG_DEBUG(Service_NVDRV, "called, class_num={:#X}, flags={:#X}, obj_id={:#X}", params.class_num, + params.flags, params.obj_id); + + if (!channel_state || !channel_state->initialized) { LOG_CRITICAL(Service_NVDRV, "No address space bound to allocate a object context!"); return NvResult::NotInitialized; } - switch (static_cast(params.class_num)) { - case CtxClasses::Ctx2D: - case CtxClasses::Ctx3D: - case CtxClasses::CtxCompute: - case CtxClasses::CtxKepler: - case CtxClasses::CtxDMA: - case CtxClasses::CtxChannelGPFIFO: - ctxObj_params.push_back(params); - return NvResult::Success; - default: - LOG_ERROR(Service_NVDRV, "Invalid class number for object context: {:X}", params.class_num); + std::scoped_lock lk(channel_mutex); + + if (params.flags) { + LOG_WARNING(Service_NVDRV, "non-zero flags={:#X} for class={:#X}", params.flags, + params.class_num); + + constexpr u32 allowed_mask{}; + params.flags = allowed_mask; + } + + s32_le ctx_class_number_index = + GetObjectContextClassNumberIndex(static_cast(params.class_num)); + if (ctx_class_number_index < 0) { + LOG_ERROR(Service_NVDRV, "Invalid class number for object context: {:#X}", + params.class_num); return NvResult::BadParameter; } + + if (ctxObjs[ctx_class_number_index].has_value()) { + LOG_ERROR(Service_NVDRV, "Object context for class {:#X} already allocated on this channel", + params.class_num); + return NvResult::AlreadyAllocated; + } + + ctxObjs[ctx_class_number_index] = params; + + return NvResult::Success; } static boost::container::small_vector BuildWaitCommandList( diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index a017cc50d0..fb0a5be959 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -172,7 +172,7 @@ private: s32_le nvmap_fd{}; u64_le user_data{}; IoctlZCullBind zcull_params{}; - std::vector 
ctxObj_params{}; + std::array, 6> ctxObjs{}; u32_le channel_priority{}; u32_le channel_timeslice{}; @@ -184,9 +184,12 @@ private: NvResult SetChannelPriority(IoctlChannelSetPriority& params); NvResult AllocGPFIFOEx(IoctlAllocGpfifoEx& params, DeviceFD fd); NvResult AllocGPFIFOEx2(IoctlAllocGpfifoEx& params, DeviceFD fd); + + s32_le GetObjectContextClassNumberIndex(CtxClasses class_number); NvResult AllocateObjectContext(IoctlAllocObjCtx& params); NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries); + NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params, std::span commands, bool kickoff = false); NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,