[gpu/NVDRV] Finalize, improve AllocObjCtx (#333)

Improves per-channel object context allocation, allowing at most 6 object contexts per channel (one per supported class).
Previously, object contexts were stored in a heap-allocated vector, which is suboptimal for performance.
The new implementation instead uses a fixed-size array (`std::array`) indexed by class, giving an O(1) lookup and insertion without extra heap allocations.
This should boost performance in games that rely heavily on object context creation.

Co-authored-by: MaranBr <maranbr@outlook.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/333
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: SDK-Chan <sdkchan@eden-emu.dev>
Co-committed-by: SDK-Chan <sdkchan@eden-emu.dev>
This commit is contained in:
SDK-Chan 2025-08-31 07:32:54 +02:00 committed by crueter
parent 4b5a8e0621
commit 8dba6a2cb4
Signed by: crueter
GPG key ID: 425ACD2D4830EBC6
2 changed files with 46 additions and 16 deletions

View file

@ -219,28 +219,55 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(IoctlAllocGpfifoEx& params, DeviceFD fd) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::AllocateObjectContext(IoctlAllocObjCtx& params) { s32_le nvhost_gpu::GetObjectContextClassNumberIndex(CtxClasses class_number) {
LOG_DEBUG(Service_NVDRV, "called, class_num={:X}, flags={:X}, obj_id={:X}", params.class_num, constexpr s32_le invalid_class_number_index = -1;
params.flags, params.obj_id); switch (class_number) {
case CtxClasses::Ctx2D: return 0;
case CtxClasses::Ctx3D: return 1;
case CtxClasses::CtxCompute: return 2;
case CtxClasses::CtxKepler: return 3;
case CtxClasses::CtxDMA: return 4;
case CtxClasses::CtxChannelGPFIFO: return 5;
default: return invalid_class_number_index;
}
}
if (!channel_state->initialized) { NvResult nvhost_gpu::AllocateObjectContext(IoctlAllocObjCtx& params) {
LOG_DEBUG(Service_NVDRV, "called, class_num={:#X}, flags={:#X}, obj_id={:#X}", params.class_num,
params.flags, params.obj_id);
if (!channel_state || !channel_state->initialized) {
LOG_CRITICAL(Service_NVDRV, "No address space bound to allocate a object context!"); LOG_CRITICAL(Service_NVDRV, "No address space bound to allocate a object context!");
return NvResult::NotInitialized; return NvResult::NotInitialized;
} }
switch (static_cast<CtxClasses>(params.class_num)) { std::scoped_lock lk(channel_mutex);
case CtxClasses::Ctx2D:
case CtxClasses::Ctx3D: if (params.flags) {
case CtxClasses::CtxCompute: LOG_WARNING(Service_NVDRV, "non-zero flags={:#X} for class={:#X}", params.flags,
case CtxClasses::CtxKepler: params.class_num);
case CtxClasses::CtxDMA:
case CtxClasses::CtxChannelGPFIFO: constexpr u32 allowed_mask{};
ctxObj_params.push_back(params); params.flags = allowed_mask;
return NvResult::Success; }
default:
LOG_ERROR(Service_NVDRV, "Invalid class number for object context: {:X}", params.class_num); s32_le ctx_class_number_index =
GetObjectContextClassNumberIndex(static_cast<CtxClasses>(params.class_num));
if (ctx_class_number_index < 0) {
LOG_ERROR(Service_NVDRV, "Invalid class number for object context: {:#X}",
params.class_num);
return NvResult::BadParameter; return NvResult::BadParameter;
} }
if (ctxObjs[ctx_class_number_index].has_value()) {
LOG_ERROR(Service_NVDRV, "Object context for class {:#X} already allocated on this channel",
params.class_num);
return NvResult::AlreadyAllocated;
}
ctxObjs[ctx_class_number_index] = params;
return NvResult::Success;
} }
static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList( static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList(

View file

@ -172,7 +172,7 @@ private:
s32_le nvmap_fd{}; s32_le nvmap_fd{};
u64_le user_data{}; u64_le user_data{};
IoctlZCullBind zcull_params{}; IoctlZCullBind zcull_params{};
std::vector<IoctlAllocObjCtx> ctxObj_params{}; std::array<std::optional<IoctlAllocObjCtx>, 6> ctxObjs{};
u32_le channel_priority{}; u32_le channel_priority{};
u32_le channel_timeslice{}; u32_le channel_timeslice{};
@ -184,9 +184,12 @@ private:
NvResult SetChannelPriority(IoctlChannelSetPriority& params); NvResult SetChannelPriority(IoctlChannelSetPriority& params);
NvResult AllocGPFIFOEx(IoctlAllocGpfifoEx& params, DeviceFD fd); NvResult AllocGPFIFOEx(IoctlAllocGpfifoEx& params, DeviceFD fd);
NvResult AllocGPFIFOEx2(IoctlAllocGpfifoEx& params, DeviceFD fd); NvResult AllocGPFIFOEx2(IoctlAllocGpfifoEx& params, DeviceFD fd);
s32_le GetObjectContextClassNumberIndex(CtxClasses class_number);
NvResult AllocateObjectContext(IoctlAllocObjCtx& params); NvResult AllocateObjectContext(IoctlAllocObjCtx& params);
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries); NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries);
NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params, NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
std::span<Tegra::CommandListHeader> commands, bool kickoff = false); std::span<Tegra::CommandListHeader> commands, bool kickoff = false);
NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params, NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,