From a13b47f080bdfdb08ccf00c806e21be2a80654fc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:17:19 -0400 Subject: [PATCH 1/6] Shader_Ir: Downgrade precision and rounding asserts to debug asserts. This commit reduces the severity of asserts for FP precision and rounding as these are well known and have little to no consequences in gpu's accuracy. --- src/video_core/shader/decode/arithmetic.cpp | 8 ++++---- .../shader/decode/arithmetic_half_immediate.cpp | 2 +- src/video_core/shader/decode/ffma.cpp | 4 ++-- src/video_core/shader/decode/half_set_predicate.cpp | 2 +- src/video_core/shader/decode/hfma2.cpp | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa3..05a5f19d28 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,10 +42,10 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 
- UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG( - instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", + DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + DEBUG_ASSERT_MSG( + instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f233..60381b4827 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca32..a39283a9c2 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,9 +18,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", + DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA 
tab5980_1({}) not implemented", + DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8a..4587dbd00e 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + DEBUG_ASSERT(instr.hsetp2.ftz == 0); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9d..5b44cb79cb 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); } else { - UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); } constexpr auto identity = HalfType::H0_H1; From 57a372439dfc0ecbef2e9004fb30ba82dc428431 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:20:31 -0400 Subject: [PATCH 2/6] Gl_Texture_Cache: Remove assert on component type in GetFormatTuple Textures can have different component types in different orders. This assert was completely imprecise, and such checks are better handled case by case within the texture cache. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b1f6bc7c20..6ecb02c45f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -137,7 +137,6 @@ constexpr std::array tex_format const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); const auto& format{tex_format_tuples[static_cast(pixel_format)]}; - ASSERT(component_type == format.component_type); return format; } From 62486dfb4ff5820894f1c9cb9d11c3dda85db464 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:31:38 -0400 Subject: [PATCH 3/6] MaxwellDMA/KeplerCopy: Downgrade DMA log message to Trace. This log was just to know which games used DMA. It's no longer important. --- src/video_core/engines/maxwell_dma.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0a..758c154cb8 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -38,7 +38,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { } void MaxwellDMA::HandleCopy() { - LOG_WARNING(HW_GPU, "Requested a DMA copy"); + LOG_TRACE(HW_GPU, "Requested a DMA copy"); const GPUVAddr source = regs.src_address.Address(); const GPUVAddr dest = regs.dst_address.Address(); From d6e1af6f230f079649d3bfa153805a23814f7878 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:54:42 -0400 Subject: [PATCH 4/6] GPU: Add missing puller methods. This adds some missing puller methods. We don't assert them as these are nop operations for us. 
--- src/video_core/gpu.cpp | 20 +++++++------------- src/video_core/gpu.h | 9 ++++++++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1b4975498c..6cb5fd4e1c 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -143,12 +143,12 @@ enum class BufferMethods { NotifyIntr = 0x8, WrcacheFlush = 0x9, Unk28 = 0xA, - Unk2c = 0xB, + UnkCacheFlush = 0xB, RefCnt = 0x14, SemaphoreAcquire = 0x1A, SemaphoreRelease = 0x1B, - Unk70 = 0x1C, - Unk74 = 0x1D, + FenceValue = 0x1C, + FenceAction = 0x1D, Unk78 = 0x1E, Unk7c = 0x1F, Yield = 0x20, @@ -194,6 +194,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreSequence: case BufferMethods::RefCnt: + case BufferMethods::UnkCacheFlush: + case BufferMethods::WrcacheFlush: + case BufferMethods::FenceValue: + case BufferMethods::FenceAction: break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -204,21 +208,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); break; } - case BufferMethods::WrcacheFlush: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); - break; - } case BufferMethods::Unk28: { // TODO(Kmather73): Research and implement this method. LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); break; } - case BufferMethods::Unk2c: { - // TODO(Kmather73): Research and implement this method. 
- LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); - break; - } case BufferMethods::SemaphoreAcquire: { ProcessSemaphoreAcquire(); break; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe66289230..5a8b1c74a2 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -194,7 +194,12 @@ public: u32 semaphore_acquire; u32 semaphore_release; - INSERT_PADDING_WORDS(0xE4); + u32 fence_value; + union { + BitField<4, 4, u32> operation; + BitField<8, 8, u32> id; + } fence_action; + INSERT_PADDING_WORDS(0xE2); // Puller state u32 acquire_mode; @@ -274,6 +279,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7); ASSERT_REG_POSITION(reference_count, 0x14); ASSERT_REG_POSITION(semaphore_acquire, 0x1A); ASSERT_REG_POSITION(semaphore_release, 0x1B); +ASSERT_REG_POSITION(fence_value, 0x1C); +ASSERT_REG_POSITION(fence_action, 0x1D); ASSERT_REG_POSITION(acquire_mode, 0x100); ASSERT_REG_POSITION(acquire_source, 0x101); From 3e0f5631c3455059a1317c056510c842983b71bb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 10:09:26 -0400 Subject: [PATCH 5/6] Shader_Ir: correct clang format --- src/video_core/shader/decode/ffma.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a39283a9c2..cb3a9cfc15 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,9 +19,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO + instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + instr.ffma.tab5980_1.Value()); const Node op_a = 
GetRegister(instr.gpr8); From 9a4a346b3f986c069669c44901ed0829d9e5137e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jul 2019 22:15:34 -0400 Subject: [PATCH 6/6] Shader_Ir: Change Debug Asserts for Log Warnings --- src/video_core/shader/decode/arithmetic.cpp | 13 ++++++++----- .../shader/decode/arithmetic_half_immediate.cpp | 4 +++- src/video_core/shader/decode/ffma.cpp | 10 ++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 05a5f19d28..1473c282a0 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - DEBUG_ASSERT_MSG( - instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + if (instr.fmul.tab5cb8_2 != 0) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + } + if (instr.fmul.tab5c68_0 != 1) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); + } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 60381b4827..6466fc0117 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } 
else { - DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index cb3a9cfc15..ca2f39e8db 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + if (instr.ffma.tab5980_0 != 1) { + LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + } + if (instr.ffma.tab5980_1 != 0) { + LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + } const Node op_a = GetRegister(instr.gpr8);