From a13b47f080bdfdb08ccf00c806e21be2a80654fc Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 18 Jul 2019 08:17:19 -0400
Subject: [PATCH 1/6] Shader_Ir: Downgrade precision and rounding asserts to
 debug asserts.

This commit reduces the sevirity of asserts for FP precision and
rounding as this are well known and have little to no consequences in
gpu's accuracy.
---
 src/video_core/shader/decode/arithmetic.cpp               | 8 ++++----
 .../shader/decode/arithmetic_half_immediate.cpp           | 2 +-
 src/video_core/shader/decode/ffma.cpp                     | 4 ++--
 src/video_core/shader/decode/half_set_predicate.cpp       | 2 +-
 src/video_core/shader/decode/hfma2.cpp                    | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa3..05a5f19d28 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,10 +42,10 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::FMUL_R:
     case OpCode::Id::FMUL_IMM: {
         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
-        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
-                             instr.fmul.tab5cb8_2.Value());
-        UNIMPLEMENTED_IF_MSG(
-            instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
+        DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented",
+                         instr.fmul.tab5cb8_2.Value());
+        DEBUG_ASSERT_MSG(
+            instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented",
             instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f233..60381b4827 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
         }
     } else {
-        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
+        DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None);
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca32..a39283a9c2 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,9 +18,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
-    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
+    DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
                          instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
-    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
+    DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
                          instr.ffma.tab5980_1.Value());
 
     const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8a..4587dbd00e 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
+    DEBUG_ASSERT(instr.hsetp2.ftz == 0);
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9d..5b44cb79cb 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
-        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
     } else {
-        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
     }
 
     constexpr auto identity = HalfType::H0_H1;

From 57a372439dfc0ecbef2e9004fb30ba82dc428431 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 18 Jul 2019 08:20:31 -0400
Subject: [PATCH 2/6] Gl_Texture_Cache: Remove assert on component type in
 GetFormatTuple

Textures can have different components types in different orders. This
assert was completely inprecise and the effectiveness of such is better
handled by case and within the texture cache.
---
 src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b1f6bc7c20..6ecb02c45f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -137,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
     const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
-    ASSERT(component_type == format.component_type);
     return format;
 }
 

From 62486dfb4ff5820894f1c9cb9d11c3dda85db464 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 18 Jul 2019 08:31:38 -0400
Subject: [PATCH 3/6] MaxwellDMA/KeplerCopy: Downgrade DMA log message to
 Trace.

This log was just to know which games used DMA. It's no longer
important.
---
 src/video_core/engines/maxwell_dma.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0a..758c154cb8 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -38,7 +38,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
 }
 
 void MaxwellDMA::HandleCopy() {
-    LOG_WARNING(HW_GPU, "Requested a DMA copy");
+    LOG_TRACE(HW_GPU, "Requested a DMA copy");
 
     const GPUVAddr source = regs.src_address.Address();
     const GPUVAddr dest = regs.dst_address.Address();

From d6e1af6f230f079649d3bfa153805a23814f7878 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 18 Jul 2019 08:54:42 -0400
Subject: [PATCH 4/6] GPU: Add missing puller methods.

This adds some missing puller methods. We don't assert them as these are
nop operations for us.
---
 src/video_core/gpu.cpp | 20 +++++++-------------
 src/video_core/gpu.h   |  9 ++++++++-
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 1b4975498c..6cb5fd4e1c 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -143,12 +143,12 @@ enum class BufferMethods {
     NotifyIntr = 0x8,
     WrcacheFlush = 0x9,
     Unk28 = 0xA,
-    Unk2c = 0xB,
+    UnkCacheFlush = 0xB,
     RefCnt = 0x14,
     SemaphoreAcquire = 0x1A,
     SemaphoreRelease = 0x1B,
-    Unk70 = 0x1C,
-    Unk74 = 0x1D,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
     Unk78 = 0x1E,
     Unk7c = 0x1F,
     Yield = 0x20,
@@ -194,6 +194,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::SemaphoreAddressLow:
     case BufferMethods::SemaphoreSequence:
     case BufferMethods::RefCnt:
+    case BufferMethods::UnkCacheFlush:
+    case BufferMethods::WrcacheFlush:
+    case BufferMethods::FenceValue:
+    case BufferMethods::FenceAction:
         break;
     case BufferMethods::SemaphoreTrigger: {
         ProcessSemaphoreTriggerMethod();
@@ -204,21 +208,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
         LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
         break;
     }
-    case BufferMethods::WrcacheFlush: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
-        break;
-    }
     case BufferMethods::Unk28: {
         // TODO(Kmather73): Research and implement this method.
         LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
         break;
     }
-    case BufferMethods::Unk2c: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
-        break;
-    }
     case BufferMethods::SemaphoreAcquire: {
         ProcessSemaphoreAcquire();
         break;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fe66289230..5a8b1c74a2 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -194,7 +194,12 @@ public:
 
                 u32 semaphore_acquire;
                 u32 semaphore_release;
-                INSERT_PADDING_WORDS(0xE4);
+                u32 fence_value;
+                union {
+                    BitField<4, 4, u32> operation;
+                    BitField<8, 8, u32> id;
+                } fence_action;
+                INSERT_PADDING_WORDS(0xE2);
 
                 // Puller state
                 u32 acquire_mode;
@@ -274,6 +279,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
 ASSERT_REG_POSITION(reference_count, 0x14);
 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
 ASSERT_REG_POSITION(semaphore_release, 0x1B);
+ASSERT_REG_POSITION(fence_value, 0x1C);
+ASSERT_REG_POSITION(fence_action, 0x1D);
 
 ASSERT_REG_POSITION(acquire_mode, 0x100);
 ASSERT_REG_POSITION(acquire_source, 0x101);

From 3e0f5631c3455059a1317c056510c842983b71bb Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 18 Jul 2019 10:09:26 -0400
Subject: [PATCH 5/6] Shader_Ir: correct clang format

---
 src/video_core/shader/decode/ffma.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index a39283a9c2..cb3a9cfc15 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,9 +19,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
     DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
-                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
+                     instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
     DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
-                         instr.ffma.tab5980_1.Value());
+                     instr.ffma.tab5980_1.Value());
 
     const Node op_a = GetRegister(instr.gpr8);
 

From 9a4a346b3f986c069669c44901ed0829d9e5137e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Jul 2019 22:15:34 -0400
Subject: [PATCH 6/6] Shader_Ir: Change Debug Asserts for Log Warnings

---
 src/video_core/shader/decode/arithmetic.cpp         | 13 ++++++++-----
 .../shader/decode/arithmetic_half_immediate.cpp     |  4 +++-
 src/video_core/shader/decode/ffma.cpp               | 10 ++++++----
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 05a5f19d28..1473c282a0 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::FMUL_R:
     case OpCode::Id::FMUL_IMM: {
         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
-        DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented",
-                         instr.fmul.tab5cb8_2.Value());
-        DEBUG_ASSERT_MSG(
-            instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented",
-            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
+        if (instr.fmul.tab5cb8_2 != 0) {
+            LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+                        instr.fmul.tab5cb8_2.Value());
+        }
+        if (instr.fmul.tab5c68_0 != 1) {
+            LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+                        instr.fmul.tab5c68_0.Value());
+        }
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
 
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 60381b4827..6466fc0117 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
         }
     } else {
-        DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None);
+        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
+            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        }
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index cb3a9cfc15..ca2f39e8db 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
-    DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
-                     instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
-    DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
-                     instr.ffma.tab5980_1.Value());
+    if (instr.ffma.tab5980_0 != 1) {
+        LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+    }
+    if (instr.ffma.tab5980_1 != 0) {
+        LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+    }
 
     const Node op_a = GetRegister(instr.gpr8);