From 491e2cbfd7f8a1667a28e469ac43b732329eaa04 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 18:51:32 -0300 Subject: [PATCH 1/7] decode/register_set_predicate: Use move for shared pointers Avoid atomic counters used by shared pointers. --- .../shader/decode/register_set_predicate.cpp | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 8d54cce34e..a6733255d2 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -10,6 +12,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -23,7 +26,7 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - const Node apply_mask = [&] { + Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: case OpCode::Id::P2R_IMM: @@ -34,25 +37,23 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { } }(); - const auto offset = static_cast(instr.p2r_r2p.byte) * 8; + const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { - const Node mask = GetRegister(instr.gpr8); + Node mask = GetRegister(instr.gpr8); for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const auto shift = static_cast(pred); + const u32 shift = static_cast(pred); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + Node apply = BitfieldExtract(apply_mask, shift, 1); + Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = - Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + Node compare = BitfieldExtract(mask, offset + shift, 1); + Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + bb.push_back(Conditional(condition, {move(code)})); } break; } @@ -61,12 +62,12 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } - value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } default: From 26dc95c7bcae70e489c26ef9a357812edf1c902e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 19:05:34 -0300 Subject: [PATCH 2/7] decode/register_set_predicate: Implement CC P2R CC takes the state of condition codes and puts them into a register. We already have this implemented for PR (predicates). This commit implements CC over that. --- .../shader/decode/register_set_predicate.cpp | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index a6733255d2..6116c31aa4 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -17,15 +17,14 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; namespace { -constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; -} +constexpr u64 NUM_CONDITION_CODES = 4; +constexpr u64 NUM_PREDICATES = 7; +} // namespace u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - Node apply_mask = [this, opcode, instr] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: @@ -39,12 +38,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; + const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; + const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; + const auto get_entry = [this, cc](u64 entry) { + return cc ? GetInternalFlag(static_cast(entry)) : GetPredicate(entry); + }; + switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: { Node mask = GetRegister(instr.gpr8); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - const u32 shift = static_cast(pred); + for (u64 entry = 0; entry < num_entries; ++entry) { + const u32 shift = static_cast(entry); Node apply = BitfieldExtract(apply_mask, shift, 1); Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); @@ -52,15 +57,15 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { Node compare = BitfieldExtract(mask, offset + shift, 1); Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), move(value)); + Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); bb.push_back(Conditional(condition, {move(code)})); } break; } case OpCode::Id::P2R_IMM: { Node value = Immediate(0); - for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { - Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + for (u64 entry = 0; entry < num_entries; ++entry) { + Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), Immediate(0)); value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); } From 7e8f51273c3cac70337eb18fbd875eb060456a6f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 21:56:11 -0300 Subject: [PATCH 3/7] shader/arithmetic_integer: Implement CC for IADD --- externals/sirit | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 10 +++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 11 ++++++++++ .../shader/decode/arithmetic_integer.cpp | 22 ++++++++++++++++--- src/video_core/shader/node.h | 2 ++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/externals/sirit b/externals/sirit index a712959f1e..414fc4dbd2 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit a712959f1e373a33b48042b5934e288a243d5954 +Subproject commit 414fc4dbd28d8fe48f735a0c389db8a234f733c0 diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0cd3ad7e13..3803a6f3ae 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1870,6 +1870,14 @@ private: return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); } + Expression LogicalAddCarry(Operation operation) { + const std::string carry = code.GenerateTemporary(); + code.AddLine("uint {};", carry); + code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), + VisitOperand(operation, 1).AsUint(), carry); + return {fmt::format("({} != 0)", carry), Type::Bool}; + } + Expression LogicalFIsNan(Operation operation) { return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); } @@ -2441,6 +2449,8 @@ private: &GLSLDecompiler::LogicalNotEqual, &GLSLDecompiler::LogicalGreaterEqual, + &GLSLDecompiler::LogicalAddCarry, + &GLSLDecompiler::Logical2HLessThan, &GLSLDecompiler::Logical2HEqual, &GLSLDecompiler::Logical2HLessEqual, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index aaa138f527..20b6ca0ad7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1584,6 +1584,15 @@ private: return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; } + Expression LogicalAddCarry(Operation operation) { + const Id op_a = AsUint(Visit(operation[0])); + const Id op_b = AsUint(Visit(operation[1])); + + const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); + const Id carry = OpCompositeExtract(t_uint, result, 1); + return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; + } + Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -2518,6 +2527,8 @@ private: &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::LogicalAddCarry, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9af8c606d6..99b4b6342f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,10 +40,26 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + if (instr.generates_cc) { + const Node i0 = Immediate(0); + + Node zero = Operation(OperationCode::LogicalIEqual, value, i0); + Node sign = Operation(OperationCode::LogicalILessThan, value, i0); + Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); + + Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); + Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); + Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); + Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); + + SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); + SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); + SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); + SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); + } + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::IADD3_C: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3eee961f53..3f5a7bc7af 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -132,6 +132,8 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool + LogicalAddCarry, /// (uint a, uint b) -> bool + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 From 6404cd824bd5db9519cec6fb07f2e5f67867946d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:41:20 -0300 Subject: [PATCH 4/7] shader/arithmetic_integer: Implement IADD.X IADD.X takes the carry flag and adds it to the result. This is generally used to emulate 64-bit operations with 32-bit registers. --- src/video_core/engines/shader_bytecode.h | 4 ++++ src/video_core/shader/decode/arithmetic_integer.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cde3a26b9d..8dae754d49 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -813,6 +813,10 @@ union Instruction { BitField<49, 1, u64> negate_a; } alu_integer; + union { + BitField<43, 1, u64> x; + } iadd; + union { BitField<39, 1, u64> ftz; BitField<32, 1, u64> saturate; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 99b4b6342f..2a3311cb8e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -42,6 +42,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { Node value = Operation(OperationCode::IAdd, op_a, op_b); + if (instr.iadd.x) { + Node carry = GetInternalFlag(InternalFlag::Carry); + Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); + value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + } + if (instr.generates_cc) { const Node i0 = Immediate(0); From 45cb8fc72ac4fe349056371e7d6b80b679ace123 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:42:33 -0300 Subject: [PATCH 5/7] shader/arithmetic_integer: Change IAdd to UAdd to avoid signed overflow Signed integer addition overflow might be undefined behavior. It's free to change operations to UAdd and use unsigned integers to avoid potential bugs. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2a3311cb8e..addd7f5333 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -40,12 +40,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - Node value = Operation(OperationCode::IAdd, op_a, op_b); + Node value = Operation(OperationCode::UAdd, op_a, op_b); if (instr.iadd.x) { Node carry = GetInternalFlag(InternalFlag::Carry); Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::IAdd, std::move(value), std::move(x)); + value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); } if (instr.generates_cc) { From 7442f21c5b31668c3d6ce122cbbfca13914ac78c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 25 Apr 2020 22:53:54 -0300 Subject: [PATCH 6/7] shader/arithmetic_integer: Fix edge case and mark IADD.X Rd.CC as unimplemented IADD.X Rd.CC requires some extra logic that is not currently implemented. Abort when this is hit. --- src/video_core/shader/decode/arithmetic_integer.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index addd7f5333..ced5c3dc12 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -35,7 +35,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); + UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); + UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); @@ -49,6 +50,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { + // Avoid changing result's carry flag + SetTemporary(bb, 0, std::move(value)); + value = GetTemporary(0); + const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0); From 8d28a56a6cbf88f65dba24d332ad605935894913 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 28 Apr 2020 17:14:53 -0300 Subject: [PATCH 7/7] shader/arithmetic_integer: Fix tracking issue in temporary This temporary is not needed as we mark Rd.CC + IADD.X as unimplemented. It caused issues when tracking global buffers. --- src/video_core/shader/decode/arithmetic_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index ced5c3dc12..a041519b74 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -50,10 +50,6 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { } if (instr.generates_cc) { - // Avoid changing result's carry flag - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - const Node i0 = Immediate(0); Node zero = Operation(OperationCode::LogicalIEqual, value, i0);