 a7109c767b
			
		
	
	
		a7109c767b
		
			
		
	
	
	
	
		
			
			* Rewrite shader decoding stage * Fix P2R constant buffer encoding * Fix PSET/PSETP * PR feedback * Log unimplemented shader instructions * Implement NOP * Remove using * PR feedback
		
			
				
	
	
		
			527 lines
		
	
	
		
			No EOL
		
	
	
		
			17 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			527 lines
		
	
	
		
			No EOL
		
	
	
		
			17 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| using Ryujinx.Graphics.Shader.Decoders;
 | |
| using Ryujinx.Graphics.Shader.IntermediateRepresentation;
 | |
| using Ryujinx.Graphics.Shader.Translation;
 | |
| 
 | |
| using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
 | |
| using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
 | |
| 
 | |
| namespace Ryujinx.Graphics.Shader.Instructions
 | |
| {
 | |
|     static partial class InstEmit
 | |
|     {
 | |
/// <summary>
/// Selects the memory space that <c>EmitLoad</c> and <c>EmitStore</c> operate on.
/// </summary>
private enum MemoryRegion
{
    /// <summary>Accessed with <c>LoadLocal</c> / <c>StoreLocal</c>.</summary>
    Local,

    /// <summary>Accessed with <c>LoadShared</c> / <c>StoreShared</c>.</summary>
    Shared
}
 | |
| 
 | |
/// <summary>
/// Emits the atomic operation described by the current <c>InstAtom</c> using the
/// <c>Instruction.MrGlobal</c> flag, then copies the result to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Atom(EmitterContext context)
{
    InstAtom inst = context.GetOp<InstAtom>();

    // Shift left, then arithmetically right, to sign-extend the low 20 bits of the immediate.
    const int SignShift = 32 - 20;
    int signedOffset = (inst.Imm20 << SignShift) >> SignShift;

    Register baseReg = new Register(inst.SrcA, RegisterType.Gpr);

    (Operand low, Operand high) = Get40BitsAddress(context, baseReg, inst.E, signedOffset);

    Operand operand = GetSrcReg(context, inst.SrcB);
    Operand result = EmitAtomicOp(context, Instruction.MrGlobal, inst.Op, inst.Size, low, high, operand);

    context.Copy(GetDest(inst.Dest), result);
}
 | |
| 
 | |
/// <summary>
/// Emits the atomic operation described by the current <c>InstAtoms</c> using the
/// <c>Instruction.MrShared</c> flag, then copies the result to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Atoms(EmitterContext context)
{
    InstAtoms inst = context.GetOp<InstAtoms>();

    // The register value is shifted right by 2 before the immediate is added.
    Operand baseReg = GetSrcReg(context, inst.SrcA);
    Operand wordOffset = context.ShiftRightU32(baseReg, Const(2));

    // Sign-extend the low 22 bits of the immediate.
    const int SignShift = 32 - 22;
    int signedOffset = (inst.Imm22 << SignShift) >> SignShift;

    wordOffset = context.IAdd(wordOffset, Const(signedOffset));

    Operand operand = GetSrcReg(context, inst.SrcB);

    // Translate the ATOMS-specific size enum; anything unlisted is treated as U32.
    AtomSize atomSize;

    switch (inst.AtomsSize)
    {
        case AtomsSize.S32:
            atomSize = AtomSize.S32;
            break;
        case AtomsSize.U64:
            atomSize = AtomSize.U64;
            break;
        case AtomsSize.S64:
            atomSize = AtomSize.S64;
            break;
        default:
            atomSize = AtomSize.U32;
            break;
    }

    // The high address operand is a constant zero here.
    Operand result = EmitAtomicOp(context, Instruction.MrShared, inst.AtomOp, atomSize, wordOffset, Const(0), operand);

    context.Copy(GetDest(inst.Dest), result);
}
 | |
| 
 | |
/// <summary>
/// Emits constant buffer loads for the current <c>InstLdc</c>, writing one word per destination register.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Ldc(EmitterContext context)
{
    InstLdc inst = context.GetOp<InstLdc>();
    LsSize2 size = inst.LsSize;

    // Sizes above B64 are rejected; nothing is emitted for them.
    if (size > LsSize2.B64)
    {
        context.Config.GpuAccessor.Log($"Invalid LDC size: {size}.");
        return;
    }

    bool needsExtract = size < LsSize2.B32;
    int wordCount = size == LsSize2.B64 ? 2 : 1;

    Operand cbufSlot = Const(inst.CbufSlot);
    Operand baseReg = GetSrcReg(context, inst.SrcA);

    switch (inst.AddressMode)
    {
        case AddressMode.Is:
        case AddressMode.Isl:
        {
            // In these modes the upper 16 bits of the register are added to the slot,
            // and only the lower 16 bits are kept as the offset.
            Operand slotDelta = context.BitfieldExtractU32(baseReg, Const(16), Const(16));

            cbufSlot = context.IAdd(cbufSlot, slotDelta);
            baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
            break;
        }
    }

    Operand byteAddress = context.IAdd(baseReg, Const(Imm16ToSInt(inst.CbufOffset)));
    Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
    Operand bitOffset = GetBitOffset(context, byteAddress);

    for (int word = 0; word < wordCount; word++)
    {
        Register target = new Register(inst.Dest + word, RegisterType.Gpr);

        // Stop at RZ: this and any remaining words are not loaded.
        if (target.IsRZ)
        {
            break;
        }

        Operand wordIndex = context.IAdd(firstWord, Const(word));
        Operand loaded = context.LoadConstant(cbufSlot, wordIndex);

        if (needsExtract)
        {
            loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
 | |
| 
 | |
/// <summary>
/// Emits the load described by the current <c>InstLdg</c> by forwarding its fields to <c>EmitLdg</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Ldg(EmitterContext context)
{
    InstLdg inst = context.GetOp<InstLdg>();

    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitLdg(context, inst.LsSize, inst.SrcA, inst.Dest, signedOffset, inst.E);
}
 | |
| 
 | |
/// <summary>
/// Emits the load described by the current <c>InstLdl</c> through <c>EmitLoad</c> with <c>MemoryRegion.Local</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Ldl(EmitterContext context)
{
    InstLdl inst = context.GetOp<InstLdl>();

    // Read the base register before converting the immediate, as the original call did.
    Operand baseReg = GetSrcReg(context, inst.SrcA);
    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitLoad(context, MemoryRegion.Local, inst.LsSize, baseReg, inst.Dest, signedOffset);
}
 | |
| 
 | |
/// <summary>
/// Emits the load described by the current <c>InstLds</c> through <c>EmitLoad</c> with <c>MemoryRegion.Shared</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Lds(EmitterContext context)
{
    InstLds inst = context.GetOp<InstLds>();

    // Read the base register before converting the immediate, as the original call did.
    Operand baseReg = GetSrcReg(context, inst.SrcA);
    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitLoad(context, MemoryRegion.Shared, inst.LsSize, baseReg, inst.Dest, signedOffset);
}
 | |
| 
 | |
/// <summary>
/// Emits the atomic operation described by the current <c>InstRed</c> using the
/// <c>Instruction.MrGlobal</c> flag. The result of the operation is discarded.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Red(EmitterContext context)
{
    InstRed inst = context.GetOp<InstRed>();

    Register baseReg = new Register(inst.SrcA, RegisterType.Gpr);

    // NOTE(review): Imm20 is passed as-is here, while Atom sign-extends its Imm20 from 20 bits.
    // Confirm against the InstRed decoder whether this immediate is already signed.
    (Operand low, Operand high) = Get40BitsAddress(context, baseReg, inst.E, inst.Imm20);

    AtomOp atomOp = (AtomOp)inst.RedOp;
    Operand operand = GetDest(inst.SrcB);

    EmitAtomicOp(context, Instruction.MrGlobal, atomOp, inst.RedSize, low, high, operand);
}
 | |
| 
 | |
/// <summary>
/// Emits the store described by the current <c>InstStg</c> by forwarding its fields to <c>EmitStg</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Stg(EmitterContext context)
{
    InstStg inst = context.GetOp<InstStg>();

    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitStg(context, inst.LsSize, inst.SrcA, inst.Dest, signedOffset, inst.E);
}
 | |
| 
 | |
/// <summary>
/// Emits the store described by the current <c>InstStl</c> through <c>EmitStore</c> with <c>MemoryRegion.Local</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Stl(EmitterContext context)
{
    InstStl inst = context.GetOp<InstStl>();

    // Read the base register before converting the immediate, as the original call did.
    Operand baseReg = GetSrcReg(context, inst.SrcA);
    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitStore(context, MemoryRegion.Local, inst.LsSize, baseReg, inst.Dest, signedOffset);
}
 | |
| 
 | |
/// <summary>
/// Emits the store described by the current <c>InstSts</c> through <c>EmitStore</c> with <c>MemoryRegion.Shared</c>.
/// </summary>
/// <param name="context">Emitter context holding the decoded operation</param>
public static void Sts(EmitterContext context)
{
    InstSts inst = context.GetOp<InstSts>();

    // Read the base register before converting the immediate, as the original call did.
    Operand baseReg = GetSrcReg(context, inst.SrcA);
    int signedOffset = Imm24ToSInt(inst.Imm24);

    EmitStore(context, MemoryRegion.Shared, inst.LsSize, baseReg, inst.Dest, signedOffset);
}
 | |
| 
 | |
/// <summary>
/// Emits the atomic operation selected by <paramref name="op"/>.
/// Only <c>AtomSize.S32</c> and <c>AtomSize.U32</c> are emitted; other types are logged.
/// </summary>
/// <param name="context">Emitter context that receives the operation</param>
/// <param name="mr">Memory region flag forwarded to the atomic emitters</param>
/// <param name="op">Atomic operation to perform</param>
/// <param name="type">Operand type of the atomic operation</param>
/// <param name="addrLow">Low address operand</param>
/// <param name="addrHigh">High address operand</param>
/// <param name="value">Value operand of the atomic operation</param>
/// <returns>Result of the emitted operation, or a constant zero when nothing was emitted</returns>
private static Operand EmitAtomicOp(
    EmitterContext context,
    Instruction mr,
    AtomOp op,
    AtomSize type,
    Operand addrLow,
    Operand addrHigh,
    Operand value)
{
    bool isS32 = type == AtomSize.S32;
    bool isU32 = type == AtomSize.U32;
    bool is32Bit = isS32 || isU32;

    // Returned unchanged when the operation is not listed below or the type is not supported.
    Operand result = Const(0);

    switch (op)
    {
        case AtomOp.Add when is32Bit:
            result = context.AtomicAdd(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.And when is32Bit:
            result = context.AtomicAnd(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.Xor when is32Bit:
            result = context.AtomicXor(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.Or when is32Bit:
            result = context.AtomicOr(mr, addrLow, addrHigh, value);
            break;

        // Min/Max have distinct signed and unsigned emitters.
        case AtomOp.Max when isS32:
            result = context.AtomicMaxS32(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.Max when isU32:
            result = context.AtomicMaxU32(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.Min when isS32:
            result = context.AtomicMinS32(mr, addrLow, addrHigh, value);
            break;

        case AtomOp.Min when isU32:
            result = context.AtomicMinU32(mr, addrLow, addrHigh, value);
            break;

        // A listed operation whose guard above failed, i.e. the type is neither S32 nor U32.
        case AtomOp.Add:
        case AtomOp.And:
        case AtomOp.Xor:
        case AtomOp.Or:
        case AtomOp.Max:
        case AtomOp.Min:
            context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
            break;
    }

    return result;
}
 | |
| 
 | |
/// <summary>
/// Emits loads from local or shared memory, writing one 32-bit word per destination register.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="region">Memory space to load from</param>
/// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored</param>
/// <param name="srcA">Base byte offset operand</param>
/// <param name="rd">Index of the first destination register</param>
/// <param name="offset">Constant byte offset added to <paramref name="srcA"/></param>
private static void EmitLoad(
    EmitterContext context,
    MemoryRegion region,
    LsSize2 size,
    Operand srcA,
    int rd,
    int offset)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
        return;
    }

    bool needsExtract = size < LsSize2.B32;

    // Number of 32-bit words to transfer.
    int wordCount = size == LsSize2.B64 ? 2 : (size == LsSize2.B128 ? 4 : 1);

    Operand byteAddress = context.IAdd(srcA, Const(offset));

    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
    Operand bitOffset = GetBitOffset(context, byteAddress);

    for (int word = 0; word < wordCount; word++)
    {
        Register target = new Register(rd + word, RegisterType.Gpr);

        // Stop at RZ: this and any remaining words are not loaded.
        if (target.IsRZ)
        {
            break;
        }

        Operand wordIndex = context.IAdd(firstWord, Const(word));

        Operand loaded = region switch
        {
            MemoryRegion.Local => context.LoadLocal(wordIndex),
            MemoryRegion.Shared => context.LoadShared(wordIndex),
            _ => null
        };

        if (needsExtract)
        {
            loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
 | |
| 
 | |
/// <summary>
/// Emits global memory loads, writing one 32-bit word per destination register.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="size">Access size</param>
/// <param name="ra">Index of the register holding the base address</param>
/// <param name="rd">Index of the first destination register</param>
/// <param name="offset">Constant byte offset added to the base address</param>
/// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form</param>
private static void EmitLdg(
    EmitterContext context,
    LsSize size,
    int ra,
    int rd,
    int offset,
    bool extended)
{
    bool needsExtract = size < LsSize.B32;
    int wordCount = GetVectorCount(size);

    Register baseReg = new Register(ra, RegisterType.Gpr);

    (Operand low, Operand high) = Get40BitsAddress(context, baseReg, extended, offset);

    Operand bitOffset = GetBitOffset(context, low);

    for (int word = 0; word < wordCount; word++)
    {
        Register target = new Register(rd + word, RegisterType.Gpr);

        // Stop at RZ: this and any remaining words are not loaded.
        if (target.IsRZ)
        {
            break;
        }

        // Each successive word lives 4 bytes further on; only the low address word is advanced.
        Operand wordAddress = context.IAdd(low, Const(word * 4));
        Operand loaded = context.LoadGlobal(wordAddress, high);

        if (needsExtract)
        {
            loaded = ExtractSmallInt(context, size, bitOffset, loaded);
        }

        context.Copy(Register(target), loaded);
    }
}
 | |
| 
 | |
/// <summary>
/// Emits stores to local or shared memory, reading one 32-bit word per source register.
/// For sizes below <c>LsSize2.B32</c> the target word is loaded, the value is inserted
/// at the right bit offset, and the merged word is stored back.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="region">Memory space to store to</param>
/// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored</param>
/// <param name="srcA">Base byte offset operand</param>
/// <param name="rd">Index of the first source register</param>
/// <param name="offset">Constant byte offset added to <paramref name="srcA"/></param>
private static void EmitStore(
    EmitterContext context,
    MemoryRegion region,
    LsSize2 size,
    Operand srcA,
    int rd,
    int offset)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
        return;
    }

    bool isSmallInt = size < LsSize2.B32;

    int count = 1;

    switch (size)
    {
        case LsSize2.B64: count = 2; break;
        case LsSize2.B128: count = 4; break;
    }

    Operand baseOffset = context.IAdd(srcA, Const(offset));
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
    Operand bitOffset = GetBitOffset(context, baseOffset);

    for (int index = 0; index < count; index++)
    {
        // Once the register index reaches RZ, read RZ itself. Falling back to rd
        // would store the first register again when a vector starts just below RZ
        // (for example a two-word store whose first register is RegisterZeroIndex - 1).
        bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

        Operand value = Register(isRz ? RegisterConsts.RegisterZeroIndex : rd + index, RegisterType.Gpr);
        Operand elemOffset = context.IAdd(wordOffset, Const(index));

        if (isSmallInt)
        {
            // Read-modify-write: fetch the containing word so the untouched bits are preserved.
            Operand word = null;

            switch (region)
            {
                case MemoryRegion.Local: word = context.LoadLocal(elemOffset); break;
                case MemoryRegion.Shared: word = context.LoadShared(elemOffset); break;
            }

            value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
        }

        switch (region)
        {
            case MemoryRegion.Local: context.StoreLocal(elemOffset, value); break;
            case MemoryRegion.Shared: context.StoreShared(elemOffset, value); break;
        }
    }
}
 | |
| 
 | |
/// <summary>
/// Emits global memory stores, reading one 32-bit word per source register.
/// For sizes below <c>LsSize2.B32</c> the target word is loaded, the value is inserted
/// at the right bit offset, and the merged word is stored back.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="size">Access size; values above <c>LsSize2.B128</c> are logged and ignored</param>
/// <param name="ra">Index of the register holding the base address</param>
/// <param name="rd">Index of the first source register</param>
/// <param name="offset">Constant byte offset added to the base address</param>
/// <param name="extended">Forwarded to <c>Get40BitsAddress</c> to select the extended address form</param>
private static void EmitStg(
    EmitterContext context,
    LsSize2 size,
    int ra,
    int rd,
    int offset,
    bool extended)
{
    if (size > LsSize2.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
        return;
    }

    bool isSmallInt = size < LsSize2.B32;

    int count = GetVectorCount((LsSize)size);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

    Operand bitOffset = GetBitOffset(context, addrLow);

    for (int index = 0; index < count; index++)
    {
        // Once the register index reaches RZ, read RZ itself. Falling back to rd
        // would store the first register again when a vector starts just below RZ
        // (for example a two-word store whose first register is RegisterZeroIndex - 1).
        bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

        Operand value = Register(isRz ? RegisterConsts.RegisterZeroIndex : rd + index, RegisterType.Gpr);

        if (isSmallInt)
        {
            // Read-modify-write: fetch the containing word so the untouched bits are preserved.
            // GetVectorCount returns 1 for every size it does not list, so this presumably only
            // runs with index == 0, where addrLow is the element address.
            Operand word = context.LoadGlobal(addrLow, addrHigh);

            value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
        }

        context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
    }
}
 | |
| 
 | |
/// <summary>
/// Gets the number of 32-bit words transferred for an access of the given size.
/// </summary>
/// <param name="size">Access size</param>
/// <returns>2 for B64, 4 for B128 and UB128, otherwise 1</returns>
private static int GetVectorCount(LsSize size)
{
    return size switch
    {
        LsSize.B64 => 2,
        LsSize.B128 => 4,
        LsSize.UB128 => 4,
        _ => 1
    };
}
 | |
| 
 | |
/// <summary>
/// Builds the low and high address operands from a base register and a signed constant offset.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="ra">Register holding the low address word; in extended mode the next register holds the high word</param>
/// <param name="extended">True to read the high word from <c>ra + 1</c> and carry the offset addition into it</param>
/// <param name="offset">Signed offset added to the address</param>
/// <returns>The (low, high) address operands after the offset has been applied</returns>
private static (Operand, Operand) Get40BitsAddress(
    EmitterContext context,
    Register ra,
    bool extended,
    int offset)
{
    Operand addrLow = Register(ra);
    Operand addrHigh;

    if (extended && !ra.IsRZ)
    {
        addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
    }
    else
    {
        addrHigh = Const(0);
    }

    Operand offs = Const(offset);

    addrLow = context.IAdd(addrLow, offs);

    if (extended)
    {
        // Unsigned overflow of the low word: the sum wrapped around if it is below the addend.
        Operand carry = context.ICompareLessUnsigned(addrLow, offs);

        addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));

        if (offset < 0)
        {
            // A negative offset sign-extends to a high word of all ones (-1).
            // Without this term, the carry produced by adding the offset's unsigned
            // bit pattern leaves the high word one too large.
            addrHigh = context.IAdd(addrHigh, Const(-1));
        }
    }

    return (addrLow, addrHigh);
}
 | |
| 
 | |
/// <summary>
/// Computes the bit position, inside its 32-bit word, of the byte addressed by <paramref name="baseOffset"/>.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="baseOffset">Byte offset or address operand</param>
/// <returns>Operand holding <c>(baseOffset &amp; 3) * 8</c></returns>
private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
{
    // Addresses are expected to be aligned to the integer type being accessed,
    // so unaligned addresses are not handled specially.
    Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));

    // Multiply by 8 (shift left by 3) to turn the byte index into a bit index.
    return context.ShiftLeft(byteInWord, Const(3));
}
 | |
| 
 | |
/// <summary>
/// Extracts an 8-bit or 16-bit integer from a loaded 32-bit word and widens it to 32 bits.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="size">Access size; selects the width and zero or sign extension</param>
/// <param name="bitOffset">Bit position of the value inside the word</param>
/// <param name="value">The loaded 32-bit word</param>
/// <returns>The extended value, or just the shifted word for sizes other than U8/S8/U16/S16</returns>
private static Operand ExtractSmallInt(
    EmitterContext context,
    LsSize size,
    Operand bitOffset,
    Operand value)
{
    // Move the wanted bits down to bit 0 before extending.
    Operand shifted = context.ShiftRightU32(value, bitOffset);

    return size switch
    {
        LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
        LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
        LsSize.S8 => SignExtendTo32(context, shifted, 8),
        LsSize.S16 => SignExtendTo32(context, shifted, 16),
        _ => shifted
    };
}
 | |
| 
 | |
/// <summary>
/// Merges an 8-bit or 16-bit value into an existing 32-bit word at the given bit offset.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="size">Access size; selects the width of the inserted field</param>
/// <param name="bitOffset">Bit position inside the word where the value is inserted</param>
/// <param name="word">The existing 32-bit word</param>
/// <param name="value">The value to insert</param>
/// <returns>The merged word, or <paramref name="value"/> unchanged for sizes other than U8/S8/U16/S16</returns>
private static Operand InsertSmallInt(
    EmitterContext context,
    LsSize size,
    Operand bitOffset,
    Operand word,
    Operand value)
{
    int fieldBits;

    switch (size)
    {
        case LsSize.U8:
        case LsSize.S8:
            fieldBits = 8;
            break;

        case LsSize.U16:
        case LsSize.S16:
            fieldBits = 16;
            break;

        default:
            // Not a small integer size: nothing to merge.
            return value;
    }

    // 0xff for 8-bit fields, 0xffff for 16-bit fields.
    int fieldMask = (1 << fieldBits) - 1;

    Operand masked = context.BitwiseAnd(value, Const(fieldMask));

    return context.BitfieldInsert(word, masked, bitOffset, Const(fieldBits));
}
 | |
|     }
 | |
| } |