CPU (A64): Add FP16/FP32 fast paths (F16C Intrinsics) for Fcvt_S, Fcvtl_V & Fcvtn_V Instructions. HardwareCapabilities now uses CpuId. (#1650)
* net5.0
* CPU (A64): Add FP16/FP32 fast paths (F16C Intrinsics) for Fcvt_S, Fcvtl_V & Fcvtn_V Instructions. Switch to .NET 5.0.
Nits.
Tests passed in both Debug and Release modes (for all instructions involved).
* Address comment.
* Update appveyor.yml
* Revert "Update appveyor.yml"
This reverts commit 27cdd59e8b.
* Remove Assembler CpuId.
* Update appveyor.yml
* Address comment.
			
			
This commit is contained in:
		
							parent
							
								
									eafee34fee
								
							
						
					
					
						commit
						0679084f11
					
				
					 9 changed files with 136 additions and 62 deletions
				
			
		|  | @ -104,7 +104,6 @@ namespace ARMeilleure.CodeGen.X86 | |||
|             Add(X86Instruction.Cmpxchg8,   new InstructionInfo(0x00000fb0, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Reg8Src)); | ||||
|             Add(X86Instruction.Comisd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Comiss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex)); | ||||
|             Add(X86Instruction.Cpuid,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fa2, InstructionFlags.RegOnly)); | ||||
|             Add(X86Instruction.Crc32,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2)); | ||||
|             Add(X86Instruction.Crc32_16,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Crc32_8,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src)); | ||||
|  | @ -270,6 +269,8 @@ namespace ARMeilleure.CodeGen.X86 | |||
|             Add(X86Instruction.Unpcklps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex)); | ||||
|             Add(X86Instruction.Vblendvpd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Vblendvps,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Vcvtph2ps,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Vcvtps2ph,  new InstructionInfo(0x000f3a1d, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Vpblendvb,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|             Add(X86Instruction.Xor,        new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp,      0x00000033, InstructionFlags.None)); | ||||
|             Add(X86Instruction.Xorpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); | ||||
|  | @ -386,11 +387,6 @@ namespace ARMeilleure.CodeGen.X86 | |||
|             WriteInstruction(src1, null, src2, X86Instruction.Comiss); | ||||
|         } | ||||
| 
 | ||||
|         public void Cpuid() | ||||
|         { | ||||
|             WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid); | ||||
|         } | ||||
| 
 | ||||
|         public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) | ||||
|         { | ||||
|             WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); | ||||
|  |  | |||
|  | @ -1,20 +1,60 @@ | |||
| using System; | ||||
| using System.Runtime.Intrinsics.X86; | ||||
| 
 | ||||
| namespace ARMeilleure.CodeGen.X86 | ||||
| { | ||||
|     static class HardwareCapabilities | ||||
|     { | ||||
|         public static bool SupportsSse => Sse.IsSupported; | ||||
|         public static bool SupportsSse2 => Sse2.IsSupported; | ||||
|         public static bool SupportsSse3 => Sse3.IsSupported; | ||||
|         public static bool SupportsSsse3 => Ssse3.IsSupported; | ||||
|         public static bool SupportsSse41 => Sse41.IsSupported; | ||||
|         public static bool SupportsSse42 => Sse42.IsSupported; | ||||
|         public static bool SupportsPclmulqdq => Pclmulqdq.IsSupported; | ||||
|         public static bool SupportsFma => Fma.IsSupported; | ||||
|         public static bool SupportsPopcnt => Popcnt.IsSupported; | ||||
|         public static bool SupportsAesni => Aes.IsSupported; | ||||
|         public static bool SupportsAvx => Avx.IsSupported; | ||||
|         static HardwareCapabilities() | ||||
|         { | ||||
|             if (!X86Base.IsSupported) | ||||
|             { | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             (_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000); | ||||
| 
 | ||||
|             FeatureInfoEdx = (FeatureFlagsEdx)edx; | ||||
|             FeatureInfoEcx = (FeatureFlagsEcx)ecx; | ||||
|         } | ||||
| 
 | ||||
|         [Flags] | ||||
|         public enum FeatureFlagsEdx | ||||
|         { | ||||
|             Sse = 1 << 25, | ||||
|             Sse2 = 1 << 26 | ||||
|         } | ||||
| 
 | ||||
|         [Flags] | ||||
|         public enum FeatureFlagsEcx | ||||
|         { | ||||
|             Sse3 = 1 << 0, | ||||
|             Pclmulqdq = 1 << 1, | ||||
|             Ssse3 = 1 << 9, | ||||
|             Fma = 1 << 12, | ||||
|             Sse41 = 1 << 19, | ||||
|             Sse42 = 1 << 20, | ||||
|             Popcnt = 1 << 23, | ||||
|             Aes = 1 << 25, | ||||
|             Avx = 1 << 28, | ||||
|             F16c = 1 << 29 | ||||
|         } | ||||
| 
 | ||||
|         public static FeatureFlagsEdx FeatureInfoEdx { get; } | ||||
|         public static FeatureFlagsEcx FeatureInfoEcx { get; } | ||||
| 
 | ||||
|         public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse); | ||||
|         public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2); | ||||
|         public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3); | ||||
|         public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq); | ||||
|         public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3); | ||||
|         public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma); | ||||
|         public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41); | ||||
|         public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42); | ||||
|         public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt); | ||||
|         public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes); | ||||
|         public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx); | ||||
|         public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c); | ||||
| 
 | ||||
|         public static bool ForceLegacySse { get; set; } | ||||
| 
 | ||||
|  |  | |||
|  | @ -162,6 +162,8 @@ namespace ARMeilleure.CodeGen.X86 | |||
|             Add(Intrinsic.X86Unpckhps,   new IntrinsicInfo(X86Instruction.Unpckhps,   IntrinsicType.Binary)); | ||||
|             Add(Intrinsic.X86Unpcklpd,   new IntrinsicInfo(X86Instruction.Unpcklpd,   IntrinsicType.Binary)); | ||||
|             Add(Intrinsic.X86Unpcklps,   new IntrinsicInfo(X86Instruction.Unpcklps,   IntrinsicType.Binary)); | ||||
|             Add(Intrinsic.X86Vcvtph2ps,  new IntrinsicInfo(X86Instruction.Vcvtph2ps,  IntrinsicType.Unary)); | ||||
|             Add(Intrinsic.X86Vcvtps2ph,  new IntrinsicInfo(X86Instruction.Vcvtps2ph,  IntrinsicType.BinaryImm)); | ||||
|             Add(Intrinsic.X86Xorpd,      new IntrinsicInfo(X86Instruction.Xorpd,      IntrinsicType.Binary)); | ||||
|             Add(Intrinsic.X86Xorps,      new IntrinsicInfo(X86Instruction.Xorps,      IntrinsicType.Binary)); | ||||
|         } | ||||
|  |  | |||
|  | @ -33,7 +33,6 @@ namespace ARMeilleure.CodeGen.X86 | |||
|         Cmpxchg8, | ||||
|         Comisd, | ||||
|         Comiss, | ||||
|         Cpuid, | ||||
|         Crc32, | ||||
|         Crc32_16, | ||||
|         Crc32_8, | ||||
|  | @ -199,6 +198,8 @@ namespace ARMeilleure.CodeGen.X86 | |||
|         Unpcklps, | ||||
|         Vblendvpd, | ||||
|         Vblendvps, | ||||
|         Vcvtph2ps, | ||||
|         Vcvtps2ph, | ||||
|         Vpblendvb, | ||||
|         Xor, | ||||
|         Xorpd, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 LDj3SNuD
						LDj3SNuD