diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-11-06 22:49:01 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-06 22:49:01 +0000 |
| commit | cb6c38612e7b46d92820a5e000ddc9532086b20f (patch) | |
| tree | 425b5ae432edaa72281f963f9ea960a5ddfcb3cf | |
| parent | 86d24f1668aaeb41362031fa31b4c4269488a68b (diff) | |
| download | bcm5719-llvm-cb6c38612e7b46d92820a5e000ddc9532086b20f.tar.gz bcm5719-llvm-cb6c38612e7b46d92820a5e000ddc9532086b20f.zip | |
[X86] Make FeatureAVX512 imply FeatureFMA.
Previously our VEX patterns were checking Subtarget.hasFMA(), which checked FMA || AVX512, so we were already behaving as if AVX512 implied FMA. This means we'd allow VEX-encoded 128/256-bit FMA instructions when AVX512F was enabled but AVX512VL was off, regardless of the FMA flag.
The EVEX-to-VEX conversion also transforms scalar EVEX FMA instructions to their VEX versions even without the FMA flag, and does the same for 128/256-bit instructions under AVX512VL.
So this makes AVX512 imply FeatureFMA to make our current behavior explicit.
All known CPUs that support AVX512 have VEX FMA instructions.
llvm-svn: 317520
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 2 |
2 files changed, 5 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index f4021d7639b..49790373190 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -116,9 +116,12 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; +def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", + "Enable three-operand fused multiple-add", + [FeatureAVX]>; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", "Enable AVX-512 instructions", - [FeatureAVX2]>; + [FeatureAVX2, FeatureFMA]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", "Enable AVX-512 Exponential and Reciprocal Instructions", [FeatureAVX512]>; @@ -154,9 +157,6 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; -def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", - "Enable three-operand fused multiple-add", - [FeatureAVX]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add", [FeatureAVX, FeatureSSE4A]>; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index a8d7f290688..a21d068c7f4 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -463,7 +463,7 @@ public: bool hasPCLMUL() const { return HasPCLMUL; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. - bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; } + bool hasFMA() const { return HasFMA && !HasFMA4; } bool hasFMA4() const { return HasFMA4; } bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } |

