diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-11-09 06:17:05 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-09 06:17:05 +0000 |
| commit | 7a6e294a6c02b5670d1906331b7d8e923a6d92de (patch) | |
| tree | 3fe736ebca154c35f463ff346846f8d9c325b39d | |
| parent | 722339e40562003b4d32693fa568e98ab38da831 (diff) | |
| download | bcm5719-llvm-7a6e294a6c02b5670d1906331b7d8e923a6d92de.tar.gz bcm5719-llvm-7a6e294a6c02b5670d1906331b7d8e923a6d92de.zip | |
[X86] Make X86ISD::FMADDS3 isel patterns commutable.
This was missed when FMADDS3 was split from X86ISD::FMADDS3_RND.
llvm-svn: 317769
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-fma-commute.ll | 95 |
2 files changed, 99 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index d30400836bb..83663ac3c78 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -491,10 +491,10 @@ def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>;
 def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>;
 def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>;
 
-def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp>;
-def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp>;
-def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp>;
-def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp>;
+def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>;
 
 // Scalar FMA intrinsics with passthru bits in operand 3.
 def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
diff --git a/llvm/test/CodeGen/X86/avx512-fma-commute.ll b/llvm/test/CodeGen/X86/avx512-fma-commute.ll
new file mode 100644
index 00000000000..8dd484787a9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-fma-commute.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x0 = load <4 x float>, <4 x float>* %x0ptr
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x1 = load <4 x float>, <4 x float>* %x1ptr
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x0 = load <2 x double>, <2 x double>* %x0ptr
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x1 = load <2 x double>, <2 x double>* %x1ptr
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x0 = load <4 x float>, <4 x float>* %x0ptr
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x1 = load <4 x float>, <4 x float>* %x1ptr
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x0 = load <2 x double>, <2 x double>* %x0ptr
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x1 = load <2 x double>, <2 x double>* %x1ptr
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
+ ret <2 x double> %res
+}
```

