diff options
| author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2019-01-24 23:44:28 +0000 |
|---|---|---|
| committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2019-01-24 23:44:28 +0000 |
| commit | b9b75de0aebce6fec139ab52ab6bf4bca2d41ee9 (patch) | |
| tree | 0b61ccfdb5b115a65bc89d394c1516c51ee3f46b /llvm/test | |
| parent | 6bab7ab11e75f675f561162f40c9b28525b4e473 (diff) | |
| download | bcm5719-llvm-b9b75de0aebce6fec139ab52ab6bf4bca2d41ee9.tar.gz bcm5719-llvm-b9b75de0aebce6fec139ab52ab6bf4bca2d41ee9.zip | |
[PowerPC] Exploit store instructions that store a single vector element
This patch exploits the instructions that store a single element from a vector
to perform a (store (extract_elt)). We already have code that does this with
ISA 3.0 instructions that were added to handle i8/i16 types. However, we had
never exploited the existing ones that handle f32/f64/i32/i64 types.
Differential revision: https://reviews.llvm.org/D56175
llvm-svn: 352131
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/extract-and-store.ll | 293 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll | 57 |
2 files changed, 248 insertions, 102 deletions
diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll index 6426db7923c..474ba86395a 100644 --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -7,26 +7,66 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unkknown-unknown \ ; RUN: -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-P9 -; Function Attrs: norecurse nounwind writeonly +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unkknown-unknown \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9-BE + +define <2 x i64> @testllv(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap, i64 %Idx) local_unnamed_addr #0 { +; CHECK-LABEL: testllv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: sldi r3, r8, 3 +; CHECK-NEXT: stfdx f0, r7, r3 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: testllv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r8, 3 +; CHECK-BE-NEXT: stxsdx vs34, r7, r3 +; CHECK-BE-NEXT: blr +; +; CHECK-P9-LABEL: testllv: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: sldi r3, r8, 3 +; CHECK-P9-NEXT: stfdx f0, r7, r3 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testllv: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: sldi r3, r8, 3 +; CHECK-P9-BE-NEXT: stxsdx vs34, r7, r3 +; CHECK-P9-BE-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %a, i32 0 + %arrayidx = getelementptr inbounds i64, i64* %ap, i64 %Idx + store i64 %vecext, i64* %arrayidx, align 8 + ret <2 x i64> %a +} + define <2 x i64> @testll0(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testll0: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrd r3, f0 -; CHECK-NEXT: std r3, 24(r7) +; CHECK-NEXT: stfd f0, 24(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testll0: ; CHECK-BE: # %bb.0: 
# %entry -; CHECK-BE-NEXT: mfvsrd r3, vs34 -; CHECK-BE-NEXT: std r3, 24(r7) +; CHECK-BE-NEXT: addi r3, r7, 24 +; CHECK-BE-NEXT: stxsdx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrld r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r7) +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 24(r7) ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testll0: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: stxsd v2, 24(r7) +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 0 %arrayidx = getelementptr inbounds i64, i64* %ap, i64 3 @@ -38,22 +78,26 @@ entry: define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testll1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrd r3, vs34 -; CHECK-NEXT: std r3, 24(r6) +; CHECK-NEXT: addi r3, r6, 24 +; CHECK-NEXT: stxsdx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testll1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrd r3, f0 -; CHECK-BE-NEXT: std r3, 24(r6) +; CHECK-BE-NEXT: stfd f0, 24(r6) ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrd r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r6) +; CHECK-P9-NEXT: stxsd v2, 24(r6) ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testll1: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-NEXT: stfd f0, 24(r6) +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 1 %arrayidx = getelementptr inbounds i64, i64* %ap, i64 3 @@ -61,7 +105,39 @@ entry: ret <2 x i64> %a } -; Function Attrs: norecurse nounwind writeonly +define <2 x double> @testdv(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap, i64 %Idx) local_unnamed_addr #0 { +; CHECK-LABEL: testdv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: sldi r3, r8, 3 +; CHECK-NEXT: stfdx f0, r7, r3 +; 
CHECK-NEXT: blr +; +; CHECK-BE-LABEL: testdv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r8, 3 +; CHECK-BE-NEXT: stxsdx vs34, r7, r3 +; CHECK-BE-NEXT: blr +; +; CHECK-P9-LABEL: testdv: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: sldi r3, r8, 3 +; CHECK-P9-NEXT: stfdx f0, r7, r3 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testdv: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: sldi r3, r8, 3 +; CHECK-P9-BE-NEXT: stxsdx vs34, r7, r3 +; CHECK-P9-BE-NEXT: blr +entry: + %vecext = extractelement <2 x double> %a, i32 0 + %arrayidx = getelementptr inbounds double, double* %ap, i64 %Idx + store double %vecext, double* %arrayidx, align 8 + ret <2 x double> %a +} + define <2 x double> @testd0(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testd0: ; CHECK: # %bb.0: # %entry @@ -80,6 +156,11 @@ define <2 x double> @testd0(<2 x double> returned %a, <2 x double> %b, double* n ; CHECK-P9-NEXT: xxswapd vs0, vs34 ; CHECK-P9-NEXT: stfd f0, 24(r7) ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testd0: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: stxsd v2, 24(r7) +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <2 x double> %a, i32 0 %arrayidx = getelementptr inbounds double, double* %ap, i64 3 @@ -105,6 +186,12 @@ define <2 x double> @testd1(<2 x double> returned %a, <2 x double> %b, double* n ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: stxsd v2, 24(r7) ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testd1: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-NEXT: stfd f0, 24(r7) +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <2 x double> %a, i32 1 %arrayidx = getelementptr inbounds double, double* %ap, i64 3 @@ -116,23 +203,31 @@ entry: define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf0: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf0: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvspdpn f0, vs34 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testf0: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 0 %arrayidx = getelementptr inbounds float, float* %ap, i64 3 @@ -144,24 +239,29 @@ entry: define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxswapd vs0, vs34 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi 
vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testf1: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 %arrayidx = getelementptr inbounds float, float* %ap, i64 3 @@ -173,24 +273,29 @@ entry: define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testf2: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 2 %arrayidx = getelementptr inbounds float, float* %ap, i64 3 @@ -202,22 +307,31 @@ entry: define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvspdpn f0, vs34 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: addi r3, r7, 
12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvspdpn f0, vs34 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testf3: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 %arrayidx = getelementptr inbounds float, float* %ap, i64 3 @@ -229,24 +343,31 @@ entry: define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi0: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testi0: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi 
vs0, vs34, vs34, 3 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3 @@ -259,22 +380,28 @@ define <4 x i32> @testi1(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap ; CHECK-LABEL: testi1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mfvsrwz r3, vs34 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testi1: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3 @@ -286,22 +413,29 @@ entry: define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrwz r3, vs34 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi2: ; CHECK-P9: # %bb.0: # %entry -; 
CHECK-P9-NEXT: mfvsrwz r3, vs34 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testi2: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3 @@ -314,23 +448,30 @@ define <4 x i32> @testi3(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap ; CHECK-LABEL: testi3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 12 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: testi3: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-BE-NEXT: addi r3, r7, 12 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 %arrayidx = getelementptr inbounds i32, i32* %ap, i64 3 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll index 96c823b7310..af695c58f1b 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -1,3 
+1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ @@ -15,11 +16,10 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvaddsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr - +; ; P9BE-LABEL: test_liwzx1: ; P9BE: # %bb.0: ; P9BE-NEXT: lfiwzx f0, 0, r3 @@ -27,10 +27,10 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvaddsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr - +; ; P8LE-LABEL: test_liwzx1: ; P8LE: # %bb.0: ; P8LE-NEXT: lfiwzx f0, 0, r3 @@ -38,11 +38,10 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvaddsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr - +; ; P8BE-LABEL: test_liwzx1: ; P8BE: # %bb.0: ; P8BE-NEXT: lfiwzx f0, 0, r3 @@ -50,9 +49,12 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvaddsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx f0, 0, r5 
+; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr + + + %a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B %X = fadd <1 x float> %a, %b @@ -68,12 +70,11 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvsubsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 ; P9LE-NEXT: mr r3, r5 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr - +; ; P9BE-LABEL: test_liwzx2: ; P9BE: # %bb.0: ; P9BE-NEXT: lfiwzx f0, 0, r3 @@ -81,11 +82,11 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvsubsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 ; P9BE-NEXT: mr r3, r5 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr - +; ; P8LE-LABEL: test_liwzx2: ; P8LE: # %bb.0: ; P8LE-NEXT: lfiwzx f0, 0, r3 @@ -94,11 +95,10 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvsubsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr - +; ; P8BE-LABEL: test_liwzx2: ; P8BE: # %bb.0: ; P8BE-NEXT: lfiwzx f0, 0, r3 @@ -107,9 +107,14 @@ define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvsubsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr + + + + + 
%a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B %X = fsub <1 x float> %a, %b |

