diff options
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/build-vector-tests.ll')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/build-vector-tests.ll | 6513 |
1 files changed, 4053 insertions, 2460 deletions
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll index 3fc5ffe2ace..e208a69825f 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ ; RUN: -check-prefix=P9BE -implicit-check-not frsp @@ -730,157 +731,233 @@ ; Function Attrs: norecurse nounwind readnone define <4 x i32> @allZeroi() { +; P9BE-LABEL: allZeroi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxlxor v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allZeroi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxlxor v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allZeroi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxlxor v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allZeroi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxlxor v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> zeroinitializer -; P9BE-LABEL: allZeroi -; P9LE-LABEL: allZeroi -; P8BE-LABEL: allZeroi -; P8LE-LABEL: allZeroi -; P9BE: xxlxor v2, v2, v2 -; P9BE: blr -; P9LE: xxlxor v2, v2, v2 -; P9LE: blr -; P8BE: xxlxor v2, v2, v2 -; P8BE: blr -; P8LE: xxlxor v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @allOnei() { +; P9BE-LABEL: allOnei: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxspltib v2, 255 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allOnei: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxspltib v2, 255 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allOnei: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisb v2, -1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allOnei: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisb v2, -1 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> -; P9BE-LABEL: allOnei -; P9LE-LABEL: allOnei -; P8BE-LABEL: allOnei -; P8LE-LABEL: allOnei -; P9BE: xxspltib v2, 255 -; P9BE: blr -; P9LE: xxspltib v2, 255 -; P9LE: blr -; P8BE: vspltisb v2, -1 -; P8BE: blr -; P8LE: vspltisb v2, -1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst1i() { +; P9BE-LABEL: spltConst1i: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst1i: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst1i: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst1i: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 1 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 1, i32 1, i32 1, i32 1> -; P9BE-LABEL: spltConst1i -; P9LE-LABEL: spltConst1i -; P8BE-LABEL: spltConst1i -; P8LE-LABEL: spltConst1i -; P9BE: vspltisw v2, 1 -; P9BE: blr -; P9LE: vspltisw v2, 1 -; P9LE: blr -; P8BE: vspltisw v2, 1 -; P8BE: blr -; P8LE: vspltisw v2, 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst16ki() { +; P9BE-LABEL: spltConst16ki: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, -15 +; P9BE-NEXT: vsrw v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst16ki: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, -15 +; P9LE-NEXT: vsrw v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst16ki: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, -15 +; P8BE-NEXT: vsrw v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst16ki: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, -15 +; P8LE-NEXT: vsrw v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> -; P9BE-LABEL: spltConst16ki -; P9LE-LABEL: spltConst16ki -; P8BE-LABEL: spltConst16ki -; P8LE-LABEL: spltConst16ki -; P9BE: vspltisw v2, -15 -; P9BE: vsrw v2, v2, v2 -; P9BE: blr -; P9LE: vspltisw v2, -15 -; P9LE: vsrw v2, v2, v2 -; P9LE: blr -; P8BE: vspltisw v2, -15 -; P8BE: vsrw v2, v2, v2 -; P8BE: blr -; P8LE: vspltisw v2, -15 -; P8LE: vsrw v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst32ki() { +; P9BE-LABEL: spltConst32ki: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, -16 +; P9BE-NEXT: vsrw v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst32ki: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, -16 +; P9LE-NEXT: vsrw v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst32ki: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, -16 +; P8BE-NEXT: vsrw v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst32ki: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, -16 +; P8LE-NEXT: vsrw v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535> -; P9BE-LABEL: spltConst32ki -; P9LE-LABEL: spltConst32ki -; P8BE-LABEL: spltConst32ki -; P8LE-LABEL: spltConst32ki -; P9BE: vspltisw v2, -16 -; P9BE: vsrw v2, v2, v2 -; P9BE: blr -; P9LE: vspltisw v2, -16 -; P9LE: vsrw v2, v2, v2 -; P9LE: blr -; P8BE: vspltisw v2, -16 -; P8BE: vsrw v2, v2, v2 -; P8BE: blr -; P8LE: vspltisw v2, -16 -; P8LE: vsrw v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) { +; P9BE-LABEL: fromRegsi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: rldimi r6, r5, 32, 0 +; P9BE-NEXT: rldimi r4, r3, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r4, r6 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: rldimi r3, r4, 32, 0 +; P9LE-NEXT: rldimi r5, r6, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r5, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: rldimi r6, r5, 32, 0 +; P8BE-NEXT: rldimi r4, r3, 32, 0 +; P8BE-NEXT: mtvsrd f0, r6 +; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: rldimi r3, r4, 32, 0 +; P8LE-NEXT: rldimi r5, r6, 32, 0 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3 ret <4 x i32> %vecinit3 -; P9BE-LABEL: fromRegsi -; P9LE-LABEL: fromRegsi -; P8BE-LABEL: fromRegsi -; P8LE-LABEL: fromRegsi -; P9BE-DAG: rldimi r6, r5, 32, 0 -; P9BE-DAG: rldimi r4, r3, 32, 0 -; P9BE: mtvsrdd v2, r4, r6 -; P9BE: blr -; P9LE-DAG: rldimi r3, r4, 32, 0 -; P9LE-DAG: rldimi r5, r6, 32, 0 -; P9LE: mtvsrdd v2, r5, r3 -; P9LE: blr -; P8BE-DAG: rldimi r6, r5, 32, 0 -; P8BE-DAG: rldimi r4, r3, 32, 0 -; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6 -; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4 -; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]] -; P8BE: blr -; P8LE-DAG: rldimi r3, r4, 32, 0 -; P8LE-DAG: rldimi r5, r6, 32, 0 -; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3 -; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5 -; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]] -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsi() { +; P9BE-LABEL: fromDiffConstsi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19> -; P9BE-LABEL: fromDiffConstsi -; P9LE-LABEL: fromDiffConstsi -; P8BE-LABEL: fromDiffConstsi -; P8LE-LABEL: fromDiffConstsi -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsAi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %0 = load i32, i32* %arr, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -894,23 +971,46 @@ entry: %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromDiffMemConsAi -; P9LE-LABEL: fromDiffMemConsAi -; P8BE-LABEL: fromDiffMemConsAi -; P8LE-LABEL: fromDiffMemConsAi -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsDi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI8_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI8_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r4, r2, .LCPI8_0@toc@ha +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: addi r4, r4, .LCPI8_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI8_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI8_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 %0 = load i32, i32* %arrayidx, align 4 @@ -924,31 +1024,34 @@ entry: %3 = load i32, i32* %arr, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromDiffMemConsDi -; P9LE-LABEL: fromDiffMemConsDi -; P8BE-LABEL: fromDiffMemConsDi -; P8LE-LABEL: fromDiffMemConsDi -; P9BE: lxv -; P9BE: lxv -; P9BE: vperm -; P9BE: blr -; P9LE: lxv -; P9LE: lxv -; P9LE: vperm -; P9LE: blr -; P8BE: lxvw4x -; P8BE: lxvw4x -; P8BE: vperm -; P8BE: blr -; P8LE: lxvd2x -; P8LE-DAG: lvx -; P8LE: xxswapd -; P8LE: vperm -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lxvx v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lxvw4x v2, r3, r4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom @@ -970,27 +1073,58 @@ entry: %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 -; P9BE-LABEL: fromDiffMemVarAi -; P9LE-LABEL: fromDiffMemVarAi -; P8BE-LABEL: fromDiffMemVarAi -; P8LE-LABEL: fromDiffMemVarAi -; P9BE: sldi r4, r4, 2 -; P9BE: lxvx v2, r3, r4 -; P9BE: blr -; P9LE: sldi r4, r4, 2 -; P9LE: lxvx v2, r3, r4 -; P9LE: blr -; P8BE: sldi r4, r4, 2 -; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 -; P8BE: blr -; P8LE: sldi r4, r4, 2 -; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 -; P8LE: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -12 +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI10_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -12 +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI10_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r4, r5, .LCPI10_0@toc@l +; P8BE-NEXT: addi r3, r3, -12 +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -12 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r3, r5, .LCPI10_0@toc@l +; P8LE-NEXT: lvx v3, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: vperm v2, v2, v2, v3 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom @@ -1012,35 +1146,57 @@ entry: %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 -; P9BE-LABEL: fromDiffMemVarDi -; P9LE-LABEL: fromDiffMemVarDi -; P8BE-LABEL: fromDiffMemVarDi -; P8LE-LABEL: fromDiffMemVarDi -; P9BE: sldi {{r[0-9]+}}, r4, 2 -; P9BE-DAG: lxvx {{v[0-9]+}} -; P9BE-DAG: lxvx -; P9BE: vperm -; P9BE: blr -; P9LE: sldi {{r[0-9]+}}, r4, 2 -; P9LE-DAG: lxvx {{v[0-9]+}} -; P9LE-DAG: lxvx -; P9LE: vperm -; P9LE: blr -; P8BE: sldi {{r[0-9]+}}, r4, 2 -; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 -; P8BE-DAG: lxvw4x -; P8BE: vperm -; P8BE: blr -; P8LE: sldi {{r[0-9]+}}, r4, 2 -; P8LE-DAG: lxvd2x -; P8LE-DAG: lxvd2x -; P8LE: xxswapd -; P8LE: vperm -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromRandMemConsi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lwz r4, 16(r3) +; P9BE-NEXT: lwz r5, 72(r3) +; P9BE-NEXT: lwz r6, 8(r3) +; P9BE-NEXT: lwz r3, 352(r3) +; P9BE-NEXT: rldimi r3, r6, 32, 0 +; P9BE-NEXT: rldimi r5, r4, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r5, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemConsi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lwz r4, 16(r3) +; P9LE-NEXT: lwz r5, 72(r3) +; P9LE-NEXT: lwz r6, 8(r3) +; P9LE-NEXT: lwz r3, 352(r3) +; P9LE-NEXT: rldimi r4, r5, 32, 0 +; P9LE-NEXT: rldimi r6, r3, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r6, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemConsi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lwz r4, 8(r3) +; P8BE-NEXT: lwz r5, 352(r3) +; P8BE-NEXT: lwz r6, 16(r3) +; P8BE-NEXT: lwz r3, 72(r3) +; P8BE-NEXT: rldimi r5, r4, 32, 0 +; P8BE-NEXT: rldimi r3, r6, 32, 0 +; P8BE-NEXT: mtvsrd f0, r5 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemConsi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lwz r4, 16(r3) +; P8LE-NEXT: lwz r5, 72(r3) +; P8LE-NEXT: lwz r6, 8(r3) +; P8LE-NEXT: lwz r3, 352(r3) +; P8LE-NEXT: rldimi r4, r5, 32, 0 +; P8LE-NEXT: rldimi r6, r3, 32, 0 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 %0 = load i32, i32* %arrayidx, align 4 @@ -1055,46 +1211,65 @@ entry: %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRandMemConsi -; P9LE-LABEL: fromRandMemConsi -; P8BE-LABEL: fromRandMemConsi -; P8LE-LABEL: fromRandMemConsi -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: rldimi -; P9BE: rldimi -; P9BE: mtvsrdd -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: rldimi -; P9LE: rldimi -; P9LE: mtvsrdd -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: rldimi -; P8BE: rldimi -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: rldimi -; P8LE: rldimi -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromRandMemVari: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: lwz r4, 16(r3) +; P9BE-NEXT: lwz r5, 4(r3) +; P9BE-NEXT: lwz r6, 8(r3) +; P9BE-NEXT: lwz r3, 32(r3) +; P9BE-NEXT: rldimi r3, r6, 32, 0 +; P9BE-NEXT: rldimi r5, r4, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r5, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemVari: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: lwz r4, 16(r3) +; P9LE-NEXT: lwz r5, 4(r3) +; P9LE-NEXT: lwz r6, 8(r3) +; P9LE-NEXT: lwz r3, 32(r3) +; P9LE-NEXT: rldimi r4, r5, 32, 0 +; P9LE-NEXT: rldimi r6, r3, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r6, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemVari: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: lwz r4, 8(r3) +; P8BE-NEXT: lwz r5, 32(r3) +; P8BE-NEXT: lwz r6, 16(r3) +; P8BE-NEXT: lwz r3, 4(r3) +; P8BE-NEXT: rldimi r5, r4, 32, 0 +; P8BE-NEXT: rldimi r3, r6, 32, 0 +; P8BE-NEXT: mtvsrd f0, r5 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemVari: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: lwz r4, 16(r3) +; P8LE-NEXT: lwz r5, 4(r3) +; P8LE-NEXT: lwz r6, 8(r3) +; P8LE-NEXT: lwz r3, 32(r3) +; P8LE-NEXT: rldimi r4, r5, 32, 0 +; P8LE-NEXT: rldimi r6, r3, 32, 0 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 @@ -1117,119 +1292,151 @@ entry: %3 = load i32, i32* %arrayidx11, align 4 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3 ret <4 x i32> %vecinit12 -; P9BE-LABEL: fromRandMemVari -; P9LE-LABEL: fromRandMemVari -; P8BE-LABEL: fromRandMemVari -; P8LE-LABEL: fromRandMemVari -; P9BE: sldi r4, r4, 2 -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: rldimi -; P9BE: rldimi -; P9BE: mtvsrdd -; P9LE: sldi r4, r4, 2 -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: rldimi -; P9LE: rldimi -; P9LE: mtvsrdd -; P8BE: sldi r4, r4, 2 -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: rldimi -; P8BE: rldimi -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd -; P8LE: sldi r4, r4, 2 -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: rldimi -; P8LE: rldimi -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegVali(i32 signext %val) { +; P9BE-LABEL: spltRegVali: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrws v2, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegVali: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrws v2, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegVali: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrwz f0, r3 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegVali: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrwz f0, r3 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegVali -; P9LE-LABEL: spltRegVali -; P8BE-LABEL: spltRegVali -; P8LE-LABEL: spltRegVali -; P9BE: mtvsrws v2, r3 -; P9BE: blr -; P9LE: mtvsrws v2, r3 -; P9LE: blr -; P8BE: mtvsrwz {{[vsf0-9]+}}, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 -; P8BE: blr -; P8LE: mtvsrwz {{[vsf0-9]+}}, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemVali: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxspltw v2, vs0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemVali: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxspltw v2, vs0, 3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemVali: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxspltw v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemVali: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxspltw v2, vs0, 3 +; P8LE-NEXT: blr entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemVali -; P9LE-LABEL: spltMemVali -; P8BE-LABEL: spltMemVali -; P8LE-LABEL: spltMemVali -; P9BE: lfiwzx f0, 0, r3 -; P9BE: xxsldwi vs0, f0, f0, 1 -; P9BE: xxspltw v2, vs0, 0 -; P9BE: blr -; P9LE: lfiwzx f0, 0, r3 -; P9LE: xxpermdi vs0, f0, f0, 2 -; P9LE: xxspltw v2, vs0, 3 -; P9LE: blr -; P8BE: lfiwzx f0, 0, r3 -; P8BE: xxsldwi vs0, f0, f0, 1 -; P8BE: xxspltw v2, vs0, 0 -; P8BE: blr -; P8LE: lfiwzx f0, 0, r3 -; P8LE: xxpermdi vs0, f0, f0, 2 -; P8LE: xxspltw v2, vs0, 3 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvftoi() { +; P9BE-LABEL: spltCnstConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 4 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; P9BE-LABEL: spltCnstConvftoi -; P9LE-LABEL: spltCnstConvftoi -; P8BE-LABEL: spltCnstConvftoi -; P8LE-LABEL: spltCnstConvftoi -; P9BE: vspltisw v2, 4 -; P9BE: blr -; P9LE: vspltisw v2, 4 -; P9LE: blr -; P8BE: vspltisw v2, 4 -; P8BE: blr -; P8LE: vspltisw v2, 4 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) { +; P9BE-LABEL: fromRegsConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xvcvdpsxws v2, vs0 +; P9BE-NEXT: xxmrghd vs0, vs1, vs3 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: xvcvdpsxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs4, vs2 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: xxmrghd vs0, vs2, vs4 +; P8BE-NEXT: xxmrghd vs1, vs1, vs3 +; P8BE-NEXT: xvcvdpsxws v2, vs0 +; P8BE-NEXT: xvcvdpsxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: xxmrghd vs0, vs3, vs1 +; P8LE-NEXT: xxmrghd vs1, vs4, vs2 +; P8LE-NEXT: xvcvdpsxws v2, vs0 +; P8LE-NEXT: xvcvdpsxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %conv = fptosi float %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 @@ -1240,79 +1447,116 @@ entry: %conv5 = fptosi float %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRegsConvftoi -; P9LE-LABEL: fromRegsConvftoi -; P8BE-LABEL: fromRegsConvftoi -; P8LE-LABEL: fromRegsConvftoi -; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvftoi() { +; P9BE-LABEL: fromDiffConstsConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI17_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI17_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI17_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI17_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 24, i32 234, i32 988, i32 422> -; P9BE-LABEL: fromDiffConstsConvftoi -; P9LE-LABEL: fromDiffConstsConvftoi -; P8BE-LABEL: fromDiffConstsConvftoi -; P8LE-LABEL: fromDiffConstsConvftoi -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xvcvspsxws v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xvcvspsxws v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvw4x vs0, 0, r3 +; P8BE-NEXT: xvcvspsxws v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xvcvspsxws v2, v2 +; P8LE-NEXT: blr entry: %0 = bitcast float* %ptr to <4 x float>* %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = fptosi <4 x float> %1 to <4 x i32> ret <4 x i32> %2 -; P9BE-LABEL: fromDiffMemConsAConvftoi -; P9LE-LABEL: fromDiffMemConsAConvftoi -; P8BE-LABEL: fromDiffMemConsAConvftoi -; P8LE-LABEL: fromDiffMemConsAConvftoi -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE: xvcvspsxws v2, [[REG1]] -; P9BE: blr -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE: xvcvspsxws v2, [[REG1]] -; P9LE: blr -; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 -; P8BE: xvcvspsxws v2, [[REG1]] -; P8BE: blr -; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8LE: xxswapd -; P8LE: xvcvspsxws v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: addis r3, r2, .LCPI19_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI19_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: xvcvspsxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI19_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI19_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: xvcvspsxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r4, r2, .LCPI19_0@toc@ha +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: addi r4, r4, .LCPI19_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: xvcvspsxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI19_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI19_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: xvcvspsxws v2, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 @@ -1330,35 +1574,69 @@ entry: %conv8 = fptosi float %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsDConvftoi -; P9LE-LABEL: fromDiffMemConsDConvftoi -; P8BE-LABEL: fromDiffMemConsDConvftoi -; P8LE-LABEL: fromDiffMemConsDConvftoi -; P9BE: lxv -; P9BE: lxv -; P9BE: vperm -; P9BE: xvcvspsxws -; P9BE: blr -; P9LE: lxv -; P9LE: lxv -; P9LE: vperm -; P9LE: xvcvspsxws -; P9LE: blr -; P8BE: lxvw4x -; P8BE: lxvw4x -; P8BE: vperm -; P8BE: xvcvspsxws -; P8BE: blr -; P8LE: lxvd2x -; P8LE-DAG: lvx -; P8LE: xxswapd -; P8LE: vperm -; P8LE: xvcvspsxws -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, 12(r3) +; P9BE-NEXT: lfs f2, 4(r3) +; P9BE-NEXT: xxmrghd vs1, vs2, vs1 +; P9BE-NEXT: xvcvdpsp v2, vs1 +; P9BE-NEXT: lfs f1, 8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsp v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: xvcvspsxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, 8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: lfs f1, 12(r3) +; P9LE-NEXT: xvcvdpsp v2, vs0 +; P9LE-NEXT: lfs f0, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsp v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: xvcvspsxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, 12(r3) +; P8BE-NEXT: lfs f2, 4(r3) +; P8BE-NEXT: lfs f3, 8(r3) +; P8BE-NEXT: xxmrghd vs1, vs2, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsp v2, vs1 +; P8BE-NEXT: xvcvdpsp v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: xvcvspsxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, 8(r3) +; P8LE-NEXT: lfs f2, 4(r3) +; P8LE-NEXT: lfs f3, 12(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsp v2, vs0 +; P8LE-NEXT: xvcvdpsp v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: xvcvspsxws v2, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -1384,19 +1662,70 @@ entry: %conv13 = fptosi float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarAConvftoi -; P9LE-LABEL: fromDiffMemVarAConvftoi -; P8BE-LABEL: fromDiffMemVarAConvftoi -; P8LE-LABEL: fromDiffMemVarAConvftoi ; FIXME: implement finding consecutive loads with pre-inc -; P9BE: lfsux -; P9LE: lfsux -; P8BE: lfsux -; P8LE: lfsux } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, -12(r3) +; P9BE-NEXT: lfs f2, -4(r3) +; P9BE-NEXT: xxmrghd vs1, vs2, vs1 +; P9BE-NEXT: xvcvdpsp v2, vs1 +; P9BE-NEXT: lfs f1, -8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsp v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: xvcvspsxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, -8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: lfs f1, -12(r3) +; P9LE-NEXT: xvcvdpsp v2, vs0 +; P9LE-NEXT: lfs f0, -4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsp v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: xvcvspsxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, -12(r3) +; P8BE-NEXT: lfs f2, -4(r3) +; P8BE-NEXT: lfs f3, -8(r3) +; P8BE-NEXT: xxmrghd vs1, vs2, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsp v2, vs1 +; P8BE-NEXT: xvcvdpsp v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: xvcvspsxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, -8(r3) +; P8LE-NEXT: lfs f2, -4(r3) +; P8LE-NEXT: lfs f3, -12(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsp v2, vs0 +; P8LE-NEXT: xvcvdpsp v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: xvcvspsxws v2, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -1422,86 +1751,154 @@ entry: %conv13 = fptosi float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarDConvftoi -; P9LE-LABEL: fromDiffMemVarDConvftoi -; P8BE-LABEL: fromDiffMemVarDConvftoi -; P8LE-LABEL: fromDiffMemVarDConvftoi ; FIXME: implement finding consecutive loads with pre-inc -; P9BE: lfsux -; P9LE: lfsux -; P8BE: lfsux -; P8LE: lfsux } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvftoi(float %val) { +; P9BE-LABEL: spltRegValConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpsxws f0, f1 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpsxws f0, f1 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpsxws f0, f1 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpsxws f0, f1 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %conv = fptosi float %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegValConvftoi -; P9LE-LABEL: spltRegValConvftoi -; P8BE-LABEL: spltRegValConvftoi -; P8LE-LABEL: spltRegValConvftoi -; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1 -; P9BE: xxspltw v2, vs[[REG1]], 1 -; P9BE: blr -; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1 -; P9LE: xxspltw v2, vs[[REG1]], 1 -; P9LE: blr -; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1 -; P8BE: xxspltw v2, vs[[REG1]], 1 -; P8BE: blr -; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1 -; P8LE: xxspltw v2, vs[[REG1]], 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvwsx vs0, 0, r3 +; P9BE-NEXT: xvcvspsxws v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvwsx vs0, 0, r3 +; P9LE-NEXT: xvcvspsxws v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvftoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: xscvdpsxws f0, f0 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvftoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: xscvdpsxws f0, f0 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemValConvftoi -; P9LE-LABEL: spltMemValConvftoi -; P8BE-LABEL: spltMemValConvftoi -; P8LE-LABEL: spltMemValConvftoi -; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 -; P9BE: xvcvspsxws v2, [[REG1]] -; P9LE: [[REG1:[vs0-9]+]], 0, r3 -; P9LE: xvcvspsxws v2, [[REG1]] -; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 -; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]] -; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 -; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]] -; P8LE: xxspltw v2, vs[[REG2]], 1 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvdtoi() { +; P9BE-LABEL: spltCnstConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 4 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; P9BE-LABEL: spltCnstConvdtoi -; P9LE-LABEL: spltCnstConvdtoi -; P8BE-LABEL: spltCnstConvdtoi -; P8LE-LABEL: spltCnstConvdtoi -; P9BE: vspltisw v2, 4 -; P9BE: blr -; P9LE: vspltisw v2, 4 -; P9LE: blr -; P8BE: vspltisw v2, 4 -; P8BE: blr -; P8LE: vspltisw v2, 4 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { +; P9BE-LABEL: fromRegsConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xvcvdpsxws v2, vs0 +; P9BE-NEXT: xxmrghd vs0, vs1, vs3 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: xvcvdpsxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs4, vs2 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: xxmrghd vs0, vs2, vs4 +; P8BE-NEXT: xxmrghd vs1, vs1, vs3 +; P8BE-NEXT: xvcvdpsxws v2, vs0 +; P8BE-NEXT: xvcvdpsxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: xxmrghd vs0, vs3, vs1 +; P8LE-NEXT: xxmrghd vs1, vs4, vs2 +; P8LE-NEXT: xvcvdpsxws v2, vs0 +; P8LE-NEXT: xvcvdpsxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %conv = fptosi double %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 @@ -1512,53 +1909,90 @@ entry: %conv5 = fptosi double %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRegsConvdtoi -; P9LE-LABEL: fromRegsConvdtoi -; P8BE-LABEL: fromRegsConvdtoi -; P8LE-LABEL: fromRegsConvdtoi -; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvdtoi() { +; P9BE-LABEL: fromDiffConstsConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI26_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI26_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI26_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI26_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI26_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI26_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI26_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI26_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 24, i32 234, i32 988, i32 422> -; P9BE-LABEL: fromDiffConstsConvdtoi -; P9LE-LABEL: fromDiffConstsConvdtoi -; P8BE-LABEL: fromDiffConstsConvdtoi -; P8LE-LABEL: fromDiffConstsConvdtoi -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: lxv vs1, 16(r3) +; P9BE-NEXT: xxmrgld vs2, vs0, vs1 +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsxws v2, vs2 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: lxv vs1, 16(r3) +; P9LE-NEXT: xxmrgld vs2, vs1, vs0 +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsxws v2, vs2 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: li r4, 16 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: lxvd2x vs1, r3, r4 +; P8BE-NEXT: xxmrgld vs2, vs0, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpsxws v2, vs2 +; P8BE-NEXT: xvcvdpsxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: li r4, 16 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: lxvd2x vs1, r3, r4 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xxswapd vs1, vs1 +; P8LE-NEXT: xxmrgld vs2, vs1, vs0 +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpsxws v2, vs2 +; P8LE-NEXT: xvcvdpsxws v3, vs0 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 @@ -1569,44 +2003,61 @@ entry: %5 = fptosi <2 x double> %4 to <2 x i32> %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsAConvdtoi -; P9LE-LABEL: fromDiffMemConsAConvdtoi -; P8BE-LABEL: fromDiffMemConsAConvdtoi -; P8LE-LABEL: fromDiffMemConsAConvdtoi -; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) -; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] -; P9BE: vmrgew v2, [[REG6]], [[REG5]] -; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) -; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] -; P9LE: vmrgew v2, [[REG6]], [[REG5]] -; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 -; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] -; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] -; P8BE: vmrgew v2, [[REG6]], [[REG5]] -; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 -; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] -; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] -; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]] -; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]] -; P8LE: vmrgew v2, [[REG8]], [[REG7]] } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 24(r3) +; P9BE-NEXT: lfd f1, 16(r3) +; P9BE-NEXT: lfd f2, 8(r3) +; P9BE-NEXT: lfd f3, 0(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpsxws v2, vs1 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 24(r3) +; P9LE-NEXT: lfd f2, 8(r3) +; P9LE-NEXT: lfd f1, 16(r3) +; P9LE-NEXT: lfd f3, 0(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpsxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f3, 0, r3 +; P8BE-NEXT: lfd f0, 24(r3) +; P8BE-NEXT: lfd f1, 8(r3) +; P8BE-NEXT: lfd f2, 16(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xxmrghd vs1, vs2, vs3 +; P8BE-NEXT: xvcvdpsxws v2, vs0 +; P8BE-NEXT: xvcvdpsxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f3, 0, r3 +; P8LE-NEXT: lfd f0, 24(r3) +; P8LE-NEXT: lfd f1, 8(r3) +; P8LE-NEXT: lfd f2, 16(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsxws v2, vs0 +; P8LE-NEXT: xvcvdpsxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 @@ -1624,50 +2075,65 @@ entry: %conv8 = fptosi double %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsDConvdtoi -; P9LE-LABEL: fromDiffMemConsDConvdtoi -; P8BE-LABEL: fromDiffMemConsDConvdtoi -; P8LE-LABEL: fromDiffMemConsDConvdtoi -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpsxws -; P9BE: xvcvdpsxws -; P9BE: vmrgew v2 -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: vmrgew v2 -; P8BE: lfdx -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpsxws -; P8BE: xvcvdpsxws -; P8BE: vmrgew v2 -; P8LE: lfdx -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpsxws -; P8LE: xvcvdpsxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lfdux f0, r3, r4 +; P9BE-NEXT: lfd f1, 8(r3) +; P9BE-NEXT: lfd f2, 16(r3) +; P9BE-NEXT: lfd f3, 24(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpsxws v2, vs1 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lfdux f0, r3, r4 +; P9LE-NEXT: lfd f2, 16(r3) +; P9LE-NEXT: lfd f1, 8(r3) +; P9LE-NEXT: lfd f3, 24(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpsxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lfdux f0, r3, r4 +; P8BE-NEXT: lfd f1, 8(r3) +; P8BE-NEXT: lfd f2, 24(r3) +; P8BE-NEXT: lfd f3, 16(r3) +; P8BE-NEXT: xxmrghd vs1, vs1, vs2 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsxws v2, vs1 +; P8BE-NEXT: xvcvdpsxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lfdux f0, r3, r4 +; P8LE-NEXT: lfd f1, 16(r3) +; P8LE-NEXT: lfd f2, 8(r3) +; P8LE-NEXT: lfd f3, 24(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsxws v2, vs0 +; P8LE-NEXT: xvcvdpsxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -1693,50 +2159,65 @@ entry: %conv13 = fptosi double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarAConvdtoi -; P9LE-LABEL: fromDiffMemVarAConvdtoi -; P8BE-LABEL: fromDiffMemVarAConvdtoi -; P8LE-LABEL: fromDiffMemVarAConvdtoi -; P9BE: lfdux -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpsxws -; P9BE: xvcvdpsxws -; P9BE: vmrgew v2 -; P9LE: lfdux -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: vmrgew v2 -; P8BE: lfdux -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpsxws -; P8BE: xvcvdpsxws -; P8BE: vmrgew v2 -; P8LE: lfdux -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpsxws -; P8LE: xvcvdpsxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lfdux f0, r3, r4 +; P9BE-NEXT: lfd f1, -8(r3) +; P9BE-NEXT: lfd f2, -16(r3) +; P9BE-NEXT: lfd f3, -24(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpsxws v2, vs1 +; P9BE-NEXT: xvcvdpsxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lfdux f0, r3, r4 +; P9LE-NEXT: lfd f2, -16(r3) +; P9LE-NEXT: lfd f1, -8(r3) +; P9LE-NEXT: lfd f3, -24(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpsxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpsxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lfdux f0, r3, r4 +; P8BE-NEXT: lfd f1, -8(r3) +; P8BE-NEXT: lfd f2, -24(r3) +; P8BE-NEXT: lfd f3, -16(r3) +; P8BE-NEXT: xxmrghd vs1, vs1, vs2 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsxws v2, vs1 +; P8BE-NEXT: xvcvdpsxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lfdux f0, r3, r4 +; P8LE-NEXT: lfd f1, -16(r3) +; P8LE-NEXT: lfd f2, -8(r3) +; P8LE-NEXT: lfd f3, -24(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsxws v2, vs0 +; P8LE-NEXT: xvcvdpsxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -1762,255 +2243,305 @@ entry: %conv13 = fptosi double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarDConvdtoi -; P9LE-LABEL: fromDiffMemVarDConvdtoi -; P8BE-LABEL: fromDiffMemVarDConvdtoi -; P8LE-LABEL: fromDiffMemVarDConvdtoi -; P9BE: lfdux -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpsxws -; P9BE: xvcvdpsxws -; P9BE: vmrgew v2 -; P9LE: lfdux -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: xxmrghd -; P9LE: xvcvdpsxws -; P9LE: vmrgew v2 -; P8BE: lfdux -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpsxws -; P8BE: xvcvdpsxws -; P8BE: vmrgew v2 -; P8LE: lfdux -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpsxws -; P8LE: xvcvdpsxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvdtoi(double %val) { +; P9BE-LABEL: spltRegValConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpsxws f0, f1 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpsxws f0, f1 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpsxws f0, f1 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpsxws f0, f1 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %conv = fptosi double %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegValConvdtoi -; P9LE-LABEL: spltRegValConvdtoi -; P8BE-LABEL: spltRegValConvdtoi -; P8LE-LABEL: spltRegValConvdtoi -; P9BE: xscvdpsxws -; P9BE: xxspltw -; P9BE: blr -; P9LE: xscvdpsxws -; P9LE: xxspltw -; P9LE: blr -; P8BE: xscvdpsxws -; P8BE: xxspltw -; P8BE: blr -; P8LE: xscvdpsxws -; P8LE: xxspltw -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvdtoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xscvdpsxws f0, f0 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvdtoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xscvdpsxws f0, f0 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvdtoi: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xscvdpsxws f0, f0 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvdtoi: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xscvdpsxws f0, f0 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %0 = load double, double* %ptr, align 8 %conv = fptosi double %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemValConvdtoi -; P9LE-LABEL: spltMemValConvdtoi -; P8BE-LABEL: spltMemValConvdtoi -; P8LE-LABEL: spltMemValConvdtoi -; P9BE: lfd -; P9BE: xscvdpsxws -; P9BE: xxspltw -; P9BE: blr -; P9LE: lfd -; P9LE: xscvdpsxws -; P9LE: xxspltw -; P9LE: blr -; P8BE: lfdx -; P8BE: xscvdpsxws -; P8BE: xxspltw -; P8BE: blr -; P8LE: lfdx -; P8LE: xscvdpsxws -; P8LE: xxspltw -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @allZeroui() { +; P9BE-LABEL: allZeroui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxlxor v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allZeroui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxlxor v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allZeroui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxlxor v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allZeroui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxlxor v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> zeroinitializer -; P9BE-LABEL: allZeroui -; P9LE-LABEL: allZeroui -; P8BE-LABEL: allZeroui -; P8LE-LABEL: allZeroui -; P9BE: xxlxor v2, v2, v2 -; P9BE: blr -; P9LE: xxlxor v2, v2, v2 -; P9LE: blr -; P8BE: xxlxor v2, v2, v2 -; P8BE: blr -; P8LE: xxlxor v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @allOneui() { +; P9BE-LABEL: allOneui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxspltib v2, 255 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allOneui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxspltib v2, 255 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allOneui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisb v2, -1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allOneui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisb v2, -1 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> -; P9BE-LABEL: allOneui -; P9LE-LABEL: allOneui -; P8BE-LABEL: allOneui -; P8LE-LABEL: allOneui -; P9BE: xxspltib v2, 255 -; P9BE: blr -; P9LE: xxspltib v2, 255 -; P9LE: blr -; P8BE: vspltisb v2, -1 -; P8BE: blr -; P8LE: vspltisb v2, -1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst1ui() { +; P9BE-LABEL: spltConst1ui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst1ui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst1ui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst1ui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 1 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 1, i32 1, i32 1, i32 1> -; P9BE-LABEL: spltConst1ui -; P9LE-LABEL: spltConst1ui -; P8BE-LABEL: spltConst1ui -; P8LE-LABEL: spltConst1ui -; P9BE: vspltisw v2, 1 -; P9BE: blr -; P9LE: vspltisw v2, 1 -; P9LE: blr -; P8BE: vspltisw v2, 1 -; P8BE: blr -; P8LE: vspltisw v2, 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst16kui() { +; P9BE-LABEL: spltConst16kui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, -15 +; P9BE-NEXT: vsrw v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst16kui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, -15 +; P9LE-NEXT: vsrw v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst16kui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, -15 +; P8BE-NEXT: vsrw v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst16kui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, -15 +; P8LE-NEXT: vsrw v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> -; P9BE-LABEL: spltConst16kui -; P9LE-LABEL: spltConst16kui -; P8BE-LABEL: spltConst16kui -; P8LE-LABEL: spltConst16kui -; P9BE: vspltisw v2, -15 -; P9BE: vsrw v2, v2, v2 -; P9BE: blr -; P9LE: vspltisw v2, -15 -; P9LE: vsrw v2, v2, v2 -; P9LE: blr -; P8BE: vspltisw v2, -15 -; P8BE: vsrw v2, v2, v2 -; P8BE: blr -; P8LE: vspltisw v2, -15 -; P8LE: vsrw v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltConst32kui() { +; P9BE-LABEL: spltConst32kui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, -16 +; P9BE-NEXT: vsrw v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst32kui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, -16 +; P9LE-NEXT: vsrw v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst32kui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, -16 +; P8BE-NEXT: vsrw v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst32kui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, -16 +; P8LE-NEXT: vsrw v2, v2, v2 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535> -; P9BE-LABEL: spltConst32kui -; P9LE-LABEL: spltConst32kui -; P8BE-LABEL: spltConst32kui -; P8LE-LABEL: spltConst32kui -; P9BE: vspltisw v2, -16 -; P9BE: vsrw v2, v2, v2 -; P9BE: blr -; P9LE: vspltisw v2, -16 -; P9LE: vsrw v2, v2, v2 -; P9LE: blr -; P8BE: vspltisw v2, -16 -; P8BE: vsrw v2, v2, v2 -; P8BE: blr -; P8LE: vspltisw v2, -16 -; P8LE: vsrw v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) { +; P9BE-LABEL: fromRegsui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: rldimi r6, r5, 32, 0 +; P9BE-NEXT: rldimi r4, r3, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r4, r6 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: rldimi r3, r4, 32, 0 +; P9LE-NEXT: rldimi r5, r6, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r5, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: rldimi r6, r5, 32, 0 +; P8BE-NEXT: rldimi r4, r3, 32, 0 +; P8BE-NEXT: mtvsrd f0, r6 +; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: rldimi r3, r4, 32, 0 +; P8LE-NEXT: rldimi r5, r6, 32, 0 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3 ret <4 x i32> %vecinit3 -; P9BE-LABEL: fromRegsui -; P9LE-LABEL: fromRegsui -; P8BE-LABEL: fromRegsui -; P8LE-LABEL: fromRegsui -; P9BE-DAG: rldimi r6, r5, 32, 0 -; P9BE-DAG: rldimi r4, r3, 32, 0 -; P9BE: mtvsrdd v2, r4, r6 -; P9BE: blr -; P9LE-DAG: rldimi r3, r4, 32, 0 -; P9LE-DAG: rldimi r5, r6, 32, 0 -; P9LE: mtvsrdd v2, r5, r3 -; P9LE: blr -; P8BE-DAG: rldimi r6, r5, 32, 0 -; P8BE-DAG: rldimi r4, r3, 32, 0 -; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6 -; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4 -; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]] -; P8BE: blr -; P8LE-DAG: rldimi r3, r4, 32, 0 -; P8LE-DAG: rldimi r5, r6, 32, 0 -; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3 -; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5 -; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]] -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsui() { +; P9BE-LABEL: fromDiffConstsui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI39_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI39_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI39_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI39_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI39_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI39_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI39_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI39_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19> -; P9BE-LABEL: fromDiffConstsui -; P9LE-LABEL: fromDiffConstsui -; P8BE-LABEL: fromDiffConstsui -; P8LE-LABEL: fromDiffConstsui -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsAui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %0 = load i32, i32* %arr, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -2024,23 +2555,46 @@ entry: %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromDiffMemConsAui -; P9LE-LABEL: fromDiffMemConsAui -; P8BE-LABEL: fromDiffMemConsAui -; P8LE-LABEL: fromDiffMemConsAui -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsDui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: addis r3, r2, .LCPI41_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI41_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r4, r2, .LCPI41_0@toc@ha +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: addi r4, r4, .LCPI41_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI41_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI41_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 %0 = load i32, i32* %arrayidx, align 4 @@ -2054,32 +2608,34 @@ entry: %3 = load i32, i32* %arr, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromDiffMemConsDui -; P9LE-LABEL: fromDiffMemConsDui -; P8BE-LABEL: fromDiffMemConsDui -; P8LE-LABEL: fromDiffMemConsDui -; P9BE: lxv -; P9BE: lxv -; P9BE: vperm -; P9BE: blr -; P9LE: lxv -; P9LE: lxv -; P9LE: vperm -; P9LE: blr -; P8BE: lxvw4x -; P8BE: lxvw4x -; P8BE: vperm -; P8BE: blr -; P8LE: lxvd2x -; P8LE-DAG: lvx -; P8LE-NOT: xxswapd -; P8LE: xxswapd -; P8LE: vperm -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lxvx v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lxvw4x v2, r3, r4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom @@ -2101,27 +2657,58 @@ entry: %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 -; P9BE-LABEL: fromDiffMemVarAui -; P9LE-LABEL: fromDiffMemVarAui -; P8BE-LABEL: fromDiffMemVarAui -; P8LE-LABEL: fromDiffMemVarAui -; P9BE: sldi r4, r4, 2 -; P9BE: lxvx v2, r3, r4 -; P9BE: blr -; P9LE: sldi r4, r4, 2 -; P9LE: lxvx v2, r3, r4 -; P9LE: blr -; P8BE: sldi r4, r4, 2 -; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 -; P8BE: blr -; P8LE: sldi r4, r4, 2 -; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 -; P8LE: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -12 +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: addis r3, r2, .LCPI43_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI43_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -12 +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: addis r3, r2, .LCPI43_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI43_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: addis r5, r2, .LCPI43_0@toc@ha +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r4, r5, .LCPI43_0@toc@l +; P8BE-NEXT: addi r3, r3, -12 +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: addis r5, r2, .LCPI43_0@toc@ha +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -12 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r3, r5, .LCPI43_0@toc@l +; P8LE-NEXT: lvx v3, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: vperm v2, v2, v2, v3 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom @@ -2143,36 +2730,57 @@ entry: %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 -; P9BE-LABEL: fromDiffMemVarDui -; P9LE-LABEL: fromDiffMemVarDui -; P8BE-LABEL: fromDiffMemVarDui -; P8LE-LABEL: fromDiffMemVarDui -; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P9BE-DAG: addi r3, r3, -12 -; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3 -; P9BE-DAG: lxvx -; P9BE: vperm -; P9BE: blr -; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P9LE-DAG: addi r3, r3, -12 -; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3 -; P9LE-DAG: lxv -; P9LE: vperm -; P9LE: blr -; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 -; P8BE-DAG: lxvw4x -; P8BE: vperm -; P8BE: blr -; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P8LE-DAG: lvx -; P8LE-DAG: lvx -; P8LE: vperm -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) { +; P9BE-LABEL: fromRandMemConsui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lwz r4, 16(r3) +; P9BE-NEXT: lwz r5, 72(r3) +; P9BE-NEXT: lwz r6, 8(r3) +; P9BE-NEXT: lwz r3, 352(r3) +; P9BE-NEXT: rldimi r3, r6, 32, 0 +; P9BE-NEXT: rldimi r5, r4, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r5, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemConsui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lwz r4, 16(r3) +; P9LE-NEXT: lwz r5, 72(r3) +; P9LE-NEXT: lwz r6, 8(r3) +; P9LE-NEXT: lwz r3, 352(r3) +; P9LE-NEXT: rldimi r4, r5, 32, 0 +; P9LE-NEXT: rldimi r6, r3, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r6, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemConsui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lwz r4, 8(r3) +; P8BE-NEXT: lwz r5, 352(r3) +; P8BE-NEXT: lwz r6, 16(r3) +; P8BE-NEXT: lwz r3, 72(r3) +; P8BE-NEXT: rldimi r5, r4, 32, 0 +; P8BE-NEXT: rldimi r3, r6, 32, 0 +; P8BE-NEXT: mtvsrd f0, r5 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemConsui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lwz r4, 16(r3) +; P8LE-NEXT: lwz r5, 72(r3) +; P8LE-NEXT: lwz r6, 8(r3) +; P8LE-NEXT: lwz r3, 352(r3) +; P8LE-NEXT: rldimi r4, r5, 32, 0 +; P8LE-NEXT: rldimi r6, r3, 32, 0 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 %0 = load i32, i32* %arrayidx, align 4 @@ -2187,46 +2795,65 @@ entry: %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRandMemConsui -; P9LE-LABEL: fromRandMemConsui -; P8BE-LABEL: fromRandMemConsui -; P8LE-LABEL: fromRandMemConsui -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: rldimi -; P9BE: rldimi -; P9BE: mtvsrdd -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: rldimi -; P9LE: rldimi -; P9LE: mtvsrdd -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: rldimi -; P8BE: rldimi -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: rldimi -; P8LE: rldimi -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromRandMemVarui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: lwz r4, 16(r3) +; P9BE-NEXT: lwz r5, 4(r3) +; P9BE-NEXT: lwz r6, 8(r3) +; P9BE-NEXT: lwz r3, 32(r3) +; P9BE-NEXT: rldimi r3, r6, 32, 0 +; P9BE-NEXT: rldimi r5, r4, 32, 0 +; P9BE-NEXT: mtvsrdd v2, r5, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemVarui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: lwz r4, 16(r3) +; P9LE-NEXT: lwz r5, 4(r3) +; P9LE-NEXT: lwz r6, 8(r3) +; P9LE-NEXT: lwz r3, 32(r3) +; P9LE-NEXT: rldimi r4, r5, 32, 0 +; P9LE-NEXT: rldimi r6, r3, 32, 0 +; P9LE-NEXT: mtvsrdd v2, r6, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemVarui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: lwz r4, 8(r3) +; P8BE-NEXT: lwz r5, 32(r3) +; P8BE-NEXT: lwz r6, 16(r3) +; P8BE-NEXT: lwz r3, 4(r3) +; P8BE-NEXT: rldimi r5, r4, 32, 0 +; P8BE-NEXT: rldimi r3, r6, 32, 0 +; P8BE-NEXT: mtvsrd f0, r5 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemVarui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: lwz r4, 16(r3) +; P8LE-NEXT: lwz r5, 4(r3) +; P8LE-NEXT: lwz r6, 8(r3) +; P8LE-NEXT: lwz r3, 32(r3) +; P8LE-NEXT: rldimi r4, r5, 32, 0 +; P8LE-NEXT: rldimi r6, r3, 32, 0 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 @@ -2249,119 +2876,151 @@ entry: %3 = load i32, i32* %arrayidx11, align 4 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3 ret <4 x i32> %vecinit12 -; P9BE-LABEL: fromRandMemVarui -; P9LE-LABEL: fromRandMemVarui -; P8BE-LABEL: fromRandMemVarui -; P8LE-LABEL: fromRandMemVarui -; P9BE: sldi r4, r4, 2 -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: lwz -; P9BE: rldimi -; P9BE: rldimi -; P9BE: mtvsrdd -; P9LE: sldi r4, r4, 2 -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: lwz -; P9LE: rldimi -; P9LE: rldimi -; P9LE: mtvsrdd -; P8BE: sldi r4, r4, 2 -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: lwz -; P8BE: rldimi -; P8BE: rldimi -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd -; P8LE: sldi r4, r4, 2 -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: lwz -; P8LE: rldimi -; P8LE: rldimi -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValui(i32 zeroext %val) { +; P9BE-LABEL: spltRegValui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrws v2, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrws v2, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrwz f0, r3 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrwz f0, r3 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegValui -; P9LE-LABEL: spltRegValui -; P8BE-LABEL: spltRegValui -; P8LE-LABEL: spltRegValui -; P9BE: mtvsrws v2, r3 -; P9BE: blr -; P9LE: mtvsrws v2, r3 -; P9LE: blr -; P8BE: mtvsrwz {{[vsf0-9]+}}, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 -; P8BE: blr -; P8LE: mtvsrwz {{[vsf0-9]+}}, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxspltw v2, vs0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxspltw v2, vs0, 3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxspltw v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxspltw v2, vs0, 3 +; P8LE-NEXT: blr entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemValui -; P9LE-LABEL: spltMemValui -; P8BE-LABEL: spltMemValui -; P8LE-LABEL: spltMemValui -; P9BE: lfiwzx f0, 0, r3 -; P9BE: xxsldwi vs0, f0, f0, 1 -; P9BE: xxspltw v2, vs0, 0 -; P9BE: blr -; P9LE: lfiwzx f0, 0, r3 -; P9LE: xxpermdi vs0, f0, f0, 2 -; P9LE: xxspltw v2, vs0, 3 -; P9LE: blr -; P8BE: lfiwzx f0, 0, r3 -; P8BE: xxsldwi vs0, f0, f0, 1 -; P8BE: xxspltw v2, vs0, 0 -; P8BE: blr -; P8LE: lfiwzx f0, 0, r3 -; P8LE: xxpermdi vs0, f0, f0, 2 -; P8LE: xxspltw v2, vs0, 3 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvftoui() { +; P9BE-LABEL: spltCnstConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 4 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; P9BE-LABEL: spltCnstConvftoui -; P9LE-LABEL: spltCnstConvftoui -; P8BE-LABEL: spltCnstConvftoui -; P8LE-LABEL: spltCnstConvftoui -; P9BE: vspltisw v2, 4 -; P9BE: blr -; P9LE: vspltisw v2, 4 -; P9LE: blr -; P8BE: vspltisw v2, 4 -; P8BE: blr -; P8LE: vspltisw v2, 4 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) { +; P9BE-LABEL: fromRegsConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xvcvdpuxws v2, vs0 +; P9BE-NEXT: xxmrghd vs0, vs1, vs3 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: xvcvdpuxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs4, vs2 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: xxmrghd vs0, vs2, vs4 +; P8BE-NEXT: xxmrghd vs1, vs1, vs3 +; P8BE-NEXT: xvcvdpuxws v2, vs0 +; P8BE-NEXT: xvcvdpuxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: xxmrghd vs0, vs3, vs1 +; P8LE-NEXT: xxmrghd vs1, vs4, vs2 +; P8LE-NEXT: xvcvdpuxws v2, vs0 +; P8LE-NEXT: xvcvdpuxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %conv = fptoui float %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 @@ -2372,79 +3031,116 @@ entry: %conv5 = fptoui float %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRegsConvftoui -; P9LE-LABEL: fromRegsConvftoui -; P8BE-LABEL: fromRegsConvftoui -; P8LE-LABEL: fromRegsConvftoui -; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvftoui() { +; P9BE-LABEL: fromDiffConstsConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI50_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI50_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI50_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI50_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI50_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI50_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI50_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI50_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 24, i32 234, i32 988, i32 422> -; P9BE-LABEL: fromDiffConstsConvftoui -; P9LE-LABEL: fromDiffConstsConvftoui -; P8BE-LABEL: fromDiffConstsConvftoui -; P8LE-LABEL: fromDiffConstsConvftoui -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xvcvspuxws v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xvcvspuxws v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvw4x vs0, 0, r3 +; P8BE-NEXT: xvcvspuxws v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xvcvspuxws v2, v2 +; P8LE-NEXT: blr entry: %0 = bitcast float* %ptr to <4 x float>* %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = fptoui <4 x float> %1 to <4 x i32> ret <4 x i32> %2 -; P9BE-LABEL: fromDiffMemConsAConvftoui -; P9LE-LABEL: fromDiffMemConsAConvftoui -; P8BE-LABEL: fromDiffMemConsAConvftoui -; P8LE-LABEL: fromDiffMemConsAConvftoui -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE: xvcvspuxws v2, [[REG1]] -; P9BE: blr -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE: xvcvspuxws v2, [[REG1]] -; P9LE: blr -; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 -; P8BE: xvcvspuxws v2, [[REG1]] -; P8BE: blr -; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8LE: xxswapd v2, [[REG1]] -; P8LE: xvcvspuxws v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: addis r3, r2, .LCPI52_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI52_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: xvcvspuxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI52_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI52_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: xvcvspuxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r4, r2, .LCPI52_0@toc@ha +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: addi r4, r4, .LCPI52_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r4 +; P8BE-NEXT: vperm v2, v2, v2, v3 +; P8BE-NEXT: xvcvspuxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI52_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI52_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: xvcvspuxws v2, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 @@ -2462,35 +3158,69 @@ entry: %conv8 = fptoui float %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsDConvftoui -; P9LE-LABEL: fromDiffMemConsDConvftoui -; P8BE-LABEL: fromDiffMemConsDConvftoui -; P8LE-LABEL: fromDiffMemConsDConvftoui -; P9BE: lxv -; P9BE: lxv -; P9BE: vperm -; P9BE: xvcvspuxws -; P9BE: blr -; P9LE: lxv -; P9LE: lxv -; P9LE: vperm -; P9LE: xvcvspuxws -; P9LE: blr -; P8BE: lxvw4x -; P8BE: lxvw4x -; P8BE: vperm -; P8BE: xvcvspuxws -; P8BE: blr -; P8LE-DAG: lxvd2x -; P8LE-DAG: lvx -; P8LE: xxswapd -; P8LE: vperm -; P8LE: xvcvspuxws -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, 12(r3) +; P9BE-NEXT: lfs f2, 4(r3) +; P9BE-NEXT: xxmrghd vs1, vs2, vs1 +; P9BE-NEXT: xvcvdpsp v2, vs1 +; P9BE-NEXT: lfs f1, 8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsp v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: xvcvspuxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, 8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: lfs f1, 12(r3) +; P9LE-NEXT: xvcvdpsp v2, vs0 +; P9LE-NEXT: lfs f0, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsp v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: xvcvspuxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, 12(r3) +; P8BE-NEXT: lfs f2, 4(r3) +; P8BE-NEXT: lfs f3, 8(r3) +; P8BE-NEXT: xxmrghd vs1, vs2, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsp v2, vs1 +; P8BE-NEXT: xvcvdpsp v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: xvcvspuxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, 8(r3) +; P8LE-NEXT: lfs f2, 4(r3) +; P8LE-NEXT: lfs f3, 12(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsp v2, vs0 +; P8LE-NEXT: xvcvdpsp v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: xvcvspuxws v2, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -2516,19 +3246,70 @@ entry: %conv13 = fptoui float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarAConvftoui -; P9LE-LABEL: fromDiffMemVarAConvftoui -; P8BE-LABEL: fromDiffMemVarAConvftoui -; P8LE-LABEL: fromDiffMemVarAConvftoui ; FIXME: implement finding consecutive loads with pre-inc -; P9BE: lfsux -; P9LE: lfsux -; P8BE: lfsux -; P8LE: lfsux } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, -12(r3) +; P9BE-NEXT: lfs f2, -4(r3) +; P9BE-NEXT: xxmrghd vs1, vs2, vs1 +; P9BE-NEXT: xvcvdpsp v2, vs1 +; P9BE-NEXT: lfs f1, -8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsp v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: xvcvspuxws v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, -8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: lfs f1, -12(r3) +; P9LE-NEXT: xvcvdpsp v2, vs0 +; P9LE-NEXT: lfs f0, -4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsp v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: xvcvspuxws v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, -12(r3) +; P8BE-NEXT: lfs f2, -4(r3) +; P8BE-NEXT: lfs f3, -8(r3) +; P8BE-NEXT: xxmrghd vs1, vs2, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpsp v2, vs1 +; P8BE-NEXT: xvcvdpsp v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: xvcvspuxws v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, -8(r3) +; P8LE-NEXT: lfs f2, -4(r3) +; P8LE-NEXT: lfs f3, -12(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpsp v2, vs0 +; P8LE-NEXT: xvcvdpsp v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: xvcvspuxws v2, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -2554,86 +3335,154 @@ entry: %conv13 = fptoui float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarDConvftoui -; P9LE-LABEL: fromDiffMemVarDConvftoui -; P8BE-LABEL: fromDiffMemVarDConvftoui -; P8LE-LABEL: fromDiffMemVarDConvftoui ; FIXME: implement finding consecutive loads with pre-inc -; P9BE: lfsux -; P9LE: lfsux -; P8BE: lfsux -; P8LE: lfsux } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvftoui(float %val) { +; P9BE-LABEL: spltRegValConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpuxws f0, f1 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpuxws f0, f1 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpuxws f0, f1 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpuxws f0, f1 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %conv = fptoui float %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegValConvftoui -; P9LE-LABEL: spltRegValConvftoui -; P8BE-LABEL: spltRegValConvftoui -; P8LE-LABEL: spltRegValConvftoui -; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1 -; P9BE: xxspltw v2, vs[[REG1]], 1 -; P9BE: blr -; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1 -; P9LE: xxspltw v2, vs[[REG1]], 1 -; P9LE: blr -; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1 -; P8BE: xxspltw v2, vs[[REG1]], 1 -; P8BE: blr -; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1 -; P8LE: xxspltw v2, vs[[REG1]], 1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvftoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvwsx vs0, 0, r3 +; P9BE-NEXT: xvcvspuxws v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvftoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvwsx vs0, 0, r3 +; P9LE-NEXT: xvcvspuxws v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvftoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: xscvdpuxws f0, f0 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvftoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: xscvdpuxws f0, f0 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemValConvftoui -; P9LE-LABEL: spltMemValConvftoui -; P8BE-LABEL: spltMemValConvftoui -; P8LE-LABEL: spltMemValConvftoui -; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 -; P9BE: xvcvspuxws v2, [[REG1]] -; P9LE: [[REG1:[vs0-9]+]], 0, r3 -; P9LE: xvcvspuxws v2, [[REG1]] -; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 -; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]] -; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 -; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]] -; P8LE: xxspltw v2, vs[[REG2]], 1 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvdtoui() { +; P9BE-LABEL: spltCnstConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: vspltisw v2, 4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: vspltisw v2, 4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisw v2, 4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisw v2, 4 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 4, i32 4, i32 4, i32 4> -; P9BE-LABEL: spltCnstConvdtoui -; P9LE-LABEL: spltCnstConvdtoui -; P8BE-LABEL: spltCnstConvdtoui -; P8LE-LABEL: spltCnstConvdtoui -; P9BE: vspltisw v2, 4 -; P9BE: blr -; P9LE: vspltisw v2, 4 -; P9LE: blr -; P8BE: vspltisw v2, 4 -; P8BE: blr -; P8LE: vspltisw v2, 4 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) { +; P9BE-LABEL: fromRegsConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: xxmrghd vs0, vs2, vs4 +; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xvcvdpuxws v2, vs0 +; P9BE-NEXT: xxmrghd vs0, vs1, vs3 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: xvcvdpuxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs4, vs2 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: xxmrghd vs0, vs2, vs4 +; P8BE-NEXT: xxmrghd vs1, vs1, vs3 +; P8BE-NEXT: xvcvdpuxws v2, vs0 +; P8BE-NEXT: xvcvdpuxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4 +; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3 +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: xxmrghd vs0, vs3, vs1 +; P8LE-NEXT: xxmrghd vs1, vs4, vs2 +; P8LE-NEXT: xvcvdpuxws v2, vs0 +; P8LE-NEXT: xvcvdpuxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %conv = fptoui double %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 @@ -2644,53 +3493,90 @@ entry: %conv5 = fptoui double %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 -; P9BE-LABEL: fromRegsConvdtoui -; P9LE-LABEL: fromRegsConvdtoui -; P8BE-LABEL: fromRegsConvdtoui -; P8LE-LABEL: fromRegsConvdtoui -; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 -; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 -; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] -; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvdtoui() { +; P9BE-LABEL: fromDiffConstsConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI59_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI59_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI59_0@toc@l +; P8BE-NEXT: lxvw4x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI59_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: blr entry: ret <4 x i32> <i32 24, i32 234, i32 988, i32 422> -; P9BE-LABEL: fromDiffConstsConvdtoui -; P9LE-LABEL: fromDiffConstsConvdtoui -; P8BE-LABEL: fromDiffConstsConvdtoui -; P8LE-LABEL: fromDiffConstsConvdtoui -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvw4x -; P8BE: blr -; P8LE: lvx -; P8LE-NOT: xxswapd -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: lxv vs1, 16(r3) +; P9BE-NEXT: xxmrgld vs2, vs0, vs1 +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpuxws v2, vs2 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: lxv vs1, 16(r3) +; P9LE-NEXT: xxmrgld vs2, vs1, vs0 +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpuxws v2, vs2 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: li r4, 16 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: lxvd2x vs1, r3, r4 +; P8BE-NEXT: xxmrgld vs2, vs0, vs1 +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpuxws v2, vs2 +; P8BE-NEXT: xvcvdpuxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: li r4, 16 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: lxvd2x vs1, r3, r4 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xxswapd vs1, vs1 +; P8LE-NEXT: xxmrgld vs2, vs1, vs0 +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpuxws v2, vs2 +; P8LE-NEXT: xvcvdpuxws v3, vs0 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 @@ -2701,44 +3587,61 @@ entry: %5 = fptoui <2 x double> %4 to <2 x i32> %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsAConvdtoui -; P9LE-LABEL: fromDiffMemConsAConvdtoui -; P8BE-LABEL: fromDiffMemConsAConvdtoui -; P8LE-LABEL: fromDiffMemConsAConvdtoui -; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) -; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] -; P9BE: vmrgew v2, [[REG6]], [[REG5]] -; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) -; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] -; P9LE: vmrgew v2, [[REG6]], [[REG5]] -; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 -; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] -; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] -; P8BE: vmrgew v2, [[REG6]], [[REG5]] -; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 -; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 -; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] -; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] -; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]] -; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]] -; P8LE: vmrgew v2, [[REG8]], [[REG7]] } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 24(r3) +; P9BE-NEXT: lfd f1, 16(r3) +; P9BE-NEXT: lfd f2, 8(r3) +; P9BE-NEXT: lfd f3, 0(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpuxws v2, vs1 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 24(r3) +; P9LE-NEXT: lfd f2, 8(r3) +; P9LE-NEXT: lfd f1, 16(r3) +; P9LE-NEXT: lfd f3, 0(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpuxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f3, 0, r3 +; P8BE-NEXT: lfd f0, 24(r3) +; P8BE-NEXT: lfd f1, 8(r3) +; P8BE-NEXT: lfd f2, 16(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xxmrghd vs1, vs2, vs3 +; P8BE-NEXT: xvcvdpuxws v2, vs0 +; P8BE-NEXT: xvcvdpuxws v3, vs1 +; P8BE-NEXT: vmrgew v2, v2, v3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f3, 0, r3 +; P8LE-NEXT: lfd f0, 24(r3) +; P8LE-NEXT: lfd f1, 8(r3) +; P8LE-NEXT: lfd f2, 16(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpuxws v2, vs0 +; P8LE-NEXT: xvcvdpuxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 @@ -2756,50 +3659,65 @@ entry: %conv8 = fptoui double %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 -; P9BE-LABEL: fromDiffMemConsDConvdtoui -; P9LE-LABEL: fromDiffMemConsDConvdtoui -; P8BE-LABEL: fromDiffMemConsDConvdtoui -; P8LE-LABEL: fromDiffMemConsDConvdtoui -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpuxws -; P9BE: xvcvdpuxws -; P9BE: vmrgew v2 -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: vmrgew v2 -; P8BE: lfdx -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpuxws -; P8BE: xvcvdpuxws -; P8BE: vmrgew v2 -; P8LE: lfdx -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpuxws -; P8LE: xvcvdpuxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lfdux f0, r3, r4 +; P9BE-NEXT: lfd f1, 8(r3) +; P9BE-NEXT: lfd f2, 16(r3) +; P9BE-NEXT: lfd f3, 24(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpuxws v2, vs1 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lfdux f0, r3, r4 +; P9LE-NEXT: lfd f2, 16(r3) +; P9LE-NEXT: lfd f1, 8(r3) +; P9LE-NEXT: lfd f3, 24(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpuxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lfdux f0, r3, r4 +; P8BE-NEXT: lfd f1, 8(r3) +; P8BE-NEXT: lfd f2, 24(r3) +; P8BE-NEXT: lfd f3, 16(r3) +; P8BE-NEXT: xxmrghd vs1, vs1, vs2 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpuxws v2, vs1 +; P8BE-NEXT: xvcvdpuxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lfdux f0, r3, r4 +; P8LE-NEXT: lfd f1, 16(r3) +; P8LE-NEXT: lfd f2, 8(r3) +; P8LE-NEXT: lfd f3, 24(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpuxws v2, vs0 +; P8LE-NEXT: xvcvdpuxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -2825,50 +3743,65 @@ entry: %conv13 = fptoui double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarAConvdtoui -; P9LE-LABEL: fromDiffMemVarAConvdtoui -; P8BE-LABEL: fromDiffMemVarAConvdtoui -; P8LE-LABEL: fromDiffMemVarAConvdtoui -; P9BE: lfdux -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpuxws -; P9BE: xvcvdpuxws -; P9BE: vmrgew v2 -; P9LE: lfdux -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: vmrgew v2 -; P8BE: lfdux -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpuxws -; P8BE: xvcvdpuxws -; P8BE: vmrgew v2 -; P8LE: lfdux -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpuxws -; P8LE: xvcvdpuxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lfdux f0, r3, r4 +; P9BE-NEXT: lfd f1, -8(r3) +; P9BE-NEXT: lfd f2, -16(r3) +; P9BE-NEXT: lfd f3, -24(r3) +; P9BE-NEXT: xxmrghd vs1, vs1, vs3 +; P9BE-NEXT: xxmrghd vs0, vs0, vs2 +; P9BE-NEXT: xvcvdpuxws v2, vs1 +; P9BE-NEXT: xvcvdpuxws v3, vs0 +; P9BE-NEXT: vmrgew v2, v3, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lfdux f0, r3, r4 +; P9LE-NEXT: lfd f2, -16(r3) +; P9LE-NEXT: lfd f1, -8(r3) +; P9LE-NEXT: lfd f3, -24(r3) +; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: xvcvdpuxws v2, vs0 +; P9LE-NEXT: xxmrghd vs0, vs3, vs1 +; P9LE-NEXT: xvcvdpuxws v3, vs0 +; P9LE-NEXT: vmrgew v2, v3, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lfdux f0, r3, r4 +; P8BE-NEXT: lfd f1, -8(r3) +; P8BE-NEXT: lfd f2, -24(r3) +; P8BE-NEXT: lfd f3, -16(r3) +; P8BE-NEXT: xxmrghd vs1, vs1, vs2 +; P8BE-NEXT: xxmrghd vs0, vs0, vs3 +; P8BE-NEXT: xvcvdpuxws v2, vs1 +; P8BE-NEXT: xvcvdpuxws v3, vs0 +; P8BE-NEXT: vmrgew v2, v3, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lfdux f0, r3, r4 +; P8LE-NEXT: lfd f1, -16(r3) +; P8LE-NEXT: lfd f2, -8(r3) +; P8LE-NEXT: lfd f3, -24(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xxmrghd vs1, vs3, vs2 +; P8LE-NEXT: xvcvdpuxws v2, vs0 +; P8LE-NEXT: xvcvdpuxws v3, vs1 +; P8LE-NEXT: vmrgew v2, v3, v2 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -2894,236 +3827,315 @@ entry: %conv13 = fptoui double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 -; P9BE-LABEL: fromDiffMemVarDConvdtoui -; P9LE-LABEL: fromDiffMemVarDConvdtoui -; P8BE-LABEL: fromDiffMemVarDConvdtoui -; P8LE-LABEL: fromDiffMemVarDConvdtoui -; P9BE: lfdux -; P9BE: lfd -; P9BE: lfd -; P9BE: lfd -; P9BE: xxmrghd -; P9BE: xxmrghd -; P9BE: xvcvdpuxws -; P9BE: xvcvdpuxws -; P9BE: vmrgew v2 -; P9LE: lfdux -; P9LE: lfd -; P9LE: lfd -; P9LE: lfd -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: xxmrghd -; P9LE: xvcvdpuxws -; P9LE: vmrgew v2 -; P8BE: lfdux -; P8BE: lfd -; P8BE: lfd -; P8BE: lfd -; P8BE: xxmrghd -; P8BE: xxmrghd -; P8BE: xvcvdpuxws -; P8BE: xvcvdpuxws -; P8BE: vmrgew v2 -; P8LE: lfdux -; P8LE: lfd -; P8LE: lfd -; P8LE: lfd -; P8LE: xxmrghd -; P8LE: xxmrghd -; P8LE: xvcvdpuxws -; P8LE: xvcvdpuxws -; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvdtoui(double %val) { +; P9BE-LABEL: spltRegValConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpuxws f0, f1 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpuxws f0, f1 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpuxws f0, f1 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpuxws f0, f1 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %conv = fptoui double %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltRegValConvdtoui -; P9LE-LABEL: spltRegValConvdtoui -; P8BE-LABEL: spltRegValConvdtoui -; P8LE-LABEL: spltRegValConvdtoui -; P9BE: xscvdpuxws -; P9BE: xxspltw -; P9BE: blr -; P9LE: xscvdpuxws -; P9LE: xxspltw -; P9LE: blr -; P8BE: xscvdpuxws -; P8BE: xxspltw -; P8BE: blr -; P8LE: xscvdpuxws -; P8LE: xxspltw -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvdtoui: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xscvdpuxws f0, f0 +; P9BE-NEXT: xxspltw v2, vs0, 1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvdtoui: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xscvdpuxws f0, f0 +; P9LE-NEXT: xxspltw v2, vs0, 1 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvdtoui: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xscvdpuxws f0, f0 +; P8BE-NEXT: xxspltw v2, vs0, 1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvdtoui: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xscvdpuxws f0, f0 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %0 = load double, double* %ptr, align 8 %conv = fptoui double %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; P9BE-LABEL: spltMemValConvdtoui -; P9LE-LABEL: spltMemValConvdtoui -; P8BE-LABEL: spltMemValConvdtoui -; P8LE-LABEL: spltMemValConvdtoui -; P9BE: lfd -; P9BE: xscvdpuxws -; P9BE: xxspltw -; P9BE: blr -; P9LE: lfd -; P9LE: xscvdpuxws -; P9LE: xxspltw -; P9LE: blr -; P8BE: lfdx -; P8BE: xscvdpuxws -; P8BE: xxspltw -; P8BE: blr -; P8LE: lfdx -; P8LE: xscvdpuxws -; P8LE: xxspltw -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allZeroll() { +; P9BE-LABEL: allZeroll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxlxor v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allZeroll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxlxor v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allZeroll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxlxor v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allZeroll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxlxor v2, v2, v2 +; P8LE-NEXT: blr entry: ret <2 x i64> zeroinitializer -; P9BE-LABEL: allZeroll -; P9LE-LABEL: allZeroll -; P8BE-LABEL: allZeroll -; P8LE-LABEL: allZeroll -; P9BE: xxlxor v2, v2, v2 -; P9BE: blr -; P9LE: xxlxor v2, v2, v2 -; P9LE: blr -; P8BE: xxlxor v2, v2, v2 -; P8BE: blr -; P8LE: xxlxor v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allOnell() { +; P9BE-LABEL: allOnell: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxspltib v2, 255 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allOnell: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxspltib v2, 255 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allOnell: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisb v2, -1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allOnell: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisb v2, -1 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 -1, i64 -1> -; P9BE-LABEL: allOnell -; P9LE-LABEL: allOnell -; P8BE-LABEL: allOnell -; P8LE-LABEL: allOnell -; P9BE: xxspltib v2, 255 -; P9BE: blr -; P9LE: xxspltib v2, 255 -; P9LE: blr -; P8BE: vspltisb v2, -1 -; P8BE: blr -; P8LE: vspltisb v2, -1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst1ll() { +; P9BE-LABEL: spltConst1ll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI68_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI68_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst1ll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI68_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI68_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst1ll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI68_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI68_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst1ll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI68_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI68_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 1, i64 1> -; P9BE-LABEL: spltConst1ll -; P9LE-LABEL: spltConst1ll -; P8BE-LABEL: spltConst1ll -; P8LE-LABEL: spltConst1ll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst16kll() { +; P9BE-LABEL: spltConst16kll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI69_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI69_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst16kll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI69_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI69_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst16kll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI69_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI69_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst16kll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI69_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI69_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 32767, i64 32767> -; P9BE-LABEL: spltConst16kll -; P9LE-LABEL: spltConst16kll -; P8BE-LABEL: spltConst16kll -; P8LE-LABEL: spltConst16kll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst32kll() { +; P9BE-LABEL: spltConst32kll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI70_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI70_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst32kll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI70_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI70_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst32kll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI70_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI70_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst32kll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI70_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI70_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 65535, i64 65535> -; P9BE-LABEL: spltConst32kll -; P9LE-LABEL: spltConst32kll -; P8BE-LABEL: spltConst32kll -; P8LE-LABEL: spltConst32kll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsll(i64 %a, i64 %b) { +; P9BE-LABEL: fromRegsll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrdd v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 ret <2 x i64> %vecinit1 -; P9BE-LABEL: fromRegsll -; P9LE-LABEL: fromRegsll -; P8BE-LABEL: fromRegsll -; P8LE-LABEL: fromRegsll -; P9BE: mtvsrdd v2, r3, r4 -; P9BE: blr -; P9LE: mtvsrdd v2, r4, r3 -; P9LE: blr -; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 -; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 -; P8BE: xxmrghd v2 -; P8BE: blr -; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 -; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 -; P8LE: xxmrghd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsll() { +; P9BE-LABEL: fromDiffConstsll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI72_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI72_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI72_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI72_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI72_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI72_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI72_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI72_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 242, i64 -113> -; P9BE-LABEL: fromDiffConstsll -; P9LE-LABEL: fromDiffConstsll -; P8BE-LABEL: fromDiffConstsll -; P8LE-LABEL: fromDiffConstsll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsAll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %0 = load i64, i64* %arr, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 @@ -3131,23 +4143,34 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromDiffMemConsAll -; P9LE-LABEL: fromDiffMemConsAll -; P8BE-LABEL: fromDiffMemConsAll -; P8LE-LABEL: fromDiffMemConsAll -; P9BE: lxv v2 -; P9BE: blr -; P9LE: lxv v2 -; P9LE: blr -; P8BE: lxvd2x v2 -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsDll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 16(r3) +; P9BE-NEXT: xxswapd v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 16(r3) +; P9LE-NEXT: xxswapd v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 16 +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: xxswapd v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 16 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3 %0 = load i64, i64* %arrayidx, align 8 @@ -3156,24 +4179,34 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromDiffMemConsDll -; P9LE-LABEL: fromDiffMemConsDll -; P8BE-LABEL: fromDiffMemConsDll -; P8LE-LABEL: fromDiffMemConsDll -; P9BE: lxv v2 -; P9BE: blr -; P9LE: lxv -; P9LE: xxswapd v2 -; P9LE: blr -; P8BE: lxvd2x -; P8BE: xxswapd v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lxvx v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lxvd2x v2, r3, r4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom @@ -3185,27 +4218,44 @@ entry: %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemVarAll -; P9LE-LABEL: fromDiffMemVarAll -; P8BE-LABEL: fromDiffMemVarAll -; P8LE-LABEL: fromDiffMemVarAll -; P9BE: sldi -; P9BE: lxvx v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxvx v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -8 +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: xxswapd v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -8 +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: xxswapd v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r3, r3, -8 +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: xxswapd v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -8 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom @@ -3217,29 +4267,41 @@ entry: %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemVarDll -; P9LE-LABEL: fromDiffMemVarDll -; P8BE-LABEL: fromDiffMemVarDll -; P8LE-LABEL: fromDiffMemVarDll -; P9BE: sldi -; P9BE: lxv -; P9BE: xxswapd v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxv -; P9LE: xxswapd v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE: xxswapd v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromRandMemConsll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: ld r4, 32(r3) +; P9BE-NEXT: ld r3, 144(r3) +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemConsll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: ld r4, 32(r3) +; P9LE-NEXT: ld r3, 144(r3) +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemConsll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: ld r4, 144(r3) +; P8BE-NEXT: ld r3, 32(r3) +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemConsll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: ld r4, 32(r3) +; P8LE-NEXT: ld r3, 144(r3) +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 %0 = load i64, i64* %arrayidx, align 8 @@ -3248,34 +4310,49 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRandMemConsll -; P9LE-LABEL: fromRandMemConsll -; P8BE-LABEL: fromRandMemConsll -; P8LE-LABEL: fromRandMemConsll -; P9BE: ld -; P9BE: ld -; P9BE: mtvsrdd v2 -; P9BE-NEXT: blr -; P9LE: ld -; P9LE: ld -; P9LE: mtvsrdd v2 -; P9LE-NEXT: blr -; P8BE: ld -; P8BE: ld -; P8BE-DAG: mtvsrd -; P8BE-DAG: mtvsrd -; P8BE: xxmrghd v2 -; P8BE-NEXT: blr -; P8LE: ld -; P8LE: ld -; P8LE-DAG: mtvsrd -; P8LE-DAG: mtvsrd -; P8LE: xxmrghd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromRandMemVarll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: ld r4, 32(r3) +; P9BE-NEXT: ld r3, 8(r3) +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemVarll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: ld r4, 32(r3) +; P9LE-NEXT: ld r3, 8(r3) +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemVarll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: ld r4, 8(r3) +; P8BE-NEXT: ld r3, 32(r3) +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemVarll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: ld r4, 32(r3) +; P8LE-NEXT: ld r3, 8(r3) +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 @@ -3288,144 +4365,207 @@ entry: %1 = load i64, i64* %arrayidx3, align 8 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromRandMemVarll -; P9LE-LABEL: fromRandMemVarll -; P8BE-LABEL: fromRandMemVarll -; P8LE-LABEL: fromRandMemVarll -; P9BE: sldi -; P9BE: ld -; P9BE: ld -; P9BE: mtvsrdd v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: ld -; P9LE: ld -; P9LE: mtvsrdd v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: ld -; P8BE: ld -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: ld -; P8LE: ld -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValll(i64 %val) { +; P9BE-LABEL: spltRegValll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrdd v2, r3, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrdd v2, r3, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: xxspltd v2, vs0, 0 +; P8LE-NEXT: blr entry: %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValll -; P9LE-LABEL: spltRegValll -; P8BE-LABEL: spltRegValll -; P8LE-LABEL: spltRegValll -; P9BE: mtvsrdd v2, r3, r3 -; P9BE-NEXT: blr -; P9LE: mtvsrdd v2, r3, r3 -; P9LE-NEXT: blr -; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 -; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 -; P8BE-NEXT: blr -; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 -; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvdsx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvdsx v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx v2, 0, r3 +; P8LE-NEXT: blr entry: %0 = load i64, i64* %ptr, align 8 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValll -; P9LE-LABEL: spltMemValll -; P8BE-LABEL: spltMemValll -; P8LE-LABEL: spltMemValll -; P9BE: lxvdsx v2 -; P9BE-NEXT: blr -; P9LE: lxvdsx v2 -; P9LE-NEXT: blr -; P8BE: lxvdsx v2 -; P8BE-NEXT: blr -; P8LE: lxvdsx v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvftoll() { +; P9BE-LABEL: spltCnstConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI81_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI81_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI81_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI81_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI81_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI81_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI81_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI81_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 4, i64 4> -; P9BE-LABEL: spltCnstConvftoll -; P9LE-LABEL: spltCnstConvftoll -; P8BE-LABEL: spltCnstConvftoll -; P8LE-LABEL: spltCnstConvftoll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvftoll(float %a, float %b) { +; P9BE-LABEL: fromRegsConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xxmrghd vs0, vs1, vs2 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs2, vs1 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: xxmrghd vs0, vs1, vs2 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: xxmrghd vs0, vs2, vs1 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %conv = fptosi float %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptosi float %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRegsConvftoll -; P9LE-LABEL: fromRegsConvftoll -; P8BE-LABEL: fromRegsConvftoll -; P8LE-LABEL: fromRegsConvftoll -; P9BE: xxmrghd -; P9BE: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: xxmrghd -; P9LE: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: xxmrghd -; P8BE: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: xxmrghd -; P8LE: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvftoll() { +; P9BE-LABEL: fromDiffConstsConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI83_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI83_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI83_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI83_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI83_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI83_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI83_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI83_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 24, i64 234> -; P9BE-LABEL: fromDiffConstsConvftoll -; P9LE-LABEL: fromDiffConstsConvftoll -; P8BE-LABEL: fromDiffConstsConvftoll -; P8LE-LABEL: fromDiffConstsConvftoll -; P9BE: lxvx v2 -; P9BE: blr -; P9LE: lxvx v2 -; P9LE: blr -; P8BE: lxvd2x v2 -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: lfs f1, 4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: lfs f1, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfs f1, 4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfs f1, 4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i64 @@ -3435,34 +4575,41 @@ entry: %conv2 = fptosi float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsAConvftoll -; P9LE-LABEL: fromDiffMemConsAConvftoll -; P8BE-LABEL: fromDiffMemConsAConvftoll -; P8LE-LABEL: fromDiffMemConsAConvftoll -; P9BE: lfs -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 12(r3) +; P9BE-NEXT: lfs f1, 8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 12(r3) +; P9LE-NEXT: lfs f1, 8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfs f0, 12(r3) +; P8BE-NEXT: lfs f1, 8(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfs f0, 12(r3) +; P8LE-NEXT: lfs f1, 8(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 @@ -3473,34 +4620,45 @@ entry: %conv2 = fptosi float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsDConvftoll -; P9LE-LABEL: fromDiffMemConsDConvftoll -; P8BE-LABEL: fromDiffMemConsDConvftoll -; P8LE-LABEL: fromDiffMemConsDConvftoll -; P9BE: lfs -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, 4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, 4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, 4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -3514,38 +4672,45 @@ entry: %conv3 = fptosi float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarAConvftoll -; P9LE-LABEL: fromDiffMemVarAConvftoll -; P8BE-LABEL: fromDiffMemVarAConvftoll -; P8LE-LABEL: fromDiffMemVarAConvftoll -; P9BE: sldi -; P9BE: lfsux -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lfsux -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lfsux -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lfsux -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, -4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, -4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, -4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, -4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -3559,181 +4724,249 @@ entry: %conv3 = fptosi float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarDConvftoll -; P9LE-LABEL: fromDiffMemVarDConvftoll -; P8BE-LABEL: fromDiffMemVarDConvftoll -; P8LE-LABEL: fromDiffMemVarDConvftoll -; P9BE: sldi -; P9BE: lfsux -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lfsux -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lfsux -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lfsux -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvftoll(float %val) { +; P9BE-LABEL: spltRegValConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpsxds f0, f1 +; P9BE-NEXT: xxspltd v2, f0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpsxds f0, f1 +; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpsxds f0, f1 +; P8BE-NEXT: xxspltd v2, f0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpsxds f0, f1 +; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: blr entry: %conv = fptosi float %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValConvftoll -; P9LE-LABEL: spltRegValConvftoll -; P8BE-LABEL: spltRegValConvftoll -; P8LE-LABEL: spltRegValConvftoll -; P9BE: xscvdpsxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: xscvdpsxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: xscvdpsxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: xscvdpsxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvftoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: xscvdpsxds f0, f0 +; P9BE-NEXT: xxspltd v2, f0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvftoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: xscvdpsxds f0, f0 +; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvftoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: xscvdpsxds f0, f0 +; P8BE-NEXT: xxspltd v2, f0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvftoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: xscvdpsxds f0, f0 +; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValConvftoll -; P9LE-LABEL: spltMemValConvftoll -; P8BE-LABEL: spltMemValConvftoll -; P8LE-LABEL: spltMemValConvftoll -; P9BE: lfs -; P9BE-NEXT: xscvdpsxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE-NEXT: xscvdpsxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE-NEXT: xscvdpsxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE-NEXT: xscvdpsxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvdtoll() { +; P9BE-LABEL: spltCnstConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI90_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI90_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI90_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI90_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI90_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI90_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI90_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI90_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 4, i64 4> -; P9BE-LABEL: spltCnstConvdtoll -; P9LE-LABEL: spltCnstConvdtoll -; P8BE-LABEL: spltCnstConvdtoll -; P8LE-LABEL: spltCnstConvdtoll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvdtoll(double %a, double %b) { +; P9BE-LABEL: fromRegsConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xxmrghd vs0, vs1, vs2 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs2, vs1 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: xxmrghd vs0, vs1, vs2 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: xxmrghd vs0, vs2, vs1 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %conv = fptosi double %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptosi double %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRegsConvdtoll -; P9LE-LABEL: fromRegsConvdtoll -; P8BE-LABEL: fromRegsConvdtoll -; P8LE-LABEL: fromRegsConvdtoll -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpsxds -; P9BE-NEXT: blr -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpsxds -; P9LE-NEXT: blr -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpsxds -; P8BE-NEXT: blr -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpsxds -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvdtoll() { +; P9BE-LABEL: fromDiffConstsConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI92_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI92_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI92_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI92_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI92_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI92_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI92_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI92_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 24, i64 234> -; P9BE-LABEL: fromDiffConstsConvdtoll -; P9LE-LABEL: fromDiffConstsConvdtoll -; P8BE-LABEL: fromDiffConstsConvdtoll -; P8LE-LABEL: fromDiffConstsConvdtoll -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptosi <2 x double> %1 to <2 x i64> ret <2 x i64> %2 -; P9BE-LABEL: fromDiffMemConsAConvdtoll -; P9LE-LABEL: fromDiffMemConsAConvdtoll -; P8BE-LABEL: fromDiffMemConsAConvdtoll -; P8LE-LABEL: fromDiffMemConsAConvdtoll -; P9BE: lxv -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: lxv -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: lxvd2x -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x -; P8LE: xxswapd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 16(r3) +; P9BE-NEXT: xxswapd vs0, vs0 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 16(r3) +; P9LE-NEXT: xxswapd vs0, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 16 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xxswapd vs0, vs0 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 16 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 @@ -3744,29 +4977,38 @@ entry: %conv2 = fptosi double %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsDConvdtoll -; P9LE-LABEL: fromDiffMemConsDConvdtoll -; P8BE-LABEL: fromDiffMemConsDConvdtoll -; P8LE-LABEL: fromDiffMemConsDConvdtoll -; P9BE: lxv -; P9BE-NEXT: xxswapd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: lxv -; P9LE-NEXT: xxswapd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: lxvd2x -; P8BE-NEXT: xxswapd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lxvx vs0, r3, r4 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lxvx vs0, r3, r4 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lxvd2x vs0, r3, r4 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -3780,31 +5022,48 @@ entry: %conv3 = fptosi double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarAConvdtoll -; P9LE-LABEL: fromDiffMemVarAConvdtoll -; P8BE-LABEL: fromDiffMemVarAConvdtoll -; P8LE-LABEL: fromDiffMemVarAConvdtoll -; P9BE: sldi -; P9BE: lxvx -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxvx -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE-NEXT: xxswapd -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -8 +; P9BE-NEXT: lxvx vs0, 0, r3 +; P9BE-NEXT: xxswapd vs0, vs0 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -8 +; P9LE-NEXT: lxvx vs0, 0, r3 +; P9LE-NEXT: xxswapd vs0, vs0 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r3, r3, -8 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xxswapd vs0, vs0 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -8 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -3818,216 +5077,312 @@ entry: %conv3 = fptosi double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarDConvdtoll -; P9LE-LABEL: fromDiffMemVarDConvdtoll -; P8BE-LABEL: fromDiffMemVarDConvdtoll -; P8LE-LABEL: fromDiffMemVarDConvdtoll -; P9BE: sldi -; P9BE: lxv -; P9BE-NEXT: xxswapd -; P9BE-NEXT: xvcvdpsxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxv -; P9LE-NEXT: xxswapd -; P9LE-NEXT: xvcvdpsxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE-NEXT: xxswapd -; P8BE-NEXT: xvcvdpsxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE-NEXT: xvcvdpsxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvdtoll(double %val) { +; P9BE-LABEL: spltRegValConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpsxds f0, f1 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpsxds f0, f1 +; P9LE-NEXT: xxspltd v2, vs0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpsxds f0, f1 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpsxds f0, f1 +; P8LE-NEXT: xxspltd v2, vs0, 0 +; P8LE-NEXT: blr entry: %conv = fptosi double %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValConvdtoll -; P9LE-LABEL: spltRegValConvdtoll -; P8BE-LABEL: spltRegValConvdtoll -; P8LE-LABEL: spltRegValConvdtoll -; P9BE: xscvdpsxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: xscvdpsxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: xscvdpsxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: xscvdpsxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvdtoll: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvdsx vs0, 0, r3 +; P9BE-NEXT: xvcvdpsxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvdtoll: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx vs0, 0, r3 +; P9LE-NEXT: xvcvdpsxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvdtoll: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvdsx vs0, 0, r3 +; P8BE-NEXT: xvcvdpsxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvdtoll: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx vs0, 0, r3 +; P8LE-NEXT: xvcvdpsxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = load double, double* %ptr, align 8 %conv = fptosi double %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValConvdtoll -; P9LE-LABEL: spltMemValConvdtoll -; P8BE-LABEL: spltMemValConvdtoll -; P8LE-LABEL: spltMemValConvdtoll -; P9BE: lxvdsx -; P9BE-NEXT: xvcvdpsxds -; P9BE-NEXT: blr -; P9LE: lxvdsx -; P9LE-NEXT: xvcvdpsxds -; P9LE-NEXT: blr -; P8BE: lxvdsx -; P8BE-NEXT: xvcvdpsxds -; P8BE-NEXT: blr -; P8LE: lxvdsx -; P8LE-NEXT: xvcvdpsxds -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allZeroull() { +; P9BE-LABEL: allZeroull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxlxor v2, v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allZeroull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxlxor v2, v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allZeroull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxlxor v2, v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allZeroull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxlxor v2, v2, v2 +; P8LE-NEXT: blr entry: ret <2 x i64> zeroinitializer -; P9BE-LABEL: allZeroull -; P9LE-LABEL: allZeroull -; P8BE-LABEL: allZeroull -; P8LE-LABEL: allZeroull -; P9BE: xxlxor v2, v2, v2 -; P9BE: blr -; P9LE: xxlxor v2, v2, v2 -; P9LE: blr -; P8BE: xxlxor v2, v2, v2 -; P8BE: blr -; P8LE: xxlxor v2, v2, v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allOneull() { +; P9BE-LABEL: allOneull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxspltib v2, 255 +; P9BE-NEXT: blr +; +; P9LE-LABEL: allOneull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxspltib v2, 255 +; P9LE-NEXT: blr +; +; P8BE-LABEL: allOneull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: vspltisb v2, -1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: allOneull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: vspltisb v2, -1 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 -1, i64 -1> -; P9BE-LABEL: allOneull -; P9LE-LABEL: allOneull -; P8BE-LABEL: allOneull -; P8LE-LABEL: allOneull -; P9BE: xxspltib v2, 255 -; P9BE: blr -; P9LE: xxspltib v2, 255 -; P9LE: blr -; P8BE: vspltisb v2, -1 -; P8BE: blr -; P8LE: vspltisb v2, -1 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst1ull() { +; P9BE-LABEL: spltConst1ull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI101_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI101_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst1ull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI101_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI101_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst1ull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI101_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI101_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst1ull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI101_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI101_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 1, i64 1> -; P9BE-LABEL: spltConst1ull -; P9LE-LABEL: spltConst1ull -; P8BE-LABEL: spltConst1ull -; P8LE-LABEL: spltConst1ull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst16kull() { +; P9BE-LABEL: spltConst16kull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI102_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI102_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst16kull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI102_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI102_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst16kull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI102_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI102_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst16kull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI102_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI102_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 32767, i64 32767> -; P9BE-LABEL: spltConst16kull -; P9LE-LABEL: spltConst16kull -; P8BE-LABEL: spltConst16kull -; P8LE-LABEL: spltConst16kull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst32kull() { +; P9BE-LABEL: spltConst32kull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI103_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI103_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltConst32kull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI103_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI103_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltConst32kull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI103_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI103_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltConst32kull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI103_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI103_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 65535, i64 65535> -; P9BE-LABEL: spltConst32kull -; P9LE-LABEL: spltConst32kull -; P8BE-LABEL: spltConst32kull -; P8LE-LABEL: spltConst32kull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsull(i64 %a, i64 %b) { +; P9BE-LABEL: fromRegsull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrdd v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 ret <2 x i64> %vecinit1 -; P9BE-LABEL: fromRegsull -; P9LE-LABEL: fromRegsull -; P8BE-LABEL: fromRegsull -; P8LE-LABEL: fromRegsull -; P9BE: mtvsrdd v2, r3, r4 -; P9BE: blr -; P9LE: mtvsrdd v2, r4, r3 -; P9LE: blr -; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 -; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 -; P8BE: xxmrghd v2 -; P8BE: blr -; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 -; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 -; P8LE: xxmrghd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsull() { +; P9BE-LABEL: fromDiffConstsull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI105_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI105_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI105_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI105_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI105_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI105_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI105_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI105_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 242, i64 -113> -; P9BE-LABEL: fromDiffConstsull -; P9LE-LABEL: fromDiffConstsull -; P8BE-LABEL: fromDiffConstsull -; P8LE-LABEL: fromDiffConstsull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsAull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %0 = load i64, i64* %arr, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 @@ -4035,23 +5390,34 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromDiffMemConsAull -; P9LE-LABEL: fromDiffMemConsAull -; P8BE-LABEL: fromDiffMemConsAull -; P8LE-LABEL: fromDiffMemConsAull -; P9BE: lxv v2 -; P9BE: blr -; P9LE: lxv v2 -; P9LE: blr -; P8BE: lxvd2x v2 -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromDiffMemConsDull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv v2, 16(r3) +; P9BE-NEXT: xxswapd v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv v2, 16(r3) +; P9LE-NEXT: xxswapd v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 16 +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: xxswapd v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 16 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3 %0 = load i64, i64* %arrayidx, align 8 @@ -4060,24 +5426,34 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromDiffMemConsDull -; P9LE-LABEL: fromDiffMemConsDull -; P8BE-LABEL: fromDiffMemConsDull -; P8LE-LABEL: fromDiffMemConsDull -; P9BE: lxv v2 -; P9BE: blr -; P9LE: lxv -; P9LE: xxswapd v2 -; P9LE: blr -; P8BE: lxvd2x -; P8BE: xxswapd v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lxvx v2, r3, r4 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lxvd2x v2, r3, r4 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom @@ -4089,27 +5465,44 @@ entry: %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemVarAull -; P9LE-LABEL: fromDiffMemVarAull -; P8BE-LABEL: fromDiffMemVarAull -; P8LE-LABEL: fromDiffMemVarAull -; P9BE: sldi -; P9BE: lxvx v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxvx v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -8 +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: xxswapd v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -8 +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: xxswapd v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r3, r3, -8 +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: xxswapd v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -8 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom @@ -4121,29 +5514,41 @@ entry: %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemVarDull -; P9LE-LABEL: fromDiffMemVarDull -; P8BE-LABEL: fromDiffMemVarDull -; P8LE-LABEL: fromDiffMemVarDull -; P9BE: sldi -; P9BE: lxv -; P9BE: xxswapd v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxv -; P9LE: xxswapd v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE: xxswapd v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) { +; P9BE-LABEL: fromRandMemConsull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: ld r4, 32(r3) +; P9BE-NEXT: ld r3, 144(r3) +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemConsull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: ld r4, 32(r3) +; P9LE-NEXT: ld r3, 144(r3) +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemConsull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: ld r4, 144(r3) +; P8BE-NEXT: ld r3, 32(r3) +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemConsull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: ld r4, 32(r3) +; P8LE-NEXT: ld r3, 144(r3) +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 %0 = load i64, i64* %arrayidx, align 8 @@ -4152,34 +5557,49 @@ entry: %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRandMemConsull -; P9LE-LABEL: fromRandMemConsull -; P8BE-LABEL: fromRandMemConsull -; P8LE-LABEL: fromRandMemConsull -; P9BE: ld -; P9BE: ld -; P9BE: mtvsrdd v2 -; P9BE-NEXT: blr -; P9LE: ld -; P9LE: ld -; P9LE: mtvsrdd v2 -; P9LE-NEXT: blr -; P8BE: ld -; P8BE: ld -; P8BE-DAG: mtvsrd -; P8BE-DAG: mtvsrd -; P8BE: xxmrghd v2 -; P8BE-NEXT: blr -; P8LE: ld -; P8LE: ld -; P8LE-DAG: mtvsrd -; P8LE-DAG: mtvsrd -; P8LE: xxmrghd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromRandMemVarull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: ld r4, 32(r3) +; P9BE-NEXT: ld r3, 8(r3) +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRandMemVarull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: ld r4, 32(r3) +; P9LE-NEXT: ld r3, 8(r3) +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRandMemVarull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: ld r4, 8(r3) +; P8BE-NEXT: ld r3, 32(r3) +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRandMemVarull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: ld r4, 32(r3) +; P8LE-NEXT: ld r3, 8(r3) +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 @@ -4192,144 +5612,207 @@ entry: %1 = load i64, i64* %arrayidx3, align 8 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromRandMemVarull -; P9LE-LABEL: fromRandMemVarull -; P8BE-LABEL: fromRandMemVarull -; P8LE-LABEL: fromRandMemVarull -; P9BE: sldi -; P9BE: ld -; P9BE: ld -; P9BE: mtvsrdd v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: ld -; P9LE: ld -; P9LE: mtvsrdd v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: ld -; P8BE: ld -; P8BE: mtvsrd -; P8BE: mtvsrd -; P8BE: xxmrghd v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: ld -; P8LE: ld -; P8LE: mtvsrd -; P8LE: mtvsrd -; P8LE: xxmrghd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValull(i64 %val) { +; P9BE-LABEL: spltRegValull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mtvsrdd v2, r3, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mtvsrdd v2, r3, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: xxspltd v2, vs0, 0 +; P8LE-NEXT: blr entry: %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValull -; P9LE-LABEL: spltRegValull -; P8BE-LABEL: spltRegValull -; P8LE-LABEL: spltRegValull -; P9BE: mtvsrdd v2, r3, r3 -; P9BE-NEXT: blr -; P9LE: mtvsrdd v2, r3, r3 -; P9LE-NEXT: blr -; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 -; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 -; P8BE-NEXT: blr -; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 -; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvdsx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvdsx v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx v2, 0, r3 +; P8LE-NEXT: blr entry: %0 = load i64, i64* %ptr, align 8 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValull -; P9LE-LABEL: spltMemValull -; P8BE-LABEL: spltMemValull -; P8LE-LABEL: spltMemValull -; P9BE: lxvdsx v2 -; P9BE-NEXT: blr -; P9LE: lxvdsx v2 -; P9LE-NEXT: blr -; P8BE: lxvdsx v2 -; P8BE-NEXT: blr -; P8LE: lxvdsx v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvftoull() { +; P9BE-LABEL: spltCnstConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI114_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI114_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI114_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI114_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI114_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI114_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI114_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI114_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 4, i64 4> -; P9BE-LABEL: spltCnstConvftoull -; P9LE-LABEL: spltCnstConvftoull -; P8BE-LABEL: spltCnstConvftoull -; P8LE-LABEL: spltCnstConvftoull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvftoull(float %a, float %b) { +; P9BE-LABEL: fromRegsConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xxmrghd vs0, vs1, vs2 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs2, vs1 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: xxmrghd vs0, vs1, vs2 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: xxmrghd vs0, vs2, vs1 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %conv = fptoui float %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptoui float %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRegsConvftoull -; P9LE-LABEL: fromRegsConvftoull -; P8BE-LABEL: fromRegsConvftoull -; P8LE-LABEL: fromRegsConvftoull -; P9BE: xxmrghd -; P9BE: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: xxmrghd -; P9LE: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: xxmrghd -; P8BE: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: xxmrghd -; P8LE: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvftoull() { +; P9BE-LABEL: fromDiffConstsConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI116_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI116_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI116_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI116_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI116_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI116_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI116_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI116_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 24, i64 234> -; P9BE-LABEL: fromDiffConstsConvftoull -; P9LE-LABEL: fromDiffConstsConvftoull -; P8BE-LABEL: fromDiffConstsConvftoull -; P8LE-LABEL: fromDiffConstsConvftoull -; P9BE: lxvx v2 -; P9BE: blr -; P9LE: lxvx v2 -; P9LE: blr -; P8BE: lxvd2x v2 -; P8BE: blr -; P8LE: lxvd2x -; P8LE: xxswapd v2 -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: lfs f1, 4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: lfs f1, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfs f1, 4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfs f1, 4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i64 @@ -4339,34 +5822,41 @@ entry: %conv2 = fptoui float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsAConvftoull -; P9LE-LABEL: fromDiffMemConsAConvftoull -; P8BE-LABEL: fromDiffMemConsAConvftoull -; P8LE-LABEL: fromDiffMemConsAConvftoull -; P9BE: lfs -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 12(r3) +; P9BE-NEXT: lfs f1, 8(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 12(r3) +; P9LE-NEXT: lfs f1, 8(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfs f0, 12(r3) +; P8BE-NEXT: lfs f1, 8(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfs f0, 12(r3) +; P8LE-NEXT: lfs f1, 8(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 @@ -4377,34 +5867,45 @@ entry: %conv2 = fptoui float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsDConvftoull -; P9LE-LABEL: fromDiffMemConsDConvftoull -; P8BE-LABEL: fromDiffMemConsDConvftoull -; P8LE-LABEL: fromDiffMemConsDConvftoull -; P9BE: lfs -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, 4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, 4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, 4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, 4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -4418,38 +5919,45 @@ entry: %conv3 = fptoui float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarAConvftoull -; P9LE-LABEL: fromDiffMemVarAConvftoull -; P8BE-LABEL: fromDiffMemVarAConvftoull -; P8LE-LABEL: fromDiffMemVarAConvftoull -; P9BE: sldi -; P9BE: lfsux -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lfsux -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lfsux -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lfsux -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: lfsux f0, r3, r4 +; P9BE-NEXT: lfs f1, -4(r3) +; P9BE-NEXT: xxmrghd vs0, vs0, vs1 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: lfsux f0, r3, r4 +; P9LE-NEXT: lfs f1, -4(r3) +; P9LE-NEXT: xxmrghd vs0, vs1, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 2 +; P8BE-NEXT: lfsux f0, r3, r4 +; P8BE-NEXT: lfs f1, -4(r3) +; P8BE-NEXT: xxmrghd vs0, vs0, vs1 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: lfsux f0, r3, r4 +; P8LE-NEXT: lfs f1, -4(r3) +; P8LE-NEXT: xxmrghd vs0, vs1, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom @@ -4463,181 +5971,249 @@ entry: %conv3 = fptoui float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarDConvftoull -; P9LE-LABEL: fromDiffMemVarDConvftoull -; P8BE-LABEL: fromDiffMemVarDConvftoull -; P8LE-LABEL: fromDiffMemVarDConvftoull -; P9BE: sldi -; P9BE: lfsux -; P9BE: lfs -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lfsux -; P9LE: lfs -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lfsux -; P8BE: lfs -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lfsux -; P8LE: lfs -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvftoull(float %val) { +; P9BE-LABEL: spltRegValConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpuxds f0, f1 +; P9BE-NEXT: xxspltd v2, f0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpuxds f0, f1 +; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpuxds f0, f1 +; P8BE-NEXT: xxspltd v2, f0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpuxds f0, f1 +; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: blr entry: %conv = fptoui float %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValConvftoull -; P9LE-LABEL: spltRegValConvftoull -; P8BE-LABEL: spltRegValConvftoull -; P8LE-LABEL: spltRegValConvftoull -; P9BE: xscvdpuxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: xscvdpuxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: xscvdpuxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: xscvdpuxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvftoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: xscvdpuxds f0, f0 +; P9BE-NEXT: xxspltd v2, f0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvftoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: xscvdpuxds f0, f0 +; P9LE-NEXT: xxspltd v2, f0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvftoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: xscvdpuxds f0, f0 +; P8BE-NEXT: xxspltd v2, f0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvftoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: xscvdpuxds f0, f0 +; P8LE-NEXT: xxspltd v2, f0, 0 +; P8LE-NEXT: blr entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValConvftoull -; P9LE-LABEL: spltMemValConvftoull -; P8BE-LABEL: spltMemValConvftoull -; P8LE-LABEL: spltMemValConvftoull -; P9BE: lfs -; P9BE-NEXT: xscvdpuxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: lfs -; P9LE-NEXT: xscvdpuxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: lfs -; P8BE-NEXT: xscvdpuxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: lfs -; P8LE-NEXT: xscvdpuxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvdtoull() { +; P9BE-LABEL: spltCnstConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI123_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI123_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltCnstConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI123_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI123_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltCnstConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI123_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI123_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltCnstConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI123_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI123_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 4, i64 4> -; P9BE-LABEL: spltCnstConvdtoull -; P9LE-LABEL: spltCnstConvdtoull -; P8BE-LABEL: spltCnstConvdtoull -; P8LE-LABEL: spltCnstConvdtoull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvdtoull(double %a, double %b) { +; P9BE-LABEL: fromRegsConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9BE-NEXT: xxmrghd vs0, vs1, vs2 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromRegsConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9LE-NEXT: xxmrghd vs0, vs2, vs1 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromRegsConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8BE-NEXT: xxmrghd vs0, vs1, vs2 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromRegsConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2 +; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8LE-NEXT: xxmrghd vs0, vs2, vs1 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %conv = fptoui double %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptoui double %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 -; P9BE-LABEL: fromRegsConvdtoull -; P9LE-LABEL: fromRegsConvdtoull -; P8BE-LABEL: fromRegsConvdtoull -; P8LE-LABEL: fromRegsConvdtoull -; P9BE: xxmrghd -; P9BE-NEXT: xvcvdpuxds -; P9BE-NEXT: blr -; P9LE: xxmrghd -; P9LE-NEXT: xvcvdpuxds -; P9LE-NEXT: blr -; P8BE: xxmrghd -; P8BE-NEXT: xvcvdpuxds -; P8BE-NEXT: blr -; P8LE: xxmrghd -; P8LE-NEXT: xvcvdpuxds -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvdtoull() { +; P9BE-LABEL: fromDiffConstsConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI125_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI125_0@toc@l +; P9BE-NEXT: lxvx v2, 0, r3 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffConstsConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI125_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI125_0@toc@l +; P9LE-NEXT: lxvx v2, 0, r3 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffConstsConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addis r3, r2, .LCPI125_0@toc@ha +; P8BE-NEXT: addi r3, r3, .LCPI125_0@toc@l +; P8BE-NEXT: lxvd2x v2, 0, r3 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffConstsConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addis r3, r2, .LCPI125_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI125_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr entry: ret <2 x i64> <i64 24, i64 234> -; P9BE-LABEL: fromDiffConstsConvdtoull -; P9LE-LABEL: fromDiffConstsConvdtoull -; P8BE-LABEL: fromDiffConstsConvdtoull -; P8LE-LABEL: fromDiffConstsConvdtoull -; P9BE: lxv -; P9BE: blr -; P9LE: lxv -; P9LE: blr -; P8BE: lxvd2x -; P8BE: blr -; P8LE: lxvd2x -; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsAConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsAConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsAConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsAConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptoui <2 x double> %1 to <2 x i64> ret <2 x i64> %2 -; P9BE-LABEL: fromDiffMemConsAConvdtoull -; P9LE-LABEL: fromDiffMemConsAConvdtoull -; P8BE-LABEL: fromDiffMemConsAConvdtoull -; P8LE-LABEL: fromDiffMemConsAConvdtoull -; P9BE: lxv -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: lxv -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: lxvd2x -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x -; P8LE: xxswapd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) { +; P9BE-LABEL: fromDiffMemConsDConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 16(r3) +; P9BE-NEXT: xxswapd vs0, vs0 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemConsDConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 16(r3) +; P9LE-NEXT: xxswapd vs0, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemConsDConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 16 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xxswapd vs0, vs0 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemConsDConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 16 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 @@ -4648,29 +6224,38 @@ entry: %conv2 = fptoui double %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 -; P9BE-LABEL: fromDiffMemConsDConvdtoull -; P9LE-LABEL: fromDiffMemConsDConvdtoull -; P8BE-LABEL: fromDiffMemConsDConvdtoull -; P8LE-LABEL: fromDiffMemConsDConvdtoull -; P9BE: lxv -; P9BE-NEXT: xxswapd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: lxv -; P9LE-NEXT: xxswapd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: lxvd2x -; P8BE-NEXT: xxswapd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: lxvd2x -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarAConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: lxvx vs0, r3, r4 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarAConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: lxvx vs0, r3, r4 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarAConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: lxvd2x vs0, r3, r4 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarAConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: lxvd2x vs0, r3, r4 +; P8LE-NEXT: xxswapd vs0, vs0 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -4684,31 +6269,48 @@ entry: %conv3 = fptoui double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarAConvdtoull -; P9LE-LABEL: fromDiffMemVarAConvdtoull -; P8BE-LABEL: fromDiffMemVarAConvdtoull -; P8LE-LABEL: fromDiffMemVarAConvdtoull -; P9BE: sldi -; P9BE: lxvx -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxvx -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE-NEXT: xxswapd -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) { +; P9BE-LABEL: fromDiffMemVarDConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 3 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addi r3, r3, -8 +; P9BE-NEXT: lxvx vs0, 0, r3 +; P9BE-NEXT: xxswapd vs0, vs0 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fromDiffMemVarDConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 3 +; P9LE-NEXT: add r3, r3, r4 +; P9LE-NEXT: addi r3, r3, -8 +; P9LE-NEXT: lxvx vs0, 0, r3 +; P9LE-NEXT: xxswapd vs0, vs0 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fromDiffMemVarDConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r4, 3 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: addi r3, r3, -8 +; P8BE-NEXT: lxvd2x vs0, 0, r3 +; P8BE-NEXT: xxswapd vs0, vs0 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fromDiffMemVarDConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r4, 3 +; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r3, r3, -8 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom @@ -4722,78 +6324,69 @@ entry: %conv3 = fptoui double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 -; P9BE-LABEL: fromDiffMemVarDConvdtoull -; P9LE-LABEL: fromDiffMemVarDConvdtoull -; P8BE-LABEL: fromDiffMemVarDConvdtoull -; P8LE-LABEL: fromDiffMemVarDConvdtoull -; P9BE: sldi -; P9BE: lxv -; P9BE-NEXT: xxswapd -; P9BE-NEXT: xvcvdpuxds v2 -; P9BE-NEXT: blr -; P9LE: sldi -; P9LE: lxv -; P9LE-NEXT: xxswapd -; P9LE-NEXT: xvcvdpuxds v2 -; P9LE-NEXT: blr -; P8BE: sldi -; P8BE: lxvd2x -; P8BE-NEXT: xxswapd -; P8BE-NEXT: xvcvdpuxds v2 -; P8BE-NEXT: blr -; P8LE: sldi -; P8LE: lxvd2x -; P8LE-NEXT: xvcvdpuxds v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvdtoull(double %val) { +; P9BE-LABEL: spltRegValConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xscvdpuxds f0, f1 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltRegValConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xscvdpuxds f0, f1 +; P9LE-NEXT: xxspltd v2, vs0, 0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltRegValConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xscvdpuxds f0, f1 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltRegValConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xscvdpuxds f0, f1 +; P8LE-NEXT: xxspltd v2, vs0, 0 +; P8LE-NEXT: blr entry: %conv = fptoui double %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltRegValConvdtoull -; P9LE-LABEL: spltRegValConvdtoull -; P8BE-LABEL: spltRegValConvdtoull -; P8LE-LABEL: spltRegValConvdtoull -; P9BE: xscvdpuxds -; P9BE-NEXT: xxspltd v2 -; P9BE-NEXT: blr -; P9LE: xscvdpuxds -; P9LE-NEXT: xxspltd v2 -; P9LE-NEXT: blr -; P8BE: xscvdpuxds -; P8BE-NEXT: xxspltd v2 -; P8BE-NEXT: blr -; P8LE: xscvdpuxds -; P8LE-NEXT: xxspltd v2 -; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) { +; P9BE-LABEL: spltMemValConvdtoull: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxvdsx vs0, 0, r3 +; P9BE-NEXT: xvcvdpuxds v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: spltMemValConvdtoull: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx vs0, 0, r3 +; P9LE-NEXT: xvcvdpuxds v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: spltMemValConvdtoull: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxvdsx vs0, 0, r3 +; P8BE-NEXT: xvcvdpuxds v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: spltMemValConvdtoull: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx vs0, 0, r3 +; P8LE-NEXT: xvcvdpuxds v2, vs0 +; P8LE-NEXT: blr entry: %0 = load double, double* %ptr, align 8 %conv = fptoui double %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; P9BE-LABEL: spltMemValConvdtoull -; P9LE-LABEL: spltMemValConvdtoull -; P8BE-LABEL: spltMemValConvdtoull -; P8LE-LABEL: spltMemValConvdtoull -; P9BE: lxvdsx -; P9BE-NEXT: xvcvdpuxds -; P9BE-NEXT: blr -; P9LE: lxvdsx -; P9LE-NEXT: xvcvdpuxds -; P9LE-NEXT: blr -; P8BE: lxvdsx -; P8BE-NEXT: xvcvdpuxds -; P8BE-NEXT: blr -; P8LE: lxvdsx -; P8LE-NEXT: xvcvdpuxds -; P8LE-NEXT: blr } |

