CodeGen: Power: Add lowering for shifts of v1i128.

When legalizing vector operations on vNi128, they will be split to v1i128 because that is a legal type on ppc64, but the compiler then crashes in SelectionDAG because it fails to select these operations. This patch fixes the shift operations: logical shift right and shift left can be performed in the vector unit, but algebraic shift right must be split.

Differential Revision: https://reviews.llvm.org/D32774

llvm-svn: 303307
author: Kyle Butt <kyle+llvm@iteratee.net> 2017-05-17 21:54:41 +0000
committer: Kyle Butt <kyle+llvm@iteratee.net> 2017-05-17 21:54:41 +0000
commit: f6c61ef64d1293a0531cbcd0afeb2a401edd501e (patch)
tree: 1441f525eb865508738f3b765fa9f47738b4e9f2
parent: ab12984634c6614b5cb17390e0190562d248ed77 (diff)
download: bcm5719-llvm-f6c61ef64d1293a0531cbcd0afeb2a401edd501e.tar.gz
bcm5719-llvm-f6c61ef64d1293a0531cbcd0afeb2a401edd501e.zip
3 files changed, 111 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 144aea85083..e65b1f1aa0a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -689,6 +689,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
 
+        // 128-bit shifts can be accomplished via 3 instructions for SHL and
+        // SRL, but not for SRA because of the instructions available:
+        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
+        // worth doing here, so all three shifts are expanded instead.
+        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
+        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
+        setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
       }
       else {
@@ -742,6 +750,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     if (Subtarget.hasP9Vector()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+      // 128 bit shifts can be accomplished via 3 instructions for SHL and
+      // SRL, but not for SRA because of the instructions available:
+      // VS{RL} and VS{RL}O.
+      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
+      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
+      setOperationAction(ISD::SRA, MVT::v1i128, Expand);
     }
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index e14d18fd543..5465b5f2d66 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -987,12 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
           (v8i16 (VSLH $vA, $vB))>;
 def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
 def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
           (v16i8 (VSLB $vA, $vB))>;
 def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
           (v8i16 (VSLH $vA, $vB))>;
 def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
 
 def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
           (v16i8 (VSRB $vA, $vB))>;
@@ -1000,12 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
           (v8i16 (VSRH $vA, $vB))>;
 def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
 def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
           (v16i8 (VSRB $vA, $vB))>;
 def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
           (v8i16 (VSRH $vA, $vB))>;
 def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
 
 def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
           (v16i8 (VSRAB $vA, $vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/shift128.ll b/llvm/test/CodeGen/PowerPC/shift128.ll
index 17a380c71c3..48e1b96f838 100644
--- a/llvm/test/CodeGen/PowerPC/shift128.ll
+++ b/llvm/test/CodeGen/PowerPC/shift128.ll
@@ -1,14 +1,98 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc64 | grep sld | count 5
+; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=P8 --check-prefix=CHECK %s
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs < %s | FileCheck --check-prefix=P9 --check-prefix=CHECK %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
 
-define i128 @foo_lshr(i128 %x, i128 %y) {
+; CHECK-LABEL: lshr:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
+; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
+; CHECK-DAG: srd [[R4:[0-9]+]], 4, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: or 3, [[R5]], [[R4]]
+; CHECK-DAG: srd 4, 4, 5
+; CHECK: blr
+define i128 @lshr(i128 %x, i128 %y) {
   %r = lshr i128 %x, %y
   ret i128 %r
 }
-define i128 @foo_ashr(i128 %x, i128 %y) {
+; CHECK-LABEL: ashr:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
+; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
+; CHECK-DAG: srad [[R4:[0-9]+]], 4, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: cmpwi [[R1]], 1
+; CHECK-DAG: srad 4, 4, 5
+; CHECK: isel 3, [[R5]], [[R4]], 0
+; CHECK: blr
+define i128 @ashr(i128 %x, i128 %y) {
   %r = ashr i128 %x, %y
   ret i128 %r
 }
-define i128 @foo_shl(i128 %x, i128 %y) {
+; CHECK-LABEL: shl:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: sld [[R2:[0-9]+]], 4, 5
+; CHECK-DAG: srd [[R3:[0-9]+]], 3, [[R0]]
+; CHECK-DAG: sld [[R4:[0-9]+]], 3, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: or 4, [[R5]], [[R4]]
+; CHECK-DAG: sld 3, 3, 5
+; CHECK: blr
+define i128 @shl(i128 %x, i128 %y) {
   %r = shl i128 %x, %y
   ret i128 %r
 }
+
+; CHECK-LABEL: shl_v1i128:
+; P8-NOT: {{\b}}vslo
+; P8-NOT: {{\b}}vsl
+; P9-DAG: vslo
+; P9-DAG: vspltb
+; P9: vsl
+; P9-NOT: {{\b}}sld
+; P9-NOT: {{\b}}srd
+; CHECK: blr
+define i128 @shl_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = shl <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
+
+; CHECK-LABEL: lshr_v1i128:
+; P8-NOT: {{\b}}vsro
+; P8-NOT: {{\b}}vsr
+; P9-DAG: vsro
+; P9-DAG: vspltb
+; P9: vsr
+; P9-NOT: {{\b}}srd
+; P9-NOT: {{\b}}sld
+; CHECK: blr
+define i128 @lshr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = lshr <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
+
+; Arithmetic shift right is not available as an operation on the vector registers.
+; CHECK-LABEL: ashr_v1i128:
+; CHECK-NOT: {{\b}}vsro
+; CHECK-NOT: {{\b}}vsr
+; CHECK: blr
+define i128 @ashr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = ashr <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
author	Kyle Butt <kyle+llvm@iteratee.net>	2017-05-17 21:54:41 +0000
committer	Kyle Butt <kyle+llvm@iteratee.net>	2017-05-17 21:54:41 +0000
commit	f6c61ef64d1293a0531cbcd0afeb2a401edd501e (patch)
tree	1441f525eb865508738f3b765fa9f47738b4e9f2
parent	ab12984634c6614b5cb17390e0190562d248ed77 (diff)
download	bcm5719-llvm-f6c61ef64d1293a0531cbcd0afeb2a401edd501e.tar.gz bcm5719-llvm-f6c61ef64d1293a0531cbcd0afeb2a401edd501e.zip