summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZhan Jun Liau <zhanjunl@ca.ibm.com>2016-06-28 21:03:19 +0000
committerZhan Jun Liau <zhanjunl@ca.ibm.com>2016-06-28 21:03:19 +0000
commit347db3e18ee7375cf038a8173880466fd0fe6312 (patch)
tree2e8d8a0d835570af697ceba4ffbe3ccba2ee081e
parent0fecee9cd8af1de3b103ca7c6dec054bedbca035 (diff)
downloadbcm5719-llvm-347db3e18ee7375cf038a8173880466fd0fe6312.tar.gz
bcm5719-llvm-347db3e18ee7375cf038a8173880466fd0fe6312.zip
[SystemZ] Use NILL instruction instead of NILF where possible
Summary: SystemZ shift instructions only use the last 6 bits of the shift amount. When the result of an AND operation is used as a shift amount, this means that we can use the NILL instruction (which operates on the last 16 bits) rather than NILF (which operates on the last 32 bits) for a 16-bit savings in instruction size. Reviewers: uweigand Subscribers: llvm-commits Author: colpell Committing on behalf of Elliot. Differential Revision: http://reviews.llvm.org/D21686 llvm-svn: 274066
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td33
-rw-r--r--llvm/test/CodeGen/SystemZ/rot-01.ll35
-rw-r--r--llvm/test/CodeGen/SystemZ/shift-11.ll63
3 files changed, 130 insertions, 1 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7a7a3e30863..012d170552f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1022,7 +1022,7 @@ let Defs = [CC] in {
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
@@ -1685,6 +1685,37 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
(XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+ def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
XCSequence, 1>;
diff --git a/llvm/test/CodeGen/SystemZ/rot-01.ll b/llvm/test/CodeGen/SystemZ/rot-01.ll
new file mode 100644
index 00000000000..5e97f5d6505
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/rot-01.ll
@@ -0,0 +1,35 @@
+; Test shortening of NILF to NILL when the result is used as a rotate amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 32-bit rotate.
+define i32 @f1(i32 %val, i32 %amt) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: rll %r2, %r2, 0(%r3)
+ %mod = urem i32 %amt, 32
+
+ %inv = sub i32 32, %mod
+ %parta = shl i32 %val, %mod
+ %partb = lshr i32 %val, %inv
+
+ %rotl = or i32 %parta, %partb
+
+ ret i32 %rotl
+}
+
+; Test 64-bit rotate.
+define i64 @f2(i64 %val, i64 %amt) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 63
+; CHECK: rllg %r2, %r2, 0(%r3)
+ %mod = urem i64 %amt, 64
+
+ %inv = sub i64 64, %mod
+ %parta = shl i64 %val, %mod
+ %partb = lshr i64 %val, %inv
+
+ %rotl = or i64 %parta, %partb
+
+ ret i64 %rotl
+}
diff --git a/llvm/test/CodeGen/SystemZ/shift-11.ll b/llvm/test/CodeGen/SystemZ/shift-11.ll
new file mode 100644
index 00000000000..9741fa5a0b5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/shift-11.ll
@@ -0,0 +1,63 @@
+; Test shortening of NILF to NILL when the result is used as a shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test logical shift right.
+define i32 @f1(i32 %a, i32 %sh) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: srl %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = lshr i32 %a, %and
+ ret i32 %shift
+}
+
+; Test arithmetic shift right.
+define i32 @f2(i32 %a, i32 %sh) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 31
+; CHECK: sra %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = ashr i32 %a, %and
+ ret i32 %shift
+}
+
+; Test shift left.
+define i32 @f3(i32 %a, i32 %sh) {
+; CHECK-LABEL: f3:
+; CHECK: nill %r3, 31
+; CHECK: sll %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = shl i32 %a, %and
+ ret i32 %shift
+}
+
+; Test 64-bit logical shift right.
+define i64 @f4(i64 %a, i64 %sh) {
+; CHECK-LABEL: f4:
+; CHECK: nill %r3, 31
+; CHECK: srlg %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = lshr i64 %a, %and
+ ret i64 %shift
+}
+
+; Test 64-bit arithmetic shift right.
+define i64 @f5(i64 %a, i64 %sh) {
+; CHECK-LABEL: f5:
+; CHECK: nill %r3, 31
+; CHECK: srag %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = ashr i64 %a, %and
+ ret i64 %shift
+}
+
+; Test 64-bit shift left.
+define i64 @f6(i64 %a, i64 %sh) {
+; CHECK-LABEL: f6:
+; CHECK: nill %r3, 31
+; CHECK: sllg %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = shl i64 %a, %and
+ ret i64 %shift
+}
OpenPOWER on IntegriCloud