summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZhan Jun Liau <zhanjunl@ca.ibm.com>2016-06-28 21:03:19 +0000
committerZhan Jun Liau <zhanjunl@ca.ibm.com>2016-06-28 21:03:19 +0000
commit347db3e18ee7375cf038a8173880466fd0fe6312 (patch)
tree2e8d8a0d835570af697ceba4ffbe3ccba2ee081e
parent0fecee9cd8af1de3b103ca7c6dec054bedbca035 (diff)
downloadbcm5719-llvm-347db3e18ee7375cf038a8173880466fd0fe6312.tar.gz
bcm5719-llvm-347db3e18ee7375cf038a8173880466fd0fe6312.zip
[SystemZ] Use NILL instruction instead of NILF where possible
Summary: SystemZ shift instructions only use the last 6 bits of the shift amount. When the result of an AND operation is used as a shift amount, this means that we can use the NILL instruction (which operates on the last 16 bits) rather than NILF (which operates on the last 32 bits) for a 16-bit savings in instruction size. Reviewers: uweigand Subscribers: llvm-commits Author: colpell Committing on behalf of Elliot. Differential Revision: http://reviews.llvm.org/D21686 llvm-svn: 274066
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td33
-rw-r--r--llvm/test/CodeGen/SystemZ/rot-01.ll35
-rw-r--r--llvm/test/CodeGen/SystemZ/shift-11.ll63
3 files changed, 130 insertions, 1 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7a7a3e30863..012d170552f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1022,7 +1022,7 @@ let Defs = [CC] in {
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
@@ -1685,6 +1685,37 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
(XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+ def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
XCSequence, 1>;
diff --git a/llvm/test/CodeGen/SystemZ/rot-01.ll b/llvm/test/CodeGen/SystemZ/rot-01.ll
new file mode 100644
index 00000000000..5e97f5d6505
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/rot-01.ll
@@ -0,0 +1,35 @@
+; Test shortening of NILF to NILL when the result is used as a rotate amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 32-bit rotate.
+define i32 @f1(i32 %val, i32 %amt) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: rll %r2, %r2, 0(%r3)
+ %mod = urem i32 %amt, 32
+
+ %inv = sub i32 32, %mod
+ %parta = shl i32 %val, %mod
+ %partb = lshr i32 %val, %inv
+
+ %rotl = or i32 %parta, %partb
+
+ ret i32 %rotl
+}
+
+; Test 64-bit rotate.
+define i64 @f2(i64 %val, i64 %amt) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 63
+; CHECK: rllg %r2, %r2, 0(%r3)
+ %mod = urem i64 %amt, 64
+
+ %inv = sub i64 64, %mod
+ %parta = shl i64 %val, %mod
+ %partb = lshr i64 %val, %inv
+
+ %rotl = or i64 %parta, %partb
+
+ ret i64 %rotl
+}
diff --git a/llvm/test/CodeGen/SystemZ/shift-11.ll b/llvm/test/CodeGen/SystemZ/shift-11.ll
new file mode 100644
index 00000000000..9741fa5a0b5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/shift-11.ll
@@ -0,0 +1,63 @@
+; Test shortening of NILF to NILL when the result is used as a shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test logical shift right.
+define i32 @f1(i32 %a, i32 %sh) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: srl %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = lshr i32 %a, %and
+ ret i32 %shift
+}
+
+; Test arithmetic shift right.
+define i32 @f2(i32 %a, i32 %sh) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 31
+; CHECK: sra %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = ashr i32 %a, %and
+ ret i32 %shift
+}
+
+; Test shift left.
+define i32 @f3(i32 %a, i32 %sh) {
+; CHECK-LABEL: f3:
+; CHECK: nill %r3, 31
+; CHECK: sll %r2, 0(%r3)
+ %and = and i32 %sh, 31
+ %shift = shl i32 %a, %and
+ ret i32 %shift
+}
+
+; Test 64-bit logical shift right.
+define i64 @f4(i64 %a, i64 %sh) {
+; CHECK-LABEL: f4:
+; CHECK: nill %r3, 31
+; CHECK: srlg %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = lshr i64 %a, %and
+ ret i64 %shift
+}
+
+; Test 64-bit arithmetic shift right.
+define i64 @f5(i64 %a, i64 %sh) {
+; CHECK-LABEL: f5:
+; CHECK: nill %r3, 31
+; CHECK: srag %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = ashr i64 %a, %and
+ ret i64 %shift
+}
+
+; Test 64-bit shift left.
+define i64 @f6(i64 %a, i64 %sh) {
+; CHECK-LABEL: f6:
+; CHECK: nill %r3, 31
+; CHECK: sllg %r2, %r2, 0(%r3)
+ %and = and i64 %sh, 31
+ %shift = shl i64 %a, %and
+ ret i64 %shift
+}
OpenPOWER on IntegriCloud