summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/PowerPC/funnel-shift.ll
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2018-08-01 17:17:08 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-08-01 17:17:08 +0000
commit: 8aac22e06a196541961cd6d6cc46ffd4f39b60c3 (patch)
tree: ce54e012d6bc3a7d3f1e697f5ab4cdd9fdb69bbd /llvm/test/CodeGen/PowerPC/funnel-shift.ll
parent: 0bb8d83c89321c0bfe955c5e2b2069a2870912d4 (diff)
download: bcm5719-llvm-8aac22e06a196541961cd6d6cc46ffd4f39b60c3.tar.gz
bcm5719-llvm-8aac22e06a196541961cd6d6cc46ffd4f39b60c3.zip
[SelectionDAG] fix bug in translating funnel shift with non-power-of-2 type
The bug is visible in the constant-folded x86 tests. We can't use the negated shift amount when the type is not power-of-2:
https://rise4fun.com/Alive/US1r
...so in that case, use the regular lowering that includes a select to guard against a shift-by-bitwidth. This path is improved by only calculating the modulo shift amount once now.

Also, improve the rotate (with power-of-2 size) lowering to use a negate rather than subtract from bitwidth. This improves the codegen whether we have a rotate instruction or not (although we can still see that we're not matching to a legal rotate in all cases).

llvm-svn: 338592
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/funnel-shift.ll')
-rw-r--r-- llvm/test/CodeGen/PowerPC/funnel-shift.ll | 32
1 files changed, 10 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 9acc1ac5221..05d105bd6eb 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -18,9 +18,8 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 6, 5, 32
; CHECK-NEXT: andi. 5, 5, 31
-; CHECK-NEXT: clrlwi 6, 6, 27
+; CHECK-NEXT: subfic 6, 5, 32
; CHECK-NEXT: slw 5, 3, 5
; CHECK-NEXT: srw 4, 4, 6
; CHECK-NEXT: or 4, 5, 4
@@ -36,24 +35,19 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK: # %bb.0:
; CHECK-NEXT: lis 6, -8857
-; CHECK-NEXT: subfic 7, 5, 37
; CHECK-NEXT: clrldi 5, 5, 27
; CHECK-NEXT: clrldi 4, 4, 27
; CHECK-NEXT: ori 6, 6, 51366
-; CHECK-NEXT: clrldi 7, 7, 27
; CHECK-NEXT: sldi 6, 6, 32
; CHECK-NEXT: oris 6, 6, 3542
; CHECK-NEXT: ori 6, 6, 31883
-; CHECK-NEXT: mulhdu 8, 7, 6
; CHECK-NEXT: mulhdu 6, 5, 6
-; CHECK-NEXT: rldicl 8, 8, 59, 5
; CHECK-NEXT: rldicl 6, 6, 59, 5
-; CHECK-NEXT: mulli 8, 8, 37
; CHECK-NEXT: mulli 6, 6, 37
-; CHECK-NEXT: sub 7, 7, 8
; CHECK-NEXT: subf. 5, 6, 5
-; CHECK-NEXT: srd 4, 4, 7
+; CHECK-NEXT: subfic 6, 5, 37
; CHECK-NEXT: sld 5, 3, 5
+; CHECK-NEXT: srd 4, 4, 6
; CHECK-NEXT: or 4, 5, 4
; CHECK-NEXT: isel 3, 3, 4, 2
; CHECK-NEXT: blr
@@ -130,9 +124,8 @@ define i8 @fshl_i8_const_fold() {
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 6, 5, 32
; CHECK-NEXT: andi. 5, 5, 31
-; CHECK-NEXT: clrlwi 6, 6, 27
+; CHECK-NEXT: subfic 6, 5, 32
; CHECK-NEXT: srw 5, 4, 5
; CHECK-NEXT: slw 3, 3, 6
; CHECK-NEXT: or 3, 3, 5
@@ -148,24 +141,19 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: # %bb.0:
; CHECK-NEXT: lis 6, -8857
-; CHECK-NEXT: subfic 7, 5, 37
; CHECK-NEXT: clrldi 5, 5, 27
-; CHECK-NEXT: clrldi 9, 4, 27
; CHECK-NEXT: ori 6, 6, 51366
-; CHECK-NEXT: clrldi 7, 7, 27
; CHECK-NEXT: sldi 6, 6, 32
; CHECK-NEXT: oris 6, 6, 3542
; CHECK-NEXT: ori 6, 6, 31883
-; CHECK-NEXT: mulhdu 8, 5, 6
-; CHECK-NEXT: mulhdu 6, 7, 6
-; CHECK-NEXT: rldicl 8, 8, 59, 5
+; CHECK-NEXT: mulhdu 6, 5, 6
; CHECK-NEXT: rldicl 6, 6, 59, 5
-; CHECK-NEXT: mulli 8, 8, 37
; CHECK-NEXT: mulli 6, 6, 37
-; CHECK-NEXT: subf. 5, 8, 5
-; CHECK-NEXT: sub 6, 7, 6
-; CHECK-NEXT: srd 5, 9, 5
-; CHECK-NEXT: sld 3, 3, 6
+; CHECK-NEXT: subf. 5, 6, 5
+; CHECK-NEXT: clrldi 6, 4, 27
+; CHECK-NEXT: subfic 7, 5, 37
+; CHECK-NEXT: srd 5, 6, 5
+; CHECK-NEXT: sld 3, 3, 7
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: isel 3, 4, 3, 2
; CHECK-NEXT: blr
OpenPOWER on IntegriCloud