path: root/llvm/test/CodeGen/PowerPC
author    Sanjay Patel <spatel@rotateright.com>    2018-07-16 22:59:31 +0000
committer Sanjay Patel <spatel@rotateright.com>    2018-07-16 22:59:31 +0000
commit    c71adc8040b1e382b195a0096015cb5c39628b23 (patch)
tree      8711ea739eab9d1354abf5fed2412f7d00f75293 /llvm/test/CodeGen/PowerPC
parent    c4846a551e0c1499e67f4aa287abe89be20ffe5f (diff)
[Intrinsics] define funnel shift IR intrinsics + DAG builder support
As discussed here:
http://lists.llvm.org/pipermail/llvm-dev/2018-May/123292.html
http://lists.llvm.org/pipermail/llvm-dev/2018-July/124400.html

We want to add rotate intrinsics because the IR expansion of that pattern is 4+ instructions, and we can lose pieces of the pattern before it gets to the backend. Generalizing the operation by allowing 2 different input values (plus the 3rd shift/rotate amount) gives us a "funnel shift" operation which may also be a single hardware instruction.

Initially, I thought we needed to define new DAG nodes for these ops, and I spent time working on that (much larger patch), but then I concluded that we don't need it. At least as a first step, we have all of the backend support necessary to match these ops...because it was required. And shepherding these through the IR optimizer is the primary concern, so the IR intrinsics are likely all that we'll ever need.

There was also a question about converting the intrinsics to the existing ROTL/ROTR DAG nodes (along with improving the oversized shift documentation). Again, I don't think that's strictly necessary (as the test results here prove). That can be an efficiency improvement as a small follow-up patch.

So all we're left with is documentation, definition of the IR intrinsics, and DAG builder support.

Differential Revision: https://reviews.llvm.org/D49242

llvm-svn: 337221
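For readers unfamiliar with the new intrinsics, here is a minimal sketch (not part of this patch; the function names are illustrative only) of how a rotate and a general funnel shift are written with them in IR. The shift amount is taken modulo the bit width, so fshl(x, x, z) is a rotate-left, and fshl(x, y, z) extracts the top bits of the concatenation x:y shifted left by z:

declare i32 @llvm.fshl.i32(i32, i32, i32)

; Rotate-left by a variable amount: pass the same value as both data operands.
define i32 @rotl_example(i32 %x, i32 %z) {
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %r
}

; General funnel shift left: two distinct data operands plus a shift amount.
; For z mod 32 != 0 this is (x << (z mod 32)) | (y >> (32 - (z mod 32)));
; for z mod 32 == 0 the result is simply x.
define i32 @fshl_example(i32 %x, i32 %y, i32 %z) {
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %r
}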
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r--  llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll  214
-rw-r--r--  llvm/test/CodeGen/PowerPC/funnel-shift.ll      271
2 files changed, 485 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
new file mode 100644
index 00000000000..41317ed68d9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s
+
+declare i8 @llvm.fshl.i8(i8, i8, i8)
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+declare i8 @llvm.fshr.i8(i8, i8, i8)
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+; When first 2 operands match, it's a rotate.
+
+define i8 @rotl_i8_const_shift(i8 %x) {
+; CHECK-LABEL: rotl_i8_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 3, 27, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 3, 0, 28
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
+ ret i8 %f
+}
+
+define i64 @rotl_i64_const_shift(i64 %x) {
+; CHECK-LABEL: rotl_i64_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 3
+; CHECK-NEXT: blr
+ %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
+ ret i64 %f
+}
+
+; When first 2 operands match, it's a rotate (by variable amount).
+
+define i16 @rotl_i16(i16 %x, i16 %z) {
+; CHECK-LABEL: rotl_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic 5, 4, 16
+; CHECK-NEXT: clrlwi 6, 3, 16
+; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
+; CHECK-NEXT: clrlwi 5, 5, 28
+; CHECK-NEXT: slw 3, 3, 4
+; CHECK-NEXT: srw 4, 6, 5
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: blr
+ %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
+ ret i16 %f
+}
+
+define i32 @rotl_i32(i32 %x, i32 %z) {
+; CHECK-LABEL: rotl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
+; CHECK-NEXT: rlwnm 3, 3, 4, 0, 31
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
+ ret i32 %f
+}
+
+; Vector rotate.
+
+define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
+; CHECK-LABEL: rotl_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
+; CHECK-NEXT: lvx 4, 0, 3
+; CHECK-NEXT: vsubuwm 4, 4, 3
+; CHECK-NEXT: vslw 3, 2, 3
+; CHECK-NEXT: vsrw 2, 2, 4
+; CHECK-NEXT: xxlor 34, 35, 34
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
+ ret <4 x i32> %f
+}
+
+; Vector rotate by constant splat amount.
+
+define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
+; CHECK-LABEL: rotl_v4i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vspltisw 3, -16
+; CHECK-NEXT: vspltisw 4, 13
+; CHECK-NEXT: vspltisw 5, 3
+; CHECK-NEXT: vsubuwm 3, 4, 3
+; CHECK-NEXT: vslw 4, 2, 5
+; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 36, 34
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+ ret <4 x i32> %f
+}
+
+; Repeat everything for funnel shift right.
+
+define i8 @rotr_i8_const_shift(i8 %x) {
+; CHECK-LABEL: rotr_i8_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 3, 29, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 5, 0, 26
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
+ ret i8 %f
+}
+
+define i32 @rotr_i32_const_shift(i32 %x) {
+; CHECK-LABEL: rotr_i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 3, 3, 29, 0, 31
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
+ ret i32 %f
+}
+
+; When first 2 operands match, it's a rotate (by variable amount).
+
+define i16 @rotr_i16(i16 %x, i16 %z) {
+; CHECK-LABEL: rotr_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic 5, 4, 16
+; CHECK-NEXT: clrlwi 6, 3, 16
+; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
+; CHECK-NEXT: clrlwi 5, 5, 28
+; CHECK-NEXT: srw 4, 6, 4
+; CHECK-NEXT: slw 3, 3, 5
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: blr
+ %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
+ ret i16 %f
+}
+
+define i64 @rotr_i64(i64 %x, i64 %z) {
+; CHECK-LABEL: rotr_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic 4, 4, 64
+; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
+; CHECK-NEXT: rotld 3, 3, 4
+; CHECK-NEXT: blr
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
+ ret i64 %f
+}
+
+; Vector rotate.
+
+define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
+; CHECK-LABEL: rotr_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; CHECK-NEXT: lvx 4, 0, 3
+; CHECK-NEXT: vsubuwm 4, 4, 3
+; CHECK-NEXT: vsrw 3, 2, 3
+; CHECK-NEXT: vslw 2, 2, 4
+; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
+ ret <4 x i32> %f
+}
+
+; Vector rotate by constant splat amount.
+
+define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vspltisw 3, -16
+; CHECK-NEXT: vspltisw 4, 13
+; CHECK-NEXT: vspltisw 5, 3
+; CHECK-NEXT: vsubuwm 3, 4, 3
+; CHECK-NEXT: vsrw 4, 2, 5
+; CHECK-NEXT: vslw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 36
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+ ret <4 x i32> %f
+}
+
+define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
new file mode 100644
index 00000000000..9acc1ac5221
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s
+
+declare i8 @llvm.fshl.i8(i8, i8, i8)
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+declare i8 @llvm.fshr.i8(i8, i8, i8)
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+; General case - all operands can be variables.
+
+define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: fshl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic 6, 5, 32
+; CHECK-NEXT: andi. 5, 5, 31
+; CHECK-NEXT: clrlwi 6, 6, 27
+; CHECK-NEXT: slw 5, 3, 5
+; CHECK-NEXT: srw 4, 4, 6
+; CHECK-NEXT: or 4, 5, 4
+; CHECK-NEXT: isel 3, 3, 4, 2
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
+ ret i32 %f
+}
+
+; Verify that weird types are minimally supported.
+declare i37 @llvm.fshl.i37(i37, i37, i37)
+define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
+; CHECK-LABEL: fshl_i37:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lis 6, -8857
+; CHECK-NEXT: subfic 7, 5, 37
+; CHECK-NEXT: clrldi 5, 5, 27
+; CHECK-NEXT: clrldi 4, 4, 27
+; CHECK-NEXT: ori 6, 6, 51366
+; CHECK-NEXT: clrldi 7, 7, 27
+; CHECK-NEXT: sldi 6, 6, 32
+; CHECK-NEXT: oris 6, 6, 3542
+; CHECK-NEXT: ori 6, 6, 31883
+; CHECK-NEXT: mulhdu 8, 7, 6
+; CHECK-NEXT: mulhdu 6, 5, 6
+; CHECK-NEXT: rldicl 8, 8, 59, 5
+; CHECK-NEXT: rldicl 6, 6, 59, 5
+; CHECK-NEXT: mulli 8, 8, 37
+; CHECK-NEXT: mulli 6, 6, 37
+; CHECK-NEXT: sub 7, 7, 8
+; CHECK-NEXT: subf. 5, 6, 5
+; CHECK-NEXT: srd 4, 4, 7
+; CHECK-NEXT: sld 5, 3, 5
+; CHECK-NEXT: or 4, 5, 4
+; CHECK-NEXT: isel 3, 3, 4, 2
+; CHECK-NEXT: blr
+ %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
+ ret i37 %f
+}
+
+; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
+
+declare i7 @llvm.fshl.i7(i7, i7, i7)
+define i7 @fshl_i7_const_fold() {
+; CHECK-LABEL: fshl_i7_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li 3, 67
+; CHECK-NEXT: blr
+ %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
+ ret i7 %f
+}
+
+; With constant shift amount, this is rotate + insert (missing extended mnemonics).
+
+define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 4, 9, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
+ ret i32 %f
+}
+
+; Check modulo math on shift amount.
+
+define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_i32_const_overshift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 4, 9, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
+ ret i32 %f
+}
+
+; 64-bit should also work.
+
+define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
+; CHECK-LABEL: fshl_i64_const_overshift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 4, 4, 41
+; CHECK-NEXT: rldimi 4, 3, 41, 0
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
+ ret i64 %f
+}
+
+; This should work without any node-specific logic.
+
+define i8 @fshl_i8_const_fold() {
+; CHECK-LABEL: fshl_i8_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li 3, 128
+; CHECK-NEXT: blr
+ %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
+ ret i8 %f
+}
+
+; Repeat everything for funnel shift right.
+
+; General case - all operands can be variables.
+
+define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: fshr_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic 6, 5, 32
+; CHECK-NEXT: andi. 5, 5, 31
+; CHECK-NEXT: clrlwi 6, 6, 27
+; CHECK-NEXT: srw 5, 4, 5
+; CHECK-NEXT: slw 3, 3, 6
+; CHECK-NEXT: or 3, 3, 5
+; CHECK-NEXT: isel 3, 4, 3, 2
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
+ ret i32 %f
+}
+
+; Verify that weird types are minimally supported.
+declare i37 @llvm.fshr.i37(i37, i37, i37)
+define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
+; CHECK-LABEL: fshr_i37:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lis 6, -8857
+; CHECK-NEXT: subfic 7, 5, 37
+; CHECK-NEXT: clrldi 5, 5, 27
+; CHECK-NEXT: clrldi 9, 4, 27
+; CHECK-NEXT: ori 6, 6, 51366
+; CHECK-NEXT: clrldi 7, 7, 27
+; CHECK-NEXT: sldi 6, 6, 32
+; CHECK-NEXT: oris 6, 6, 3542
+; CHECK-NEXT: ori 6, 6, 31883
+; CHECK-NEXT: mulhdu 8, 5, 6
+; CHECK-NEXT: mulhdu 6, 7, 6
+; CHECK-NEXT: rldicl 8, 8, 59, 5
+; CHECK-NEXT: rldicl 6, 6, 59, 5
+; CHECK-NEXT: mulli 8, 8, 37
+; CHECK-NEXT: mulli 6, 6, 37
+; CHECK-NEXT: subf. 5, 8, 5
+; CHECK-NEXT: sub 6, 7, 6
+; CHECK-NEXT: srd 5, 9, 5
+; CHECK-NEXT: sld 3, 3, 6
+; CHECK-NEXT: or 3, 3, 5
+; CHECK-NEXT: isel 3, 4, 3, 2
+; CHECK-NEXT: blr
+ %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
+ ret i37 %f
+}
+
+; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
+
+declare i7 @llvm.fshr.i7(i7, i7, i7)
+define i7 @fshr_i7_const_fold() {
+; CHECK-LABEL: fshr_i7_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li 3, 31
+; CHECK-NEXT: blr
+ %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
+ ret i7 %f
+}
+
+; With constant shift amount, this is rotate + insert (missing extended mnemonics).
+
+define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
+; CHECK-LABEL: fshr_i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 4, 23, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
+ ret i32 %f
+}
+
+; Check modulo math on shift amount. 41-32=9.
+
+define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
+; CHECK-LABEL: fshr_i32_const_overshift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rlwinm 4, 4, 23, 0, 31
+; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
+ ret i32 %f
+}
+
+; 64-bit should also work. 105-64 = 41.
+
+define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
+; CHECK-LABEL: fshr_i64_const_overshift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 4, 4, 23
+; CHECK-NEXT: rldimi 4, 3, 23, 0
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
+ ret i64 %f
+}
+
+; This should work without any node-specific logic.
+
+define i8 @fshr_i8_const_fold() {
+; CHECK-LABEL: fshr_i8_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li 3, 254
+; CHECK-NEXT: blr
+ %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
+ ret i8 %f
+}
+
+define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+