summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp19
-rw-r--r--llvm/test/CodeGen/X86/pr43820.ll383
2 files changed, 392 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 757f3911b11..0e193ba383b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -365,15 +365,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
- const TargetLowering &TLI,
+static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If the value won't fit in the prefered type, just use something safe. It
- // will be legalized when the shift is expanded.
- if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. It will be legalized when the shift is expanded.
+ if (!ShiftVT.isVector() &&
+ ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
ShiftVT = MVT::i32;
return ShiftVT;
}
@@ -385,7 +385,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -397,7 +397,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -1058,8 +1058,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
- TLI, DAG);
+ EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
diff --git a/llvm/test/CodeGen/X86/pr43820.ll b/llvm/test/CodeGen/X86/pr43820.ll
new file mode 100644
index 00000000000..5bdf7872d61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr43820.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define i1000 @square(i1000 %A) nounwind {
+; CHECK-LABEL: square:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: bswapq %rbx
+; CHECK-NEXT: movabsq $1085102592571150095, %rdi # imm = 0xF0F0F0F0F0F0F0F
+; CHECK-NEXT: movq %rbx, %rbp
+; CHECK-NEXT: andq %rdi, %rbp
+; CHECK-NEXT: shlq $4, %rbp
+; CHECK-NEXT: movabsq $-1085102592571150096, %r11 # imm = 0xF0F0F0F0F0F0F0F0
+; CHECK-NEXT: andq %r11, %rbx
+; CHECK-NEXT: movq %r11, %rax
+; CHECK-NEXT: shrq $4, %rbx
+; CHECK-NEXT: orq %rbp, %rbx
+; CHECK-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
+; CHECK-NEXT: movq %rbx, %r14
+; CHECK-NEXT: andq %r11, %r14
+; CHECK-NEXT: movabsq $-3689348814741910324, %rbp # imm = 0xCCCCCCCCCCCCCCCC
+; CHECK-NEXT: andq %rbp, %rbx
+; CHECK-NEXT: movq %rbp, %r15
+; CHECK-NEXT: shrq $2, %rbx
+; CHECK-NEXT: leaq (%rbx,%r14,4), %r14
+; CHECK-NEXT: movabsq $6148914691230924800, %rbx # imm = 0x5555555555000000
+; CHECK-NEXT: andq %r14, %rbx
+; CHECK-NEXT: movabsq $-6148914691247702016, %rbp # imm = 0xAAAAAAAAAA000000
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%rbx,2), %rbx
+; CHECK-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: bswapq %r10
+; CHECK-NEXT: movq %r10, %rbx
+; CHECK-NEXT: andq %rdi, %rbx
+; CHECK-NEXT: shlq $4, %rbx
+; CHECK-NEXT: andq %rax, %r10
+; CHECK-NEXT: shrq $4, %r10
+; CHECK-NEXT: orq %rbx, %r10
+; CHECK-NEXT: movq %r10, %rbx
+; CHECK-NEXT: andq %r11, %rbx
+; CHECK-NEXT: andq %r15, %r10
+; CHECK-NEXT: shrq $2, %r10
+; CHECK-NEXT: leaq (%r10,%rbx,4), %rbp
+; CHECK-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: movabsq $-6148914691236517206, %r13 # imm = 0xAAAAAAAAAAAAAAAA
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %rax, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %rax, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %rax, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %rax, %rbp
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT: bswapq %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: shlq $4, %r10
+; CHECK-NEXT: andq %r14, %rbp
+; CHECK-NEXT: shrq $4, %rbp
+; CHECK-NEXT: orq %r10, %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %r11, %r10
+; CHECK-NEXT: andq %r15, %rbp
+; CHECK-NEXT: shrq $2, %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT: movq %rbp, %r10
+; CHECK-NEXT: andq %rbx, %r10
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: bswapq %r9
+; CHECK-NEXT: movq %r9, %rbp
+; CHECK-NEXT: andq %rdi, %rbp
+; CHECK-NEXT: shlq $4, %rbp
+; CHECK-NEXT: andq %r14, %r9
+; CHECK-NEXT: shrq $4, %r9
+; CHECK-NEXT: orq %rbp, %r9
+; CHECK-NEXT: movq %r9, %rbp
+; CHECK-NEXT: andq %r11, %rbp
+; CHECK-NEXT: andq %r15, %r9
+; CHECK-NEXT: shrq $2, %r9
+; CHECK-NEXT: leaq (%r9,%rbp,4), %rbp
+; CHECK-NEXT: movq %rbp, %r9
+; CHECK-NEXT: andq %rbx, %r9
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r9,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: bswapq %r8
+; CHECK-NEXT: movq %r8, %rbp
+; CHECK-NEXT: andq %rdi, %rbp
+; CHECK-NEXT: shlq $4, %rbp
+; CHECK-NEXT: andq %r14, %r8
+; CHECK-NEXT: shrq $4, %r8
+; CHECK-NEXT: orq %rbp, %r8
+; CHECK-NEXT: movq %r8, %rbp
+; CHECK-NEXT: andq %r11, %rbp
+; CHECK-NEXT: andq %r15, %r8
+; CHECK-NEXT: movq %r15, %r9
+; CHECK-NEXT: shrq $2, %r8
+; CHECK-NEXT: leaq (%r8,%rbp,4), %rbp
+; CHECK-NEXT: movq %rbp, %r8
+; CHECK-NEXT: andq %rbx, %r8
+; CHECK-NEXT: andq %r13, %rbp
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: leaq (%rbp,%r8,2), %rbp
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: bswapq %rcx
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: andq %rdi, %rbp
+; CHECK-NEXT: shlq $4, %rbp
+; CHECK-NEXT: andq %r14, %rcx
+; CHECK-NEXT: shrq $4, %rcx
+; CHECK-NEXT: orq %rbp, %rcx
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: andq %r11, %rbp
+; CHECK-NEXT: andq %r15, %rcx
+; CHECK-NEXT: shrq $2, %rcx
+; CHECK-NEXT: leaq (%rcx,%rbp,4), %rcx
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: andq %rbx, %rbp
+; CHECK-NEXT: andq %r13, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: leaq (%rcx,%rbp,2), %r15
+; CHECK-NEXT: bswapq %rdx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: andq %rdi, %rbp
+; CHECK-NEXT: shlq $4, %rbp
+; CHECK-NEXT: andq %r14, %rdx
+; CHECK-NEXT: shrq $4, %rdx
+; CHECK-NEXT: orq %rbp, %rdx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: andq %r11, %rbp
+; CHECK-NEXT: andq %r9, %rdx
+; CHECK-NEXT: shrq $2, %rdx
+; CHECK-NEXT: leaq (%rdx,%rbp,4), %rdx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: andq %rbx, %rbp
+; CHECK-NEXT: andq %r13, %rdx
+; CHECK-NEXT: shrq %rdx
+; CHECK-NEXT: leaq (%rdx,%rbp,2), %rdx
+; CHECK-NEXT: bswapq %rsi
+; CHECK-NEXT: andq %rsi, %rdi
+; CHECK-NEXT: andq %r14, %rsi
+; CHECK-NEXT: shlq $4, %rdi
+; CHECK-NEXT: shrq $4, %rsi
+; CHECK-NEXT: orq %rdi, %rsi
+; CHECK-NEXT: andq %rsi, %r11
+; CHECK-NEXT: andq %r9, %rsi
+; CHECK-NEXT: shrq $2, %rsi
+; CHECK-NEXT: leaq (%rsi,%r11,4), %rsi
+; CHECK-NEXT: andq %rsi, %rbx
+; CHECK-NEXT: andq %r13, %rsi
+; CHECK-NEXT: shrq %rsi
+; CHECK-NEXT: leaq (%rsi,%rbx,2), %r13
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rax, %r11
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rcx, %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rbp, %rcx
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %r12, %rbp
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %r14, %r12
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rbx, %r14
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %r10, %rbx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %r9, %r10
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %r8, %r9
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rdi, %r8
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rsi, %rdi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: shrdq $24, %rax, %rsi
+; CHECK-NEXT: shrdq $24, %r15, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrdq $24, %rdx, %r15
+; CHECK-NEXT: shrdq $24, %r13, %rdx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: movq %rdx, 112(%rax)
+; CHECK-NEXT: movq %r15, 104(%rax)
+; CHECK-NEXT: movq %rcx, 96(%rax)
+; CHECK-NEXT: movq %rsi, 88(%rax)
+; CHECK-NEXT: movq %rdi, 80(%rax)
+; CHECK-NEXT: movq %r8, 72(%rax)
+; CHECK-NEXT: movq %r9, 64(%rax)
+; CHECK-NEXT: movq %r10, 56(%rax)
+; CHECK-NEXT: movq %rbx, 48(%rax)
+; CHECK-NEXT: movq %r14, 40(%rax)
+; CHECK-NEXT: movq %r12, 32(%rax)
+; CHECK-NEXT: movq %rbp, 24(%rax)
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT: movq %rcx, 16(%rax)
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT: movq %rcx, 8(%rax)
+; CHECK-NEXT: movq %r11, (%rax)
+; CHECK-NEXT: movq %r13, %rcx
+; CHECK-NEXT: shrq $56, %r13
+; CHECK-NEXT: movb %r13b, 124(%rax)
+; CHECK-NEXT: shrq $24, %rcx
+; CHECK-NEXT: movl %ecx, 120(%rax)
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+ %Z = call i1000 @llvm.bitreverse.i1000(i1000 %A)
+ ret i1000 %Z
+}
+
+declare i1000 @llvm.bitreverse.i1000(i1000)
OpenPOWER on IntegriCloud