| author | Nirav Dave <niravd@google.com> | 2017-06-30 12:56:02 +0000 |
|---|---|---|
| committer | Nirav Dave <niravd@google.com> | 2017-06-30 12:56:02 +0000 |
| commit | a35938d827eb4539c27fe18001b21243dc662cce (patch) | |
| tree | 3d15bb77137778b2359f3ed1a8983d65a60677b6 | |
| parent | c5a48c1ee8a5b91dd91871a7be5e597fe4400918 (diff) | |
Revert "[DAG] Rewrite areNonVolatileConsecutiveLoads to use BaseIndexOffset"
This reverts commit r306819, which appears to be exposing underlying
issues in a stage1 ppc64be build.
llvm-svn: 306820
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 47 |
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 18 |
| -rw-r--r-- | llvm/test/CodeGen/BPF/undef.ll | 54 |
| -rw-r--r-- | llvm/test/CodeGen/MSP430/Inst16mm.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/bswap-wide-int.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/build-vector-128.ll | 23 |
| -rw-r--r-- | llvm/test/CodeGen/X86/build-vector-256.ll | 29 |
| -rw-r--r-- | llvm/test/CodeGen/X86/build-vector-512.ll | 20 |
| -rw-r--r-- | llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll | 236 |
| -rw-r--r-- | llvm/test/CodeGen/X86/wide-integer-cmp.ll | 2 |
10 files changed, 254 insertions, 183 deletions
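The predicate restored in the first hunk below reduces to plain offset arithmetic: two non-volatile loads of width `Bytes` are consecutive when their addresses resolve to the same base and differ by exactly `Dist * Bytes`. A minimal standalone sketch of that check, with a hypothetical `(base, offset)` pair standing in for the `FrameIndex`, base-plus-constant, and global-plus-offset cases the real code re-derives individually:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative stand-in for an address decomposed into (base, offset).
// Not LLVM API: the restored code extracts these pairs case by case from
// frame indices, ISD::ADD-with-constant nodes, and global addresses.
struct DecomposedAddr {
  const void *Base; // offsets are only comparable against the same base
  int64_t Offset;   // byte offset from Base
};

// Loads are consecutive when Loc sits exactly Dist elements of Bytes bytes
// past BaseLoc -- the same comparison every branch of the C++ hunk performs.
bool areConsecutive(DecomposedAddr Loc, DecomposedAddr BaseLoc,
                    int64_t Dist, int64_t Bytes) {
  if (Loc.Base != BaseLoc.Base)
    return false;
  return Loc.Offset == BaseLoc.Offset + Dist * Bytes;
}

int main() {
  char Buf[32];
  // A 4-byte load at Buf+4 is one element past a 4-byte load at Buf+0...
  assert(areConsecutive({Buf, 4}, {Buf, 0}, /*Dist=*/1, /*Bytes=*/4));
  // ...but not two elements past it.
  assert(!areConsecutive({Buf, 4}, {Buf, 0}, /*Dist=*/2, /*Bytes=*/4));
}
```

The reverted r306819 computed this same predicate once, through `BaseIndexOffset::match` and `equalBaseIndex`; the revert returns to the older per-case derivation.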
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f41323bd08b..98553152117 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -34,7 +34,6 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
-#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -7631,13 +7630,45 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
   SDValue Loc = LD->getOperand(1);
   SDValue BaseLoc = Base->getOperand(1);
-
-  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
-  auto LocDecomp = BaseIndexOffset::match(Loc, *this);
-
-  int64_t Offset = 0;
-  if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
-    return (Dist * Bytes == Offset);
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS = MFI.getObjectSize(FI);
+    int BFS = MFI.getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  // Handle X + C.
+  if (isBaseWithConstantOffset(Loc)) {
+    int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+    if (Loc.getOperand(0) == BaseLoc) {
+      // If the base location is a simple address with no offset itself, then
+      // the second load's first add operand should be the base address.
+      if (LocOffset == Dist * (int)Bytes)
+        return true;
+    } else if (isBaseWithConstantOffset(BaseLoc)) {
+      // The base location itself has an offset, so subtract that value from the
+      // second load's offset before comparing to distance * size.
+      int64_t BOffset =
+          cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+      if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+        if ((LocOffset - BOffset) == Dist * (int)Bytes)
+          return true;
+      }
+    }
+  }
+  const GlobalValue *GV1 = nullptr;
+  const GlobalValue *GV2 = nullptr;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
   return false;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index cd256efd4c3..4e899ae6668 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -60,18 +60,12 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
   int64_t Offset = 0;
   bool IsIndexSignExt = false;
 
-  // Consume constant adds & ors with appropriate masking.
-  while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
-    if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
-      // Only consider ORs which act as adds.
-      if (Base->getOpcode() == ISD::OR &&
-          !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
-        break;
-      Offset += C->getSExtValue();
-      Base = Base->getOperand(0);
-      continue;
-    }
-    break;
+  // Consume constant adds
+  while (Base->getOpcode() == ISD::ADD &&
+         isa<ConstantSDNode>(Base->getOperand(1))) {
+    int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue();
+    Offset += POffset;
+    Base = Base->getOperand(0);
   }
 
   if (Base->getOpcode() == ISD::ADD) {
diff --git a/llvm/test/CodeGen/BPF/undef.ll b/llvm/test/CodeGen/BPF/undef.ll
index d888a835e6d..de14bfde1ab 100644
--- a/llvm/test/CodeGen/BPF/undef.ll
+++ b/llvm/test/CodeGen/BPF/undef.ll
@@ -13,30 +13,36 @@
 ; Function Attrs: nounwind uwtable
 define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
-; CHECK: r1 = r10
-; CHECK: r1 += -2
-; CHECK: r2 = 0
-; CHECK: *(u16 *)(r1 + 6) = r2
-; CHECK: *(u16 *)(r1 + 4) = r2
-; CHECK: *(u16 *)(r1 + 2) = r2
-; CHECK: r1 = 134678021
-; CHECK: *(u32 *)(r10 - 8) = r1
-; CHECK: r1 = 9
-; CHECK: *(u8 *)(r10 - 4) = r1
-; CHECK: r1 = 10
-; CHECK: *(u8 *)(r10 - 3) = r1
-; CHECK: *(u16 *)(r10 + 24) = r2
-; CHECK: *(u16 *)(r10 + 22) = r2
-; CHECK: *(u16 *)(r10 + 20) = r2
-; CHECK: *(u16 *)(r10 + 18) = r2
-; CHECK: *(u16 *)(r10 + 16) = r2
-; CHECK: *(u16 *)(r10 + 14) = r2
-; CHECK: *(u16 *)(r10 + 12) = r2
-; CHECK: *(u16 *)(r10 + 10) = r2
-; CHECK: *(u16 *)(r10 + 8) = r2
-; CHECK: *(u16 *)(r10 + 6) = r2
-; CHECK: *(u16 *)(r10 - 2) = r2
-; CHECK: *(u16 *)(r10 + 26) = r2
+; CHECK: r2 = r10
+; CHECK: r2 += -2
+; CHECK: r1 = 0
+; CHECK: *(u16 *)(r2 + 6) = r1
+; CHECK: *(u16 *)(r2 + 4) = r1
+; CHECK: *(u16 *)(r2 + 2) = r1
+; CHECK: r2 = 6
+; CHECK: *(u8 *)(r10 - 7) = r2
+; CHECK: r2 = 5
+; CHECK: *(u8 *)(r10 - 8) = r2
+; CHECK: r2 = 7
+; CHECK: *(u8 *)(r10 - 6) = r2
+; CHECK: r2 = 8
+; CHECK: *(u8 *)(r10 - 5) = r2
+; CHECK: r2 = 9
+; CHECK: *(u8 *)(r10 - 4) = r2
+; CHECK: r2 = 10
+; CHECK: *(u8 *)(r10 - 3) = r2
+; CHECK: *(u16 *)(r10 + 24) = r1
+; CHECK: *(u16 *)(r10 + 22) = r1
+; CHECK: *(u16 *)(r10 + 20) = r1
+; CHECK: *(u16 *)(r10 + 18) = r1
+; CHECK: *(u16 *)(r10 + 16) = r1
+; CHECK: *(u16 *)(r10 + 14) = r1
+; CHECK: *(u16 *)(r10 + 12) = r1
+; CHECK: *(u16 *)(r10 + 10) = r1
+; CHECK: *(u16 *)(r10 + 8) = r1
+; CHECK: *(u16 *)(r10 + 6) = r1
+; CHECK: *(u16 *)(r10 - 2) = r1
+; CHECK: *(u16 *)(r10 + 26) = r1
 ; CHECK: r2 = r10
 ; CHECK: r2 += -8
 ; CHECK: r1 = <MCOperand Expr:(routing)>ll
diff --git a/llvm/test/CodeGen/MSP430/Inst16mm.ll b/llvm/test/CodeGen/MSP430/Inst16mm.ll
index 14a799b9171..951002d60a0 100644
--- a/llvm/test/CodeGen/MSP430/Inst16mm.ll
+++ b/llvm/test/CodeGen/MSP430/Inst16mm.ll
@@ -64,6 +64,6 @@ entry:
  %0 = load i16, i16* %retval ; <i16> [#uses=1]
  ret i16 %0
 ; CHECK-LABEL: mov2:
-; CHECK-DAG: mov.w 2(r1), 6(r1)
-; CHECK-DAG: mov.w 0(r1), 4(r1)
+; CHECK: mov.w 0(r1), 4(r1)
+; CHECK: mov.w 2(r1), 6(r1)
 }
diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll
index 858dbf5fd85..db48eb80de4 100644
--- a/llvm/test/CodeGen/X86/bswap-wide-int.ll
+++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll
@@ -71,8 +71,8 @@ define i128 @bswap_i128(i128 %a0) nounwind {
 ; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
 ; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
-; X86-MOVBE-NEXT: movbel %edx, 4(%eax)
-; X86-MOVBE-NEXT: movbel %ecx, (%eax)
+; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
+; X86-MOVBE-NEXT: movbel %edx, (%eax)
 ; X86-MOVBE-NEXT: popl %esi
 ; X86-MOVBE-NEXT: popl %edi
 ; X86-MOVBE-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index 531c6de5f90..c73d7654045 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -72,10 +72,12 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
 }
 
 define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
-; SSE-32-LABEL: test_buildvector_v2i64:
-; SSE-32: # BB#0:
-; SSE-32-NEXT: movups {{[0-9]+}}(%esp), %xmm0
-; SSE-32-NEXT: retl
+; SSE2-32-LABEL: test_buildvector_v2i64:
+; SSE2-32: # BB#0:
+; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-32-NEXT: retl
 ;
 ; SSE-64-LABEL: test_buildvector_v2i64:
 ; SSE-64: # BB#0:
@@ -84,9 +86,20 @@ define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-64-NEXT: retq
 ;
+; SSE41-32-LABEL: test_buildvector_v2i64:
+; SSE41-32: # BB#0:
+; SSE41-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-32-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0
+; SSE41-32-NEXT: pinsrd $2, {{[0-9]+}}(%esp), %xmm0
+; SSE41-32-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
+; SSE41-32-NEXT: retl
+;
 ; AVX-32-LABEL: test_buildvector_v2i64:
 ; AVX-32: # BB#0:
-; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; AVX-32-NEXT: retl
 ;
 ; AVX-64-LABEL: test_buildvector_v2i64:
diff --git a/llvm/test/CodeGen/X86/build-vector-256.ll b/llvm/test/CodeGen/X86/build-vector-256.ll
index 942b7779abe..1ced1fc3a38 100644
--- a/llvm/test/CodeGen/X86/build-vector-256.ll
+++ b/llvm/test/CodeGen/X86/build-vector-256.ll
@@ -51,10 +51,18 @@ define <8 x float> @test_buildvector_v8f32(float %a0, float %a1, float %a2, floa
 }
 
 define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
-; AVX-32-LABEL: test_buildvector_v4i64:
-; AVX-32: # BB#0:
-; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %ymm0
-; AVX-32-NEXT: retl
+; AVX1-32-LABEL: test_buildvector_v4i64:
+; AVX1-32: # BB#0:
+; AVX1-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX1-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX1-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-32-NEXT: retl
 ;
 ; AVX1-64-LABEL: test_buildvector_v4i64:
 ; AVX1-64: # BB#0:
@@ -67,6 +75,19 @@ define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
 ; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-64-NEXT: retq
 ;
+; AVX2-32-LABEL: test_buildvector_v4i64:
+; AVX2-32: # BB#0:
+; AVX2-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX2-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX2-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-32-NEXT: retl
+;
 ; AVX2-64-LABEL: test_buildvector_v4i64:
 ; AVX2-64: # BB#0:
 ; AVX2-64-NEXT: vmovq %rcx, %xmm0
diff --git a/llvm/test/CodeGen/X86/build-vector-512.ll b/llvm/test/CodeGen/X86/build-vector-512.ll
index fbfbf2d53c6..21737cca93a 100644
--- a/llvm/test/CodeGen/X86/build-vector-512.ll
+++ b/llvm/test/CodeGen/X86/build-vector-512.ll
@@ -79,7 +79,25 @@ define <16 x float> @test_buildvector_v16f32(float %a0, float %a1, float %a2, fl
 define <8 x i64> @test_buildvector_v8i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
 ; AVX-32-LABEL: test_buildvector_v8i64:
 ; AVX-32: # BB#0:
-; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %zmm0
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
+; AVX-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX-32-NEXT: retl
 ;
 ; AVX-64-LABEL: test_buildvector_v8i64:
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index e2a4368b255..a6bc5aa321f 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -1063,89 +1063,87 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ;
 ; AVX1-LABEL: _clearupper32xi8b:
 ; AVX1: # BB#0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %r15
 ; AVX1-NEXT: pushq %r14
-; AVX1-NEXT: pushq %r13
-; AVX1-NEXT: pushq %r12
 ; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: vmovq %xmm0, %rcx
-; AVX1-NEXT: movq %rcx, %r8
-; AVX1-NEXT: movq %rcx, %r9
-; AVX1-NEXT: movq %rcx, %r10
-; AVX1-NEXT: movq %rcx, %r11
-; AVX1-NEXT: movq %rcx, %r14
-; AVX1-NEXT: movq %rcx, %r15
+; AVX1-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r14
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX1-NEXT: movq %rdx, %r12
-; AVX1-NEXT: movq %rdx, %r13
-; AVX1-NEXT: movq %rdx, %rbx
-; AVX1-NEXT: movq %rdx, %rax
-; AVX1-NEXT: movq %rdx, %rdi
+; AVX1-NEXT: movq %rdx, %r8
+; AVX1-NEXT: movq %rdx, %r9
+; AVX1-NEXT: movq %rdx, %r11
 ; AVX1-NEXT: movq %rdx, %rsi
-; AVX1-NEXT: movq %rdx, %rbp
+; AVX1-NEXT: movq %rdx, %rdi
+; AVX1-NEXT: movq %rdx, %rcx
+; AVX1-NEXT: movq %rdx, %rax
 ; AVX1-NEXT: andb $15, %dl
 ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %rcx, %rdx
+; AVX1-NEXT: shrq $56, %rax
+; AVX1-NEXT: andb $15, %al
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %r14, %r10
+; AVX1-NEXT: shrq $48, %rcx
 ; AVX1-NEXT: andb $15, %cl
 ; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $56, %rbp
-; AVX1-NEXT: andb $15, %bpl
-; AVX1-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $48, %rsi
+; AVX1-NEXT: movq %r14, %rdx
+; AVX1-NEXT: shrq $40, %rdi
+; AVX1-NEXT: andb $15, %dil
+; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %r14, %rax
+; AVX1-NEXT: shrq $32, %rsi
 ; AVX1-NEXT: andb $15, %sil
 ; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $40, %rdi
+; AVX1-NEXT: movq %r14, %rcx
+; AVX1-NEXT: shrq $24, %r11
+; AVX1-NEXT: andb $15, %r11b
+; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %r14, %rsi
+; AVX1-NEXT: shrq $16, %r9
+; AVX1-NEXT: andb $15, %r9b
+; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %r14, %rdi
+; AVX1-NEXT: shrq $8, %r8
+; AVX1-NEXT: andb $15, %r8b
+; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movq %r14, %rbx
+; AVX1-NEXT: andb $15, %r14b
+; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $8, %r10
+; AVX1-NEXT: shrq $16, %rdx
+; AVX1-NEXT: shrq $24, %rax
+; AVX1-NEXT: shrq $32, %rcx
+; AVX1-NEXT: shrq $40, %rsi
+; AVX1-NEXT: shrq $48, %rdi
+; AVX1-NEXT: shrq $56, %rbx
+; AVX1-NEXT: andb $15, %bl
+; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: andb $15, %dil
 ; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $32, %rax
+; AVX1-NEXT: andb $15, %sil
+; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %cl
+; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: andb $15, %al
 ; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $24, %rbx
-; AVX1-NEXT: andb $15, %bl
-; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $16, %r13
-; AVX1-NEXT: andb $15, %r13b
-; AVX1-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $8, %r12
-; AVX1-NEXT: andb $15, %r12b
-; AVX1-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $8, %r8
-; AVX1-NEXT: shrq $16, %r9
-; AVX1-NEXT: shrq $24, %r10
-; AVX1-NEXT: shrq $32, %r11
-; AVX1-NEXT: shrq $40, %r14
-; AVX1-NEXT: shrq $48, %r15
-; AVX1-NEXT: shrq $56, %rdx
 ; AVX1-NEXT: andb $15, %dl
 ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %r15b
-; AVX1-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %r14b
-; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %r11b
-; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: andb $15, %r10b
 ; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %r9b
-; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %r8b
-; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movq %rax, %r8
 ; AVX1-NEXT: movq %rax, %rdx
 ; AVX1-NEXT: movq %rax, %rsi
 ; AVX1-NEXT: movq %rax, %rdi
-; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: movl %eax, %ebx
+; AVX1-NEXT: movl %eax, %ecx
 ; AVX1-NEXT: vmovd %eax, %xmm1
 ; AVX1-NEXT: shrl $8, %eax
 ; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX1-NEXT: shrl $16, %ebx
-; AVX1-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
-; AVX1-NEXT: shrl $24, %ebp
-; AVX1-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
+; AVX1-NEXT: shrl $16, %ecx
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: shrl $24, %ebx
+; AVX1-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
 ; AVX1-NEXT: shrq $32, %rdi
 ; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
 ; AVX1-NEXT: shrq $40, %rsi
@@ -1155,8 +1153,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX1-NEXT: shrq $48, %rdx
 ; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: shrq $56, %rcx
-; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX1-NEXT: shrq $56, %r8
+; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
 ; AVX1-NEXT: movl %eax, %ecx
 ; AVX1-NEXT: shrl $8, %ecx
 ; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@@ -1224,98 +1222,92 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %r12
-; AVX1-NEXT: popq %r13
 ; AVX1-NEXT: popq %r14
-; AVX1-NEXT: popq %r15
-; AVX1-NEXT: popq %rbp
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: _clearupper32xi8b:
 ; AVX2: # BB#0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %r15
 ; AVX2-NEXT: pushq %r14
-; AVX2-NEXT: pushq %r13
-; AVX2-NEXT: pushq %r12
 ; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: movq %rcx, %r8
-; AVX2-NEXT: movq %rcx, %r9
-; AVX2-NEXT: movq %rcx, %r10
-; AVX2-NEXT: movq %rcx, %r11
-; AVX2-NEXT: movq %rcx, %r14
-; AVX2-NEXT: movq %rcx, %r15
+; AVX2-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r14
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX2-NEXT: movq %rdx, %r12
-; AVX2-NEXT: movq %rdx, %r13
-; AVX2-NEXT: movq %rdx, %rbx
-; AVX2-NEXT: movq %rdx, %rax
-; AVX2-NEXT: movq %rdx, %rdi
+; AVX2-NEXT: movq %rdx, %r8
+; AVX2-NEXT: movq %rdx, %r9
+; AVX2-NEXT: movq %rdx, %r11
 ; AVX2-NEXT: movq %rdx, %rsi
-; AVX2-NEXT: movq %rdx, %rbp
+; AVX2-NEXT: movq %rdx, %rdi
+; AVX2-NEXT: movq %rdx, %rcx
+; AVX2-NEXT: movq %rdx, %rax
 ; AVX2-NEXT: andb $15, %dl
 ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %rcx, %rdx
+; AVX2-NEXT: shrq $56, %rax
+; AVX2-NEXT: andb $15, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r14, %r10
+; AVX2-NEXT: shrq $48, %rcx
 ; AVX2-NEXT: andb $15, %cl
 ; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $56, %rbp
-; AVX2-NEXT: andb $15, %bpl
-; AVX2-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $48, %rsi
+; AVX2-NEXT: movq %r14, %rdx
+; AVX2-NEXT: shrq $40, %rdi
+; AVX2-NEXT: andb $15, %dil
+; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r14, %rax
+; AVX2-NEXT: shrq $32, %rsi
 ; AVX2-NEXT: andb $15, %sil
 ; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $40, %rdi
+; AVX2-NEXT: movq %r14, %rcx
+; AVX2-NEXT: shrq $24, %r11
+; AVX2-NEXT: andb $15, %r11b
+; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r14, %rsi
+; AVX2-NEXT: shrq $16, %r9
+; AVX2-NEXT: andb $15, %r9b
+; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r14, %rdi
+; AVX2-NEXT: shrq $8, %r8
+; AVX2-NEXT: andb $15, %r8b
+; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %r14, %rbx
+; AVX2-NEXT: andb $15, %r14b
+; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $8, %r10
+; AVX2-NEXT: shrq $16, %rdx
+; AVX2-NEXT: shrq $24, %rax
+; AVX2-NEXT: shrq $32, %rcx
+; AVX2-NEXT: shrq $40, %rsi
+; AVX2-NEXT: shrq $48, %rdi
+; AVX2-NEXT: shrq $56, %rbx
+; AVX2-NEXT: andb $15, %bl
+; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: andb $15, %dil
 ; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $32, %rax
+; AVX2-NEXT: andb $15, %sil
+; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %cl
+; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: andb $15, %al
 ; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $24, %rbx
-; AVX2-NEXT: andb $15, %bl
-; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $16, %r13
-; AVX2-NEXT: andb $15, %r13b
-; AVX2-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $8, %r12
-; AVX2-NEXT: andb $15, %r12b
-; AVX2-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $8, %r8
-; AVX2-NEXT: shrq $16, %r9
-; AVX2-NEXT: shrq $24, %r10
-; AVX2-NEXT: shrq $32, %r11
-; AVX2-NEXT: shrq $40, %r14
-; AVX2-NEXT: shrq $48, %r15
-; AVX2-NEXT: shrq $56, %rdx
 ; AVX2-NEXT: andb $15, %dl
 ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %r15b
-; AVX2-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %r14b
-; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %r11b
-; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: andb $15, %r10b
 ; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %r9b
-; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %r8b
-; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movq %rax, %r8
 ; AVX2-NEXT: movq %rax, %rdx
 ; AVX2-NEXT: movq %rax, %rsi
 ; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: movl %eax, %ebx
+; AVX2-NEXT: movl %eax, %ecx
 ; AVX2-NEXT: vmovd %eax, %xmm1
 ; AVX2-NEXT: shrl $8, %eax
 ; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: shrl $16, %ebx
-; AVX2-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
-; AVX2-NEXT: shrl $24, %ebp
-; AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
+; AVX2-NEXT: shrl $16, %ecx
+; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: shrl $24, %ebx
+; AVX2-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
 ; AVX2-NEXT: shrq $32, %rdi
 ; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
 ; AVX2-NEXT: shrq $40, %rsi
@@ -1325,8 +1317,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX2-NEXT: shrq $48, %rdx
 ; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %rcx
-; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX2-NEXT: shrq $56, %r8
+; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
 ; AVX2-NEXT: movl %eax, %ecx
 ; AVX2-NEXT: shrl $8, %ecx
 ; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@@ -1394,11 +1386,7 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %r12
-; AVX2-NEXT: popq %r13
 ; AVX2-NEXT: popq %r14
-; AVX2-NEXT: popq %r15
-; AVX2-NEXT: popq %rbp
 ; AVX2-NEXT: retq
 %x4 = bitcast <32 x i8> %0 to <64 x i4>
 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index 182d7cc73c9..b5c7f86567a 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -101,8 +101,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: jge .LBB4_2
 ; CHECK-NEXT: # BB#1: # %bb1
 ; CHECK-NEXT: movl $1, %eax
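One behavioral difference in the `SelectionDAGAddressAnalysis.cpp` hunk is worth spelling out: the reverted `BaseIndexOffset::match` also folded `ISD::OR` nodes into the running offset when the OR provably acts as an add (the `DAG.MaskedValueIsZero` check), while the restored loop folds only `ISD::ADD`. The identity behind that check is that OR equals ADD whenever the two operands share no set bits, so no carries can occur. A plain-integer sketch of the condition, not LLVM API:

```cpp
#include <cassert>
#include <cstdint>

// An OR acts as an ADD when no bit is set in both operands: with no
// overlapping bits there are no carries, so (A | C) == (A + C). This is
// the property the reverted code verified with DAG.MaskedValueIsZero
// before treating an ISD::OR node like a constant add.
bool orActsAsAdd(uint64_t A, uint64_t C) {
  return (A & C) == 0;
}

int main() {
  uint64_t Base = 0xFF00; // low bits known zero, e.g. an aligned address
  uint64_t Off = 0x04;    // small constant folded into the offset
  assert(orActsAsAdd(Base, Off));
  assert((Base | Off) == Base + Off); // safe to treat the OR as an add

  uint64_t Unaligned = 0xFF06; // bit 2 overlaps Off
  assert(!orActsAsAdd(Unaligned, Off));
  assert((Unaligned | Off) != Unaligned + Off); // folding here would be wrong
}
```

Address arithmetic on values with known alignment is one place such ORs arise, which is one way the two implementations can classify loads differently and why several of the checked instruction sequences above change.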

