-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  56
-rw-r--r--  llvm/test/CodeGen/AMDGPU/calling-conventions.ll  |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll        |  54
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll        |  22
-rw-r--r--  llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll     |   3
-rw-r--r--  llvm/test/CodeGen/Hexagon/subi-asl.ll            |   5
-rw-r--r--  llvm/test/CodeGen/X86/scheduler-backtracking.ll  | 221
-rw-r--r--  llvm/test/CodeGen/X86/signbit-shift.ll           |   5
-rw-r--r--  llvm/test/CodeGen/X86/split-store.ll             |   7
9 files changed, 203 insertions(+), 176 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4448b6b06d3..29926a49cc1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -341,7 +341,8 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
- SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
@@ -2111,7 +2112,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2264,20 +2268,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0.getOperand(1));
}
- if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
- return V;
-
- if (SDValue V = foldAddSubOfSignBit(N, DAG))
- return V;
-
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- // fold (a+b) -> (a|b) iff a and b share no bits.
- if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, DL, VT, N0, N1);
-
if (isOneOrOneSplat(N1)) {
// fold (add (xor a, -1), 1) -> (sub 0, a)
if (isBitwiseNot(N0))
@@ -2303,15 +2296,38 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
- if (SDValue Combined = visitADDLike(N0, N1, N))
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
return Combined;
- if (SDValue Combined = visitADDLike(N1, N0, N))
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
return Combined;
return SDValue();
}
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
@@ -2414,7 +2430,9 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
@@ -5546,6 +5564,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
return SDValue();
}
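For context before the test updates below: the practical effect of the visitOR change is that an 'or' whose operands provably share no set bits can now reach the same combines as an 'add', since DAG.haveNoCommonBitsSet(N0, N1) lets visitADDLike be tried on the OR node. The following is a minimal, hypothetical IR sketch (not taken from this patch's test files; the function name is made up) of the kind of pattern that qualifies.

; %lo occupies only bits 0-7 (zext from i8) and %hi occupies only bits 8-31
; (shifted left by 8), so the operands of the 'or' have no common bits set
; and the node behaves like an 'add' for combining purposes.
define i32 @or_with_disjoint_bits(i8 %x, i32 %y) {
  %lo = zext i8 %x to i32
  %hi = shl i32 %y, 8
  %r = or i32 %lo, %hi
  ret i32 %r
}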
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 748222529d7..4c148d938fe 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -171,9 +171,9 @@ define amdgpu_ps <2 x half> @ps_mesa_inreg_v2f16(<2 x half> inreg %arg0) {
; SI: v_lshlrev_b32_e32 v1, 16, v1
; SI: v_add_i32_e32 v0, vcc, 1, v0
-; SI: v_add_i32_e32 v1, vcc, 0x10000, v1
; SI: v_and_b32
; SI: v_or_b32
+; SI: v_add_i32_e32 v0, vcc, 0x10000, v0
define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
%add = add <2 x i16> %arg0, <i16 1, i16 1>
store <2 x i16> %add, <2 x i16> addrspace(1)* undef
@@ -183,16 +183,16 @@ define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
; GCN-LABEL: {{^}}ps_mesa_inreg_v2i16:
; VI: s_and_b32 s1, s0, 0xffff0000
; VI: s_add_i32 s0, s0, 1
-; VI: s_add_i32 s1, s1, 0x10000
; VI: s_and_b32 s0, s0, 0xffff
; VI: s_or_b32 s0, s0, s1
+; VI: s_add_i32 s0, s0, 0x10000
; VI: v_mov_b32_e32 v0, s0
; SI: s_lshl_b32 s1, s1, 16
; SI: s_add_i32 s0, s0, 1
-; SI: s_add_i32 s1, s1, 0x10000
; SI: s_and_b32 s0, s0, 0xffff
; SI: s_or_b32 s0, s0, s1
+; SI: s_add_i32 s0, s0, 0x10000
define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
%add = add <2 x i16> %arg0, <i16 1, i16 1>
store <2 x i16> %add, <2 x i16> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index b2b5ae01557..53277027f58 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -276,34 +276,33 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
; SI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; SI-NEXT: s_movk_i32 s12, 0x900
+; SI-NEXT: s_movk_i32 s12, 0xff
; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_mov_b32 s11, s3
-; SI-NEXT: s_movk_i32 s13, 0xff
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1
-; SI-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; SI-NEXT: v_and_b32_e32 v6, 0xff00, v1
; SI-NEXT: v_add_i32_e32 v7, vcc, 9, v1
+; SI-NEXT: v_and_b32_e32 v6, 0xff00, v1
+; SI-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v1
; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1
; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v6
-; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4
+; SI-NEXT: v_and_b32_e32 v7, s12, v7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
-; SI-NEXT: v_add_i32_e32 v6, vcc, s12, v6
-; SI-NEXT: v_and_b32_e32 v7, s13, v7
; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_add_i32_e32 v1, vcc, s12, v5
-; SI-NEXT: v_and_b32_e32 v2, s13, v4
-; SI-NEXT: v_or_b32_e32 v0, v7, v6
-; SI-NEXT: v_or_b32_e32 v1, v2, v1
-; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_or_b32_e32 v0, v0, v1
+; SI-NEXT: v_or_b32_e32 v1, v7, v6
+; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
+; SI-NEXT: v_and_b32_e32 v0, s12, v4
+; SI-NEXT: v_or_b32_e32 v0, v0, v5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x900, v1
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SI-NEXT: v_or_b32_e32 v0, v1, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@@ -313,7 +312,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
-; VI-NEXT: s_movk_i32 s8, 0x900
+; VI-NEXT: v_mov_b32_e32 v4, 9
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
@@ -323,23 +322,24 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s6, s2
; VI-NEXT: s_mov_b32 s7, s3
-; VI-NEXT: v_mov_b32_e32 v4, 9
+; VI-NEXT: s_movk_i32 s8, 0x900
+; VI-NEXT: v_mov_b32_e32 v6, s8
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v5
+; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v5
; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v5
; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v5
; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v5
; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v5
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v5
-; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6
-; VI-NEXT: v_add_u16_e32 v8, 9, v5
-; VI-NEXT: v_add_u16_e32 v0, s8, v7
-; VI-NEXT: v_add_u16_e32 v1, s8, v1
-; VI-NEXT: v_add_u16_sdwa v2, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_and_b32_e32 v8, 0xffffff00, v5
+; VI-NEXT: v_add_u16_e32 v9, 9, v5
+; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v7
+; VI-NEXT: v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: v_add_u16_e32 v0, s8, v0
+; VI-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 598048dcff9..61b03750e7e 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -14,9 +14,9 @@
; CIVI: s_max_i32
; CIVI: s_max_i32
; CIVI: s_add_i32
-; CIVI: s_add_i32
-; CIVI: s_and_b32
-; CIVI: s_or_b32
+; CIVI-DAG: s_add_i32
+; CIVI-DAG: s_and_b32
+; CIVI-DAG: s_or_b32
define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
%neg = sub <2 x i16> zeroinitializer, %val
%cond = icmp sgt <2 x i16> %val, %neg
@@ -45,14 +45,14 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; CI: buffer_load_dword v
; CI: v_lshrrev_b32_e32
-; CI: v_sub_i32_e32
-; CI: v_bfe_i32
-; CI: v_bfe_i32
-; CI: v_max_i32
-; CI: v_max_i32
-; CI: v_add_i32
-; CI: v_add_i32
-; CI: v_or_b32
+; CI-DAG: v_sub_i32_e32
+; CI-DAG: v_bfe_i32
+; CI-DAG: v_bfe_i32
+; CI-DAG: v_max_i32
+; CI-DAG: v_max_i32
+; CI-DAG: v_add_i32
+; CI-DAG: v_add_i32
+; CI-DAG: v_or_b32
define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %src, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
index c5c4476d20f..a3216422f18 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
@@ -213,12 +213,11 @@ define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s1, s0, 0xff00
-; SI-NEXT: s_and_b32 s0, s0, 0xffff
; SI-NEXT: s_add_i32 s0, s0, 12
; SI-NEXT: s_or_b32 s0, s0, 4
-; SI-NEXT: s_addk_i32 s1, 0x2c00
; SI-NEXT: s_and_b32 s0, s0, 0xff
; SI-NEXT: s_or_b32 s0, s0, s1
+; SI-NEXT: s_addk_i32 s0, 0x2c00
; SI-NEXT: s_or_b32 s0, s0, 0x300
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
diff --git a/llvm/test/CodeGen/Hexagon/subi-asl.ll b/llvm/test/CodeGen/Hexagon/subi-asl.ll
index d7610ceb62a..0fd88384b89 100644
--- a/llvm/test/CodeGen/Hexagon/subi-asl.ll
+++ b/llvm/test/CodeGen/Hexagon/subi-asl.ll
@@ -3,7 +3,10 @@
; Check if S4_subi_asl_ri is being generated correctly.
; CHECK-LABEL: yes_sub_asl
-; CHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1))
+; FIXME: We no longer get subi_asl here.
+; XCHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1))
+; CHECK: [[REG1:(r[0-9]+)]] = asl([[REG1]],#1)
+; CHECK: = sub(#0,[[REG1]])
; CHECK-LABEL: no_sub_asl
; CHECK: [[REG2:(r[0-9]+)]] = asl(r{{[0-9]+}},#1)
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index c3c16d977d5..be6baaf42ae 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -17,131 +17,135 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
; ILP-NEXT: addl %esi, %esi
-; ILP-NEXT: addb $2, %sil
-; ILP-NEXT: orb $1, %sil
-; ILP-NEXT: movl $1, %r10d
-; ILP-NEXT: xorl %r14d, %r14d
+; ILP-NEXT: leal 3(%rsi), %r9d
+; ILP-NEXT: movb $125, %r10b
+; ILP-NEXT: movl $1, %edi
+; ILP-NEXT: xorl %r11d, %r11d
+; ILP-NEXT: movl %r9d, %ecx
+; ILP-NEXT: shldq %cl, %rdi, %r11
+; ILP-NEXT: subb %sil, %r10b
+; ILP-NEXT: addb $-125, %sil
+; ILP-NEXT: xorl %ebx, %ebx
; ILP-NEXT: movl %esi, %ecx
-; ILP-NEXT: shldq %cl, %r10, %r14
+; ILP-NEXT: shldq %cl, %rdi, %rbx
; ILP-NEXT: movl $1, %edx
; ILP-NEXT: shlq %cl, %rdx
-; ILP-NEXT: leal -128(%rsi), %r9d
-; ILP-NEXT: movb $-128, %r11b
-; ILP-NEXT: xorl %ebx, %ebx
+; ILP-NEXT: movl $1, %r14d
+; ILP-NEXT: movl %r10d, %ecx
+; ILP-NEXT: shrdq %cl, %r8, %r14
; ILP-NEXT: movl %r9d, %ecx
-; ILP-NEXT: shldq %cl, %r10, %rbx
-; ILP-NEXT: testb $64, %sil
-; ILP-NEXT: cmovneq %rdx, %r14
-; ILP-NEXT: cmovneq %r8, %rdx
-; ILP-NEXT: movl $1, %edi
; ILP-NEXT: shlq %cl, %rdi
-; ILP-NEXT: subb %sil, %r11b
-; ILP-NEXT: movl %r11d, %ecx
-; ILP-NEXT: shrdq %cl, %r8, %r10
-; ILP-NEXT: testb $64, %r11b
-; ILP-NEXT: cmovneq %r8, %r10
; ILP-NEXT: testb $64, %r9b
-; ILP-NEXT: cmovneq %rdi, %rbx
+; ILP-NEXT: cmovneq %rdi, %r11
; ILP-NEXT: cmovneq %r8, %rdi
-; ILP-NEXT: testb %sil, %sil
-; ILP-NEXT: cmovsq %r8, %r14
-; ILP-NEXT: cmovsq %r8, %rdx
-; ILP-NEXT: movq %r14, 8(%rax)
-; ILP-NEXT: movq %rdx, (%rax)
+; ILP-NEXT: testb $64, %r10b
+; ILP-NEXT: cmovneq %r8, %r14
+; ILP-NEXT: testb $64, %sil
+; ILP-NEXT: cmovneq %rdx, %rbx
+; ILP-NEXT: cmovneq %r8, %rdx
+; ILP-NEXT: testb %r9b, %r9b
+; ILP-NEXT: cmovsq %r8, %r11
+; ILP-NEXT: cmovsq %r8, %rdi
+; ILP-NEXT: movq %r11, 8(%rax)
+; ILP-NEXT: movq %rdi, (%rax)
; ILP-NEXT: cmovnsq %r8, %rbx
; ILP-NEXT: cmoveq %r8, %rbx
; ILP-NEXT: movq %rbx, 24(%rax)
-; ILP-NEXT: cmovnsq %r10, %rdi
-; ILP-NEXT: cmoveq %r8, %rdi
-; ILP-NEXT: movq %rdi, 16(%rax)
+; ILP-NEXT: cmovnsq %r14, %rdx
+; ILP-NEXT: cmoveq %r8, %rdx
+; ILP-NEXT: movq %rdx, 16(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r14
; ILP-NEXT: retq
;
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
+; HYBRID-NEXT: pushq %rbx
; HYBRID-NEXT: movq %rdi, %rax
; HYBRID-NEXT: addl %esi, %esi
-; HYBRID-NEXT: addb $2, %sil
-; HYBRID-NEXT: orb $1, %sil
-; HYBRID-NEXT: movb $-128, %cl
+; HYBRID-NEXT: movb $125, %cl
; HYBRID-NEXT: subb %sil, %cl
; HYBRID-NEXT: xorl %r8d, %r8d
-; HYBRID-NEXT: movl $1, %r11d
+; HYBRID-NEXT: movl $1, %edi
; HYBRID-NEXT: movl $1, %r9d
; HYBRID-NEXT: shrdq %cl, %r8, %r9
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r8, %r9
-; HYBRID-NEXT: xorl %r10d, %r10d
-; HYBRID-NEXT: movl %esi, %ecx
-; HYBRID-NEXT: shldq %cl, %r11, %r10
-; HYBRID-NEXT: leal -128(%rsi), %ecx
-; HYBRID-NEXT: xorl %edi, %edi
-; HYBRID-NEXT: shldq %cl, %r11, %rdi
-; HYBRID-NEXT: movl $1, %edx
-; HYBRID-NEXT: shlq %cl, %rdx
-; HYBRID-NEXT: testb $64, %cl
-; HYBRID-NEXT: cmovneq %rdx, %rdi
-; HYBRID-NEXT: cmovneq %r8, %rdx
+; HYBRID-NEXT: leal 3(%rsi), %r10d
+; HYBRID-NEXT: xorl %r11d, %r11d
+; HYBRID-NEXT: movl %r10d, %ecx
+; HYBRID-NEXT: shldq %cl, %rdi, %r11
+; HYBRID-NEXT: addb $-125, %sil
+; HYBRID-NEXT: xorl %edx, %edx
; HYBRID-NEXT: movl %esi, %ecx
-; HYBRID-NEXT: shlq %cl, %r11
+; HYBRID-NEXT: shldq %cl, %rdi, %rdx
+; HYBRID-NEXT: movl $1, %ebx
+; HYBRID-NEXT: shlq %cl, %rbx
; HYBRID-NEXT: testb $64, %sil
-; HYBRID-NEXT: cmovneq %r11, %r10
-; HYBRID-NEXT: cmovneq %r8, %r11
-; HYBRID-NEXT: testb %sil, %sil
-; HYBRID-NEXT: cmovsq %r8, %r10
-; HYBRID-NEXT: movq %r10, 8(%rax)
+; HYBRID-NEXT: cmovneq %rbx, %rdx
+; HYBRID-NEXT: cmovneq %r8, %rbx
+; HYBRID-NEXT: movl %r10d, %ecx
+; HYBRID-NEXT: shlq %cl, %rdi
+; HYBRID-NEXT: testb $64, %r10b
+; HYBRID-NEXT: cmovneq %rdi, %r11
+; HYBRID-NEXT: cmovneq %r8, %rdi
+; HYBRID-NEXT: testb %r10b, %r10b
; HYBRID-NEXT: cmovsq %r8, %r11
-; HYBRID-NEXT: movq %r11, (%rax)
-; HYBRID-NEXT: cmovnsq %r8, %rdi
-; HYBRID-NEXT: cmoveq %r8, %rdi
-; HYBRID-NEXT: movq %rdi, 24(%rax)
-; HYBRID-NEXT: cmovnsq %r9, %rdx
+; HYBRID-NEXT: movq %r11, 8(%rax)
+; HYBRID-NEXT: cmovsq %r8, %rdi
+; HYBRID-NEXT: movq %rdi, (%rax)
+; HYBRID-NEXT: cmovnsq %r8, %rdx
; HYBRID-NEXT: cmoveq %r8, %rdx
-; HYBRID-NEXT: movq %rdx, 16(%rax)
+; HYBRID-NEXT: movq %rdx, 24(%rax)
+; HYBRID-NEXT: cmovnsq %r9, %rbx
+; HYBRID-NEXT: cmoveq %r8, %rbx
+; HYBRID-NEXT: movq %rbx, 16(%rax)
+; HYBRID-NEXT: popq %rbx
; HYBRID-NEXT: retq
;
; BURR-LABEL: test1:
; BURR: # %bb.0:
+; BURR-NEXT: pushq %rbx
; BURR-NEXT: movq %rdi, %rax
; BURR-NEXT: addl %esi, %esi
-; BURR-NEXT: addb $2, %sil
-; BURR-NEXT: orb $1, %sil
-; BURR-NEXT: movb $-128, %cl
+; BURR-NEXT: movb $125, %cl
; BURR-NEXT: subb %sil, %cl
; BURR-NEXT: xorl %r8d, %r8d
-; BURR-NEXT: movl $1, %r11d
+; BURR-NEXT: movl $1, %edi
; BURR-NEXT: movl $1, %r9d
; BURR-NEXT: shrdq %cl, %r8, %r9
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r8, %r9
-; BURR-NEXT: xorl %r10d, %r10d
+; BURR-NEXT: leal 3(%rsi), %r10d
+; BURR-NEXT: xorl %r11d, %r11d
+; BURR-NEXT: movl %r10d, %ecx
+; BURR-NEXT: shldq %cl, %rdi, %r11
+; BURR-NEXT: addb $-125, %sil
+; BURR-NEXT: xorl %edx, %edx
; BURR-NEXT: movl %esi, %ecx
-; BURR-NEXT: shldq %cl, %r11, %r10
-; BURR-NEXT: leal -128(%rsi), %ecx
-; BURR-NEXT: xorl %edi, %edi
-; BURR-NEXT: shldq %cl, %r11, %rdi
-; BURR-NEXT: movl $1, %edx
-; BURR-NEXT: shlq %cl, %rdx
-; BURR-NEXT: testb $64, %cl
-; BURR-NEXT: cmovneq %rdx, %rdi
-; BURR-NEXT: cmovneq %r8, %rdx
-; BURR-NEXT: movl %esi, %ecx
-; BURR-NEXT: shlq %cl, %r11
+; BURR-NEXT: shldq %cl, %rdi, %rdx
+; BURR-NEXT: movl $1, %ebx
+; BURR-NEXT: shlq %cl, %rbx
; BURR-NEXT: testb $64, %sil
-; BURR-NEXT: cmovneq %r11, %r10
-; BURR-NEXT: cmovneq %r8, %r11
-; BURR-NEXT: testb %sil, %sil
-; BURR-NEXT: cmovsq %r8, %r10
-; BURR-NEXT: movq %r10, 8(%rax)
+; BURR-NEXT: cmovneq %rbx, %rdx
+; BURR-NEXT: cmovneq %r8, %rbx
+; BURR-NEXT: movl %r10d, %ecx
+; BURR-NEXT: shlq %cl, %rdi
+; BURR-NEXT: testb $64, %r10b
+; BURR-NEXT: cmovneq %rdi, %r11
+; BURR-NEXT: cmovneq %r8, %rdi
+; BURR-NEXT: testb %r10b, %r10b
; BURR-NEXT: cmovsq %r8, %r11
-; BURR-NEXT: movq %r11, (%rax)
-; BURR-NEXT: cmovnsq %r8, %rdi
-; BURR-NEXT: cmoveq %r8, %rdi
-; BURR-NEXT: movq %rdi, 24(%rax)
-; BURR-NEXT: cmovnsq %r9, %rdx
+; BURR-NEXT: movq %r11, 8(%rax)
+; BURR-NEXT: cmovsq %r8, %rdi
+; BURR-NEXT: movq %rdi, (%rax)
+; BURR-NEXT: cmovnsq %r8, %rdx
; BURR-NEXT: cmoveq %r8, %rdx
-; BURR-NEXT: movq %rdx, 16(%rax)
+; BURR-NEXT: movq %rdx, 24(%rax)
+; BURR-NEXT: cmovnsq %r9, %rbx
+; BURR-NEXT: cmoveq %r8, %rbx
+; BURR-NEXT: movq %rbx, 16(%rax)
+; BURR-NEXT: popq %rbx
; BURR-NEXT: retq
;
; SRC-LABEL: test1:
@@ -149,9 +153,8 @@ define i256 @test1(i256 %a) nounwind {
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
; SRC-NEXT: addl %esi, %esi
-; SRC-NEXT: addb $2, %sil
-; SRC-NEXT: orb $1, %sil
-; SRC-NEXT: movb $-128, %cl
+; SRC-NEXT: leal 3(%rsi), %r9d
+; SRC-NEXT: movb $125, %cl
; SRC-NEXT: subb %sil, %cl
; SRC-NEXT: xorl %r8d, %r8d
; SRC-NEXT: movl $1, %edi
@@ -159,24 +162,24 @@ define i256 @test1(i256 %a) nounwind {
; SRC-NEXT: shrdq %cl, %r8, %r10
; SRC-NEXT: testb $64, %cl
; SRC-NEXT: cmovneq %r8, %r10
-; SRC-NEXT: leal -128(%rsi), %r9d
+; SRC-NEXT: addb $-125, %sil
; SRC-NEXT: xorl %edx, %edx
-; SRC-NEXT: movl %r9d, %ecx
+; SRC-NEXT: movl %esi, %ecx
; SRC-NEXT: shldq %cl, %rdi, %rdx
; SRC-NEXT: xorl %r11d, %r11d
-; SRC-NEXT: movl %esi, %ecx
+; SRC-NEXT: movl %r9d, %ecx
; SRC-NEXT: shldq %cl, %rdi, %r11
; SRC-NEXT: movl $1, %ebx
; SRC-NEXT: shlq %cl, %rbx
-; SRC-NEXT: testb $64, %sil
+; SRC-NEXT: testb $64, %r9b
; SRC-NEXT: cmovneq %rbx, %r11
; SRC-NEXT: cmovneq %r8, %rbx
-; SRC-NEXT: movl %r9d, %ecx
+; SRC-NEXT: movl %esi, %ecx
; SRC-NEXT: shlq %cl, %rdi
-; SRC-NEXT: testb $64, %r9b
+; SRC-NEXT: testb $64, %sil
; SRC-NEXT: cmovneq %rdi, %rdx
; SRC-NEXT: cmovneq %r8, %rdi
-; SRC-NEXT: testb %sil, %sil
+; SRC-NEXT: testb %r9b, %r9b
; SRC-NEXT: cmovnsq %r10, %rdi
; SRC-NEXT: cmoveq %r8, %rdi
; SRC-NEXT: cmovnsq %r8, %rdx
@@ -196,31 +199,29 @@ define i256 @test1(i256 %a) nounwind {
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
; LIN-NEXT: addl %esi, %esi
-; LIN-NEXT: addb $2, %sil
-; LIN-NEXT: orb $1, %sil
-; LIN-NEXT: movl $1, %edx
-; LIN-NEXT: movl %esi, %ecx
-; LIN-NEXT: shlq %cl, %rdx
-; LIN-NEXT: testb $64, %sil
-; LIN-NEXT: movq %rdx, %rcx
-; LIN-NEXT: cmovneq %r9, %rcx
-; LIN-NEXT: testb %sil, %sil
-; LIN-NEXT: cmovsq %r9, %rcx
-; LIN-NEXT: movq %rcx, (%rdi)
-; LIN-NEXT: xorl %edi, %edi
-; LIN-NEXT: movl %esi, %ecx
-; LIN-NEXT: shldq %cl, %r8, %rdi
-; LIN-NEXT: cmovneq %rdx, %rdi
-; LIN-NEXT: cmovsq %r9, %rdi
-; LIN-NEXT: movq %rdi, 8(%rax)
-; LIN-NEXT: leal -128(%rsi), %r10d
+; LIN-NEXT: leal 3(%rsi), %ecx
+; LIN-NEXT: movl $1, %edi
+; LIN-NEXT: shlq %cl, %rdi
+; LIN-NEXT: testb $64, %cl
+; LIN-NEXT: movq %rdi, %rdx
+; LIN-NEXT: cmovneq %r9, %rdx
+; LIN-NEXT: testb %cl, %cl
+; LIN-NEXT: cmovsq %r9, %rdx
+; LIN-NEXT: movq %rdx, (%rax)
+; LIN-NEXT: xorl %edx, %edx
+; LIN-NEXT: # kill: def $cl killed $cl killed $ecx
+; LIN-NEXT: shldq %cl, %r8, %rdx
+; LIN-NEXT: cmovneq %rdi, %rdx
+; LIN-NEXT: cmovsq %r9, %rdx
+; LIN-NEXT: movq %rdx, 8(%rax)
+; LIN-NEXT: leal -125(%rsi), %r10d
; LIN-NEXT: movl $1, %edx
; LIN-NEXT: movl %r10d, %ecx
; LIN-NEXT: shlq %cl, %rdx
; LIN-NEXT: testb $64, %r10b
; LIN-NEXT: movq %rdx, %rdi
; LIN-NEXT: cmovneq %r9, %rdi
-; LIN-NEXT: movb $-128, %cl
+; LIN-NEXT: movb $125, %cl
; LIN-NEXT: subb %sil, %cl
; LIN-NEXT: movl $1, %esi
; LIN-NEXT: shrdq %cl, %r9, %rsi
diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll
index 7c2ce7a0802..94d68ccc84b 100644
--- a/llvm/test/CodeGen/X86/signbit-shift.ll
+++ b/llvm/test/CodeGen/X86/signbit-shift.ll
@@ -33,8 +33,9 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
+; CHECK-NEXT: psubd %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = zext <4 x i1> %c to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/split-store.ll b/llvm/test/CodeGen/X86/split-store.ll
index 575f46c04c0..a5c34c41526 100644
--- a/llvm/test/CodeGen/X86/split-store.ll
+++ b/llvm/test/CodeGen/X86/split-store.ll
@@ -217,10 +217,9 @@ define void @int1_int1_pair(i1 signext %tmp1, i1 signext %tmp2, i2* %ref.tmp) {
; CHECK-LABEL: int1_int1_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: addb %sil, %sil
-; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: orb %sil, %dil
-; CHECK-NEXT: andb $3, %dil
-; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: subb %dil, %sil
+; CHECK-NEXT: andb $3, %sil
+; CHECK-NEXT: movb %sil, (%rdx)
; CHECK-NEXT: retq
%t1 = zext i1 %tmp2 to i2
%t2 = shl nuw i2 %t1, 1