 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  56
 llvm/test/CodeGen/AMDGPU/calling-conventions.ll  |   6
 llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll        |  54
 llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll        |  22
 llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll     |   3
 llvm/test/CodeGen/Hexagon/subi-asl.ll            |   5
 llvm/test/CodeGen/X86/scheduler-backtracking.ll  | 221
 llvm/test/CodeGen/X86/signbit-shift.ll           |   5
 llvm/test/CodeGen/X86/split-store.ll             |   7
 9 files changed, 203 insertions(+), 176 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4448b6b06d3..29926a49cc1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -341,7 +341,8 @@ namespace {
     SDValue visitTokenFactor(SDNode *N);
     SDValue visitMERGE_VALUES(SDNode *N);
     SDValue visitADD(SDNode *N);
-    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+    SDValue visitADDLike(SDNode *N);
+    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
     SDValue visitSUB(SDNode *N);
     SDValue visitADDSAT(SDNode *N);
     SDValue visitSUBSAT(SDNode *N);
@@ -2111,7 +2112,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
 }
 
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
@@ -2264,20 +2268,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                          N0.getOperand(1));
   }
 
-  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
-    return V;
-
-  if (SDValue V = foldAddSubOfSignBit(N, DAG))
-    return V;
-
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-  // fold (a+b) -> (a|b) iff a and b share no bits.
-  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
-      DAG.haveNoCommonBitsSet(N0, N1))
-    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
-
   if (isOneOrOneSplat(N1)) {
     // fold (add (xor a, -1), 1) -> (sub 0, a)
     if (isBitwiseNot(N0))
@@ -2303,15 +2296,38 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     }
   }
 
-  if (SDValue Combined = visitADDLike(N0, N1, N))
+  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
-  if (SDValue Combined = visitADDLike(N1, N0, N))
+  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
     return Combined;
 
   return SDValue();
 }
 
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+
+  if (SDValue Combined = visitADDLike(N))
+    return Combined;
+
+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+      DAG.haveNoCommonBitsSet(N0, N1))
+    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
   unsigned Opcode = N->getOpcode();
   SDValue N0 = N->getOperand(0);
@@ -2414,7 +2430,9 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
 }
 
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, +                                          SDNode *LocReference) {    EVT VT = N0.getValueType();    SDLoc DL(LocReference); @@ -5546,6 +5564,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {    if (SimplifyDemandedBits(SDValue(N, 0)))      return SDValue(N, 0); +  // If OR can be rewritten into ADD, try combines based on ADD. +  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && +      DAG.haveNoCommonBitsSet(N0, N1)) +    if (SDValue Combined = visitADDLike(N)) +      return Combined; +    return SDValue();  } diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll index 748222529d7..4c148d938fe 100644 --- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -171,9 +171,9 @@ define amdgpu_ps <2 x half> @ps_mesa_inreg_v2f16(<2 x half> inreg %arg0) {  ; SI: v_lshlrev_b32_e32 v1, 16, v1  ; SI: v_add_i32_e32 v0, vcc, 1, v0 -; SI: v_add_i32_e32 v1, vcc, 0x10000, v1  ; SI: v_and_b32  ; SI: v_or_b32 +; SI: v_add_i32_e32 v0, vcc, 0x10000, v0  define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {    %add = add <2 x i16> %arg0, <i16 1, i16 1>    store <2 x i16> %add, <2 x i16> addrspace(1)* undef @@ -183,16 +183,16 @@ define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {  ; GCN-LABEL: {{^}}ps_mesa_inreg_v2i16:  ; VI: s_and_b32 s1, s0, 0xffff0000  ; VI: s_add_i32 s0, s0, 1 -; VI: s_add_i32 s1, s1, 0x10000  ; VI: s_and_b32 s0, s0, 0xffff  ; VI: s_or_b32 s0, s0, s1 +; VI: s_add_i32 s0, s0, 0x10000  ; VI: v_mov_b32_e32 v0, s0  ; SI: s_lshl_b32 s1, s1, 16  ; SI: s_add_i32 s0, s0, 1 -; SI: s_add_i32 s1, s1, 0x10000  ; SI: s_and_b32 s0, s0, 0xffff  ; SI: s_or_b32 s0, s0, s1 +; SI: s_add_i32 s0, s0, 0x10000  define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {    %add = add <2 x i16> %arg0, <i16 1, i16 1>    store <2 x i16> %add, <2 x i16> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index b2b5ae01557..53277027f58 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -276,34 +276,33 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n  ; SI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64  ; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9  ; SI-NEXT:    s_mov_b32 s2, -1 -; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb -; SI-NEXT:    s_movk_i32 s12, 0x900 +; SI-NEXT:    s_movk_i32 s12, 0xff  ; SI-NEXT:    s_mov_b32 s10, s2  ; SI-NEXT:    s_mov_b32 s11, s3 -; SI-NEXT:    s_movk_i32 s13, 0xff +; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb  ; SI-NEXT:    s_waitcnt vmcnt(0)  ; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v1 -; SI-NEXT:    v_lshrrev_b32_e32 v5, 24, v1 -; SI-NEXT:    v_and_b32_e32 v6, 0xff00, v1  ; SI-NEXT:    v_add_i32_e32 v7, vcc, 9, v1 +; SI-NEXT:    v_and_b32_e32 v6, 0xff00, v1 +; SI-NEXT:    v_lshrrev_b32_e32 v5, 24, v1  ; SI-NEXT:    v_cvt_f32_ubyte3_e32 v3, v1  ; SI-NEXT:    v_cvt_f32_ubyte2_e32 v2, v1  ; SI-NEXT:    v_cvt_f32_ubyte0_e32 v0, v1  ; SI-NEXT:    v_cvt_f32_ubyte1_e32 v1, v6 -; SI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5  ; SI-NEXT:    v_add_i32_e32 v4, vcc, 9, v4 +; SI-NEXT:    v_and_b32_e32 v7, s12, v7  ; SI-NEXT:    s_waitcnt lgkmcnt(0)  ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 -; SI-NEXT:    v_add_i32_e32 v6, vcc, s12, v6 -; SI-NEXT:    v_and_b32_e32 v7, s13, v7  ; SI-NEXT:    s_waitcnt expcnt(0) 
-; SI-NEXT:    v_add_i32_e32 v1, vcc, s12, v5 -; SI-NEXT:    v_and_b32_e32 v2, s13, v4 -; SI-NEXT:    v_or_b32_e32 v0, v7, v6 -; SI-NEXT:    v_or_b32_e32 v1, v2, v1 -; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0 -; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT:    v_or_b32_e32 v0, v0, v1 +; SI-NEXT:    v_or_b32_e32 v1, v7, v6 +; SI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5 +; SI-NEXT:    v_and_b32_e32 v0, s12, v4 +; SI-NEXT:    v_or_b32_e32 v0, v0, v5 +; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x900, v1 +; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1 +; SI-NEXT:    v_or_b32_e32 v0, v1, v0 +; SI-NEXT:    v_add_i32_e32 v0, vcc, 0x9000000, v0  ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0  ; SI-NEXT:    s_endpgm  ; @@ -313,7 +312,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n  ; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0  ; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24  ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c -; VI-NEXT:    s_movk_i32 s8, 0x900 +; VI-NEXT:    v_mov_b32_e32 v4, 9  ; VI-NEXT:    s_waitcnt lgkmcnt(0)  ; VI-NEXT:    v_mov_b32_e32 v1, s3  ; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0 @@ -323,23 +322,24 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n  ; VI-NEXT:    s_mov_b32 s2, -1  ; VI-NEXT:    s_mov_b32 s6, s2  ; VI-NEXT:    s_mov_b32 s7, s3 -; VI-NEXT:    v_mov_b32_e32 v4, 9 +; VI-NEXT:    s_movk_i32 s8, 0x900 +; VI-NEXT:    v_mov_b32_e32 v6, s8  ; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT:    v_lshrrev_b32_e32 v6, 24, v5 +; VI-NEXT:    v_lshrrev_b32_e32 v7, 24, v5  ; VI-NEXT:    v_cvt_f32_ubyte3_e32 v3, v5  ; VI-NEXT:    v_cvt_f32_ubyte2_e32 v2, v5  ; VI-NEXT:    v_cvt_f32_ubyte1_e32 v1, v5  ; VI-NEXT:    v_cvt_f32_ubyte0_e32 v0, v5  ; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; VI-NEXT:    v_and_b32_e32 v7, 0xffffff00, v5 -; VI-NEXT:    v_lshlrev_b16_e32 v1, 8, v6 -; VI-NEXT:    v_add_u16_e32 v8, 9, v5 -; VI-NEXT:    v_add_u16_e32 v0, s8, v7 -; VI-NEXT:    v_add_u16_e32 v1, s8, v1 -; VI-NEXT:    v_add_u16_sdwa v2, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT:    v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT:    v_and_b32_e32 v8, 0xffffff00, v5 +; VI-NEXT:    v_add_u16_e32 v9, 9, v5 +; VI-NEXT:    v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT:    v_lshlrev_b16_e32 v1, 8, v7 +; VI-NEXT:    v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; VI-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; VI-NEXT:    v_add_u16_e32 v0, s8, v0 +; VI-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT:    v_or_b32_e32 v0, v0, v1  ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0  ; VI-NEXT:    s_endpgm    %tid.x = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll index 598048dcff9..61b03750e7e 100644 --- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -14,9 +14,9 @@  ; CIVI: s_max_i32  ; CIVI: 
s_max_i32  ; CIVI: s_add_i32 -; CIVI: s_add_i32 -; CIVI: s_and_b32 -; CIVI: s_or_b32 +; CIVI-DAG: s_add_i32 +; CIVI-DAG: s_and_b32 +; CIVI-DAG: s_or_b32  define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {    %neg = sub <2 x i16> zeroinitializer, %val    %cond = icmp sgt <2 x i16> %val, %neg @@ -45,14 +45,14 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %  ; CI: buffer_load_dword v  ; CI: v_lshrrev_b32_e32 -; CI: v_sub_i32_e32 -; CI: v_bfe_i32 -; CI: v_bfe_i32 -; CI: v_max_i32 -; CI: v_max_i32 -; CI: v_add_i32 -; CI: v_add_i32 -; CI: v_or_b32 +; CI-DAG: v_sub_i32_e32 +; CI-DAG: v_bfe_i32 +; CI-DAG: v_bfe_i32 +; CI-DAG: v_max_i32 +; CI-DAG: v_max_i32 +; CI-DAG: v_add_i32 +; CI-DAG: v_add_i32 +; CI-DAG: v_or_b32  define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {    %tid = call i32 @llvm.amdgcn.workitem.id.x()    %gep.in = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %src, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll index c5c4476d20f..a3216422f18 100644 --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -213,12 +213,11 @@ define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg)  ; SI-NEXT:    s_load_dword s0, s[0:1], 0x0  ; SI-NEXT:    s_waitcnt lgkmcnt(0)  ; SI-NEXT:    s_and_b32 s1, s0, 0xff00 -; SI-NEXT:    s_and_b32 s0, s0, 0xffff  ; SI-NEXT:    s_add_i32 s0, s0, 12  ; SI-NEXT:    s_or_b32 s0, s0, 4 -; SI-NEXT:    s_addk_i32 s1, 0x2c00  ; SI-NEXT:    s_and_b32 s0, s0, 0xff  ; SI-NEXT:    s_or_b32 s0, s0, s1 +; SI-NEXT:    s_addk_i32 s0, 0x2c00  ; SI-NEXT:    s_or_b32 s0, s0, 0x300  ; SI-NEXT:    v_mov_b32_e32 v0, s0  ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0 diff --git a/llvm/test/CodeGen/Hexagon/subi-asl.ll b/llvm/test/CodeGen/Hexagon/subi-asl.ll index d7610ceb62a..0fd88384b89 100644 --- a/llvm/test/CodeGen/Hexagon/subi-asl.ll +++ b/llvm/test/CodeGen/Hexagon/subi-asl.ll @@ -3,7 +3,10 @@  ; Check if S4_subi_asl_ri is being generated correctly.  ; CHECK-LABEL: yes_sub_asl -; CHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1)) +; FIXME: We no longer get subi_asl here.  
+; XCHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1)) +; CHECK: [[REG1:(r[0-9]+)]] = asl([[REG1]],#1) +; CHECK:  = sub(#0,[[REG1]])  ; CHECK-LABEL: no_sub_asl  ; CHECK: [[REG2:(r[0-9]+)]] = asl(r{{[0-9]+}},#1) diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index c3c16d977d5..be6baaf42ae 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -17,131 +17,135 @@ define i256 @test1(i256 %a) nounwind {  ; ILP-NEXT:    movq %rdi, %rax  ; ILP-NEXT:    xorl %r8d, %r8d  ; ILP-NEXT:    addl %esi, %esi -; ILP-NEXT:    addb $2, %sil -; ILP-NEXT:    orb $1, %sil -; ILP-NEXT:    movl $1, %r10d -; ILP-NEXT:    xorl %r14d, %r14d +; ILP-NEXT:    leal 3(%rsi), %r9d +; ILP-NEXT:    movb $125, %r10b +; ILP-NEXT:    movl $1, %edi +; ILP-NEXT:    xorl %r11d, %r11d +; ILP-NEXT:    movl %r9d, %ecx +; ILP-NEXT:    shldq %cl, %rdi, %r11 +; ILP-NEXT:    subb %sil, %r10b +; ILP-NEXT:    addb $-125, %sil +; ILP-NEXT:    xorl %ebx, %ebx  ; ILP-NEXT:    movl %esi, %ecx -; ILP-NEXT:    shldq %cl, %r10, %r14 +; ILP-NEXT:    shldq %cl, %rdi, %rbx  ; ILP-NEXT:    movl $1, %edx  ; ILP-NEXT:    shlq %cl, %rdx -; ILP-NEXT:    leal -128(%rsi), %r9d -; ILP-NEXT:    movb $-128, %r11b -; ILP-NEXT:    xorl %ebx, %ebx +; ILP-NEXT:    movl $1, %r14d +; ILP-NEXT:    movl %r10d, %ecx +; ILP-NEXT:    shrdq %cl, %r8, %r14  ; ILP-NEXT:    movl %r9d, %ecx -; ILP-NEXT:    shldq %cl, %r10, %rbx -; ILP-NEXT:    testb $64, %sil -; ILP-NEXT:    cmovneq %rdx, %r14 -; ILP-NEXT:    cmovneq %r8, %rdx -; ILP-NEXT:    movl $1, %edi  ; ILP-NEXT:    shlq %cl, %rdi -; ILP-NEXT:    subb %sil, %r11b -; ILP-NEXT:    movl %r11d, %ecx -; ILP-NEXT:    shrdq %cl, %r8, %r10 -; ILP-NEXT:    testb $64, %r11b -; ILP-NEXT:    cmovneq %r8, %r10  ; ILP-NEXT:    testb $64, %r9b -; ILP-NEXT:    cmovneq %rdi, %rbx +; ILP-NEXT:    cmovneq %rdi, %r11  ; ILP-NEXT:    cmovneq %r8, %rdi -; ILP-NEXT:    testb %sil, %sil -; ILP-NEXT:    cmovsq %r8, %r14 -; ILP-NEXT:    cmovsq %r8, %rdx -; ILP-NEXT:    movq %r14, 8(%rax) -; ILP-NEXT:    movq %rdx, (%rax) +; ILP-NEXT:    testb $64, %r10b +; ILP-NEXT:    cmovneq %r8, %r14 +; ILP-NEXT:    testb $64, %sil +; ILP-NEXT:    cmovneq %rdx, %rbx +; ILP-NEXT:    cmovneq %r8, %rdx +; ILP-NEXT:    testb %r9b, %r9b +; ILP-NEXT:    cmovsq %r8, %r11 +; ILP-NEXT:    cmovsq %r8, %rdi +; ILP-NEXT:    movq %r11, 8(%rax) +; ILP-NEXT:    movq %rdi, (%rax)  ; ILP-NEXT:    cmovnsq %r8, %rbx  ; ILP-NEXT:    cmoveq %r8, %rbx  ; ILP-NEXT:    movq %rbx, 24(%rax) -; ILP-NEXT:    cmovnsq %r10, %rdi -; ILP-NEXT:    cmoveq %r8, %rdi -; ILP-NEXT:    movq %rdi, 16(%rax) +; ILP-NEXT:    cmovnsq %r14, %rdx +; ILP-NEXT:    cmoveq %r8, %rdx +; ILP-NEXT:    movq %rdx, 16(%rax)  ; ILP-NEXT:    popq %rbx  ; ILP-NEXT:    popq %r14  ; ILP-NEXT:    retq  ;  ; HYBRID-LABEL: test1:  ; HYBRID:       # %bb.0: +; HYBRID-NEXT:    pushq %rbx  ; HYBRID-NEXT:    movq %rdi, %rax  ; HYBRID-NEXT:    addl %esi, %esi -; HYBRID-NEXT:    addb $2, %sil -; HYBRID-NEXT:    orb $1, %sil -; HYBRID-NEXT:    movb $-128, %cl +; HYBRID-NEXT:    movb $125, %cl  ; HYBRID-NEXT:    subb %sil, %cl  ; HYBRID-NEXT:    xorl %r8d, %r8d -; HYBRID-NEXT:    movl $1, %r11d +; HYBRID-NEXT:    movl $1, %edi  ; HYBRID-NEXT:    movl $1, %r9d  ; HYBRID-NEXT:    shrdq %cl, %r8, %r9  ; HYBRID-NEXT:    testb $64, %cl  ; HYBRID-NEXT:    cmovneq %r8, %r9 -; HYBRID-NEXT:    xorl %r10d, %r10d -; HYBRID-NEXT:    movl %esi, %ecx -; HYBRID-NEXT:    shldq %cl, %r11, %r10 -; HYBRID-NEXT:    leal 
-128(%rsi), %ecx -; HYBRID-NEXT:    xorl %edi, %edi -; HYBRID-NEXT:    shldq %cl, %r11, %rdi -; HYBRID-NEXT:    movl $1, %edx -; HYBRID-NEXT:    shlq %cl, %rdx -; HYBRID-NEXT:    testb $64, %cl -; HYBRID-NEXT:    cmovneq %rdx, %rdi -; HYBRID-NEXT:    cmovneq %r8, %rdx +; HYBRID-NEXT:    leal 3(%rsi), %r10d +; HYBRID-NEXT:    xorl %r11d, %r11d +; HYBRID-NEXT:    movl %r10d, %ecx +; HYBRID-NEXT:    shldq %cl, %rdi, %r11 +; HYBRID-NEXT:    addb $-125, %sil +; HYBRID-NEXT:    xorl %edx, %edx  ; HYBRID-NEXT:    movl %esi, %ecx -; HYBRID-NEXT:    shlq %cl, %r11 +; HYBRID-NEXT:    shldq %cl, %rdi, %rdx +; HYBRID-NEXT:    movl $1, %ebx +; HYBRID-NEXT:    shlq %cl, %rbx  ; HYBRID-NEXT:    testb $64, %sil -; HYBRID-NEXT:    cmovneq %r11, %r10 -; HYBRID-NEXT:    cmovneq %r8, %r11 -; HYBRID-NEXT:    testb %sil, %sil -; HYBRID-NEXT:    cmovsq %r8, %r10 -; HYBRID-NEXT:    movq %r10, 8(%rax) +; HYBRID-NEXT:    cmovneq %rbx, %rdx +; HYBRID-NEXT:    cmovneq %r8, %rbx +; HYBRID-NEXT:    movl %r10d, %ecx +; HYBRID-NEXT:    shlq %cl, %rdi +; HYBRID-NEXT:    testb $64, %r10b +; HYBRID-NEXT:    cmovneq %rdi, %r11 +; HYBRID-NEXT:    cmovneq %r8, %rdi +; HYBRID-NEXT:    testb %r10b, %r10b  ; HYBRID-NEXT:    cmovsq %r8, %r11 -; HYBRID-NEXT:    movq %r11, (%rax) -; HYBRID-NEXT:    cmovnsq %r8, %rdi -; HYBRID-NEXT:    cmoveq %r8, %rdi -; HYBRID-NEXT:    movq %rdi, 24(%rax) -; HYBRID-NEXT:    cmovnsq %r9, %rdx +; HYBRID-NEXT:    movq %r11, 8(%rax) +; HYBRID-NEXT:    cmovsq %r8, %rdi +; HYBRID-NEXT:    movq %rdi, (%rax) +; HYBRID-NEXT:    cmovnsq %r8, %rdx  ; HYBRID-NEXT:    cmoveq %r8, %rdx -; HYBRID-NEXT:    movq %rdx, 16(%rax) +; HYBRID-NEXT:    movq %rdx, 24(%rax) +; HYBRID-NEXT:    cmovnsq %r9, %rbx +; HYBRID-NEXT:    cmoveq %r8, %rbx +; HYBRID-NEXT:    movq %rbx, 16(%rax) +; HYBRID-NEXT:    popq %rbx  ; HYBRID-NEXT:    retq  ;  ; BURR-LABEL: test1:  ; BURR:       # %bb.0: +; BURR-NEXT:    pushq %rbx  ; BURR-NEXT:    movq %rdi, %rax  ; BURR-NEXT:    addl %esi, %esi -; BURR-NEXT:    addb $2, %sil -; BURR-NEXT:    orb $1, %sil -; BURR-NEXT:    movb $-128, %cl +; BURR-NEXT:    movb $125, %cl  ; BURR-NEXT:    subb %sil, %cl  ; BURR-NEXT:    xorl %r8d, %r8d -; BURR-NEXT:    movl $1, %r11d +; BURR-NEXT:    movl $1, %edi  ; BURR-NEXT:    movl $1, %r9d  ; BURR-NEXT:    shrdq %cl, %r8, %r9  ; BURR-NEXT:    testb $64, %cl  ; BURR-NEXT:    cmovneq %r8, %r9 -; BURR-NEXT:    xorl %r10d, %r10d +; BURR-NEXT:    leal 3(%rsi), %r10d +; BURR-NEXT:    xorl %r11d, %r11d +; BURR-NEXT:    movl %r10d, %ecx +; BURR-NEXT:    shldq %cl, %rdi, %r11 +; BURR-NEXT:    addb $-125, %sil +; BURR-NEXT:    xorl %edx, %edx  ; BURR-NEXT:    movl %esi, %ecx -; BURR-NEXT:    shldq %cl, %r11, %r10 -; BURR-NEXT:    leal -128(%rsi), %ecx -; BURR-NEXT:    xorl %edi, %edi -; BURR-NEXT:    shldq %cl, %r11, %rdi -; BURR-NEXT:    movl $1, %edx -; BURR-NEXT:    shlq %cl, %rdx -; BURR-NEXT:    testb $64, %cl -; BURR-NEXT:    cmovneq %rdx, %rdi -; BURR-NEXT:    cmovneq %r8, %rdx -; BURR-NEXT:    movl %esi, %ecx -; BURR-NEXT:    shlq %cl, %r11 +; BURR-NEXT:    shldq %cl, %rdi, %rdx +; BURR-NEXT:    movl $1, %ebx +; BURR-NEXT:    shlq %cl, %rbx  ; BURR-NEXT:    testb $64, %sil -; BURR-NEXT:    cmovneq %r11, %r10 -; BURR-NEXT:    cmovneq %r8, %r11 -; BURR-NEXT:    testb %sil, %sil -; BURR-NEXT:    cmovsq %r8, %r10 -; BURR-NEXT:    movq %r10, 8(%rax) +; BURR-NEXT:    cmovneq %rbx, %rdx +; BURR-NEXT:    cmovneq %r8, %rbx +; BURR-NEXT:    movl %r10d, %ecx +; BURR-NEXT:    shlq %cl, %rdi +; BURR-NEXT:    testb $64, %r10b +; BURR-NEXT:    cmovneq %rdi, %r11 +; 
BURR-NEXT:    cmovneq %r8, %rdi +; BURR-NEXT:    testb %r10b, %r10b  ; BURR-NEXT:    cmovsq %r8, %r11 -; BURR-NEXT:    movq %r11, (%rax) -; BURR-NEXT:    cmovnsq %r8, %rdi -; BURR-NEXT:    cmoveq %r8, %rdi -; BURR-NEXT:    movq %rdi, 24(%rax) -; BURR-NEXT:    cmovnsq %r9, %rdx +; BURR-NEXT:    movq %r11, 8(%rax) +; BURR-NEXT:    cmovsq %r8, %rdi +; BURR-NEXT:    movq %rdi, (%rax) +; BURR-NEXT:    cmovnsq %r8, %rdx  ; BURR-NEXT:    cmoveq %r8, %rdx -; BURR-NEXT:    movq %rdx, 16(%rax) +; BURR-NEXT:    movq %rdx, 24(%rax) +; BURR-NEXT:    cmovnsq %r9, %rbx +; BURR-NEXT:    cmoveq %r8, %rbx +; BURR-NEXT:    movq %rbx, 16(%rax) +; BURR-NEXT:    popq %rbx  ; BURR-NEXT:    retq  ;  ; SRC-LABEL: test1: @@ -149,9 +153,8 @@ define i256 @test1(i256 %a) nounwind {  ; SRC-NEXT:    pushq %rbx  ; SRC-NEXT:    movq %rdi, %rax  ; SRC-NEXT:    addl %esi, %esi -; SRC-NEXT:    addb $2, %sil -; SRC-NEXT:    orb $1, %sil -; SRC-NEXT:    movb $-128, %cl +; SRC-NEXT:    leal 3(%rsi), %r9d +; SRC-NEXT:    movb $125, %cl  ; SRC-NEXT:    subb %sil, %cl  ; SRC-NEXT:    xorl %r8d, %r8d  ; SRC-NEXT:    movl $1, %edi @@ -159,24 +162,24 @@ define i256 @test1(i256 %a) nounwind {  ; SRC-NEXT:    shrdq %cl, %r8, %r10  ; SRC-NEXT:    testb $64, %cl  ; SRC-NEXT:    cmovneq %r8, %r10 -; SRC-NEXT:    leal -128(%rsi), %r9d +; SRC-NEXT:    addb $-125, %sil  ; SRC-NEXT:    xorl %edx, %edx -; SRC-NEXT:    movl %r9d, %ecx +; SRC-NEXT:    movl %esi, %ecx  ; SRC-NEXT:    shldq %cl, %rdi, %rdx  ; SRC-NEXT:    xorl %r11d, %r11d -; SRC-NEXT:    movl %esi, %ecx +; SRC-NEXT:    movl %r9d, %ecx  ; SRC-NEXT:    shldq %cl, %rdi, %r11  ; SRC-NEXT:    movl $1, %ebx  ; SRC-NEXT:    shlq %cl, %rbx -; SRC-NEXT:    testb $64, %sil +; SRC-NEXT:    testb $64, %r9b  ; SRC-NEXT:    cmovneq %rbx, %r11  ; SRC-NEXT:    cmovneq %r8, %rbx -; SRC-NEXT:    movl %r9d, %ecx +; SRC-NEXT:    movl %esi, %ecx  ; SRC-NEXT:    shlq %cl, %rdi -; SRC-NEXT:    testb $64, %r9b +; SRC-NEXT:    testb $64, %sil  ; SRC-NEXT:    cmovneq %rdi, %rdx  ; SRC-NEXT:    cmovneq %r8, %rdi -; SRC-NEXT:    testb %sil, %sil +; SRC-NEXT:    testb %r9b, %r9b  ; SRC-NEXT:    cmovnsq %r10, %rdi  ; SRC-NEXT:    cmoveq %r8, %rdi  ; SRC-NEXT:    cmovnsq %r8, %rdx @@ -196,31 +199,29 @@ define i256 @test1(i256 %a) nounwind {  ; LIN-NEXT:    xorl %r9d, %r9d  ; LIN-NEXT:    movl $1, %r8d  ; LIN-NEXT:    addl %esi, %esi -; LIN-NEXT:    addb $2, %sil -; LIN-NEXT:    orb $1, %sil -; LIN-NEXT:    movl $1, %edx -; LIN-NEXT:    movl %esi, %ecx -; LIN-NEXT:    shlq %cl, %rdx -; LIN-NEXT:    testb $64, %sil -; LIN-NEXT:    movq %rdx, %rcx -; LIN-NEXT:    cmovneq %r9, %rcx -; LIN-NEXT:    testb %sil, %sil -; LIN-NEXT:    cmovsq %r9, %rcx -; LIN-NEXT:    movq %rcx, (%rdi) -; LIN-NEXT:    xorl %edi, %edi -; LIN-NEXT:    movl %esi, %ecx -; LIN-NEXT:    shldq %cl, %r8, %rdi -; LIN-NEXT:    cmovneq %rdx, %rdi -; LIN-NEXT:    cmovsq %r9, %rdi -; LIN-NEXT:    movq %rdi, 8(%rax) -; LIN-NEXT:    leal -128(%rsi), %r10d +; LIN-NEXT:    leal 3(%rsi), %ecx +; LIN-NEXT:    movl $1, %edi +; LIN-NEXT:    shlq %cl, %rdi +; LIN-NEXT:    testb $64, %cl +; LIN-NEXT:    movq %rdi, %rdx +; LIN-NEXT:    cmovneq %r9, %rdx +; LIN-NEXT:    testb %cl, %cl +; LIN-NEXT:    cmovsq %r9, %rdx +; LIN-NEXT:    movq %rdx, (%rax) +; LIN-NEXT:    xorl %edx, %edx +; LIN-NEXT:    # kill: def $cl killed $cl killed $ecx +; LIN-NEXT:    shldq %cl, %r8, %rdx +; LIN-NEXT:    cmovneq %rdi, %rdx +; LIN-NEXT:    cmovsq %r9, %rdx +; LIN-NEXT:    movq %rdx, 8(%rax) +; LIN-NEXT:    leal -125(%rsi), %r10d  ; LIN-NEXT:    movl $1, %edx  ; LIN-NEXT:    movl 
%r10d, %ecx  ; LIN-NEXT:    shlq %cl, %rdx  ; LIN-NEXT:    testb $64, %r10b  ; LIN-NEXT:    movq %rdx, %rdi  ; LIN-NEXT:    cmovneq %r9, %rdi -; LIN-NEXT:    movb $-128, %cl +; LIN-NEXT:    movb $125, %cl  ; LIN-NEXT:    subb %sil, %cl  ; LIN-NEXT:    movl $1, %esi  ; LIN-NEXT:    shrdq %cl, %r9, %rsi diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll index 7c2ce7a0802..94d68ccc84b 100644 --- a/llvm/test/CodeGen/X86/signbit-shift.ll +++ b/llvm/test/CodeGen/X86/signbit-shift.ll @@ -33,8 +33,9 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {  ; CHECK:       # %bb.0:  ; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1  ; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0 -; CHECK-NEXT:    psrld $31, %xmm0 -; CHECK-NEXT:    por {{.*}}(%rip), %xmm0 +; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42] +; CHECK-NEXT:    psubd %xmm0, %xmm1 +; CHECK-NEXT:    movdqa %xmm1, %xmm0  ; CHECK-NEXT:    retq    %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>    %e = zext <4 x i1> %c to <4 x i32> diff --git a/llvm/test/CodeGen/X86/split-store.ll b/llvm/test/CodeGen/X86/split-store.ll index 575f46c04c0..a5c34c41526 100644 --- a/llvm/test/CodeGen/X86/split-store.ll +++ b/llvm/test/CodeGen/X86/split-store.ll @@ -217,10 +217,9 @@ define void @int1_int1_pair(i1 signext %tmp1, i1 signext %tmp2, i2* %ref.tmp) {  ; CHECK-LABEL: int1_int1_pair:  ; CHECK:       # %bb.0:  ; CHECK-NEXT:    addb %sil, %sil -; CHECK-NEXT:    andb $1, %dil -; CHECK-NEXT:    orb %sil, %dil -; CHECK-NEXT:    andb $3, %dil -; CHECK-NEXT:    movb %dil, (%rdx) +; CHECK-NEXT:    subb %dil, %sil +; CHECK-NEXT:    andb $3, %sil +; CHECK-NEXT:    movb %sil, (%rdx)  ; CHECK-NEXT:    retq    %t1 = zext i1 %tmp2 to i2    %t2 = shl nuw i2 %t1, 1  | 
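Note on the DAGCombiner change above: visitOR now feeds the node through the ADD-oriented folds in visitADDLike, but only when DAG.haveNoCommonBitsSet(N0, N1) proves the two operands have no set bits in common, because in that case a + b and a | b compute the same value (no carries can occur). The standalone C++ sketch below is not LLVM code; it is only a hypothetical sanity check of that identity, exhaustively verified here for 8-bit operands.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Verify the invariant the new visitOR -> visitADDLike path relies on:
// when (a & b) == 0, the addition cannot carry, so a + b == (a | b).
int main() {
  for (uint32_t a = 0; a < 256; ++a) {
    for (uint32_t b = 0; b < 256; ++b) {
      if ((a & b) != 0)
        continue; // operands share a bit; the combine would not fire
      assert(((a + b) & 0xffu) == ((a | b) & 0xffu));
    }
  }
  std::puts("a + b == (a | b) holds for all disjoint 8-bit operand pairs");
  return 0;
}

The same reasoning underlies the fold that moved into the new visitADD wrapper: when the operands are provably disjoint, (a+b) may be rewritten as (a|b) wherever OR is legal, and conversely an OR of disjoint values may be treated as an ADD for the purpose of further combines.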

