Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll | 46
-rw-r--r--  llvm/test/CodeGen/ARM/aapcs-hfa-code.ll | 6
-rw-r--r--  llvm/test/CodeGen/Mips/cmov.ll | 27
-rw-r--r--  llvm/test/CodeGen/R600/add_i64.ll | 4
-rw-r--r--  llvm/test/CodeGen/R600/or.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/i8-umulo.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/jump_sign.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/lower-bitcast.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/pr15267.ll | 33
-rw-r--r--  llvm/test/CodeGen/X86/store-narrow.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/trunc-ext-ld-st.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv.ll | 14
-rw-r--r--  llvm/test/CodeGen/X86/widen_cast-1.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/widen_conv-1.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/widen_load-2.ll | 7
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-tls-1.ll | 9
-rw-r--r--  llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll | 16
19 files changed, 95 insertions, 137 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b526523063b..379ea7f7f09 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -104,6 +104,12 @@ namespace {
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
+ /// \brief Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 64> CombinedNodes;
+
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -136,6 +142,8 @@ namespace {
/// removeFromWorklist - remove all instances of N from the worklist.
///
void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
return; // Not in the worklist.
@@ -1152,6 +1160,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
if (recursivelyDeleteUnusedNodes(N))
continue;
+ DEBUG(dbgs() << "\nCombining: ";
+ N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ CombinedNodes.insert(N);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (!CombinedNodes.count(N->getOperand(i).getNode()))
+ AddToWorklist(N->getOperand(i).getNode());
+
WorklistRemover DeadNodes(*this);
SDValue RV = combine(N);
@@ -1172,11 +1191,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- RV.getNode()->dump(&DAG);
- dbgs() << '\n');
+ DEBUG(dbgs() << " ... into: ";
+ RV.getNode()->dump(&DAG));
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
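
For context, the mechanism added above can be illustrated with a small standalone C++ sketch. This is not LLVM's code; Node, runCombine, combine, and the containers below are hypothetical stand-ins for SDNode, DAGCombiner::Run, the per-node combine step, and the combiner's uniqued worklist. The idea is to record which nodes have been combined at least once and, when visiting a node, re-queue any operand that has not been combined yet, relying on worklist uniquing to avoid repeated work.

// Minimal sketch (assumed names, not the LLVM implementation) of the
// "combined nodes" worklist pattern introduced by this patch.
#include <deque>
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

// Placeholder for the real per-node combine step.
bool combine(Node *) { return false; }

void runCombine(std::deque<Node *> &Worklist) {
  // Nodes that have been combined at least once.
  std::unordered_set<Node *> CombinedNodes;
  // Keeps the worklist unique, mirroring the role of WorklistMap.
  std::unordered_set<Node *> InWorklist(Worklist.begin(), Worklist.end());

  auto addToWorklist = [&](Node *N) {
    if (InWorklist.insert(N).second)
      Worklist.push_back(N);
  };

  while (!Worklist.empty()) {
    Node *N = Worklist.front();
    Worklist.pop_front();
    InWorklist.erase(N);

    // Mark N as combined, then queue any operand that has not been combined
    // yet; the uniquing above keeps the same operand from being reprocessed.
    CombinedNodes.insert(N);
    for (Node *Op : N->Operands)
      if (!CombinedNodes.count(Op))
        addToWorklist(Op);

    combine(N);
  }
}
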
diff --git a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/llvm/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
deleted file mode 100644
index ce132c6afa4..00000000000
--- a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
-; Test case for a DAG combiner bug where we combined an indexed load
-; with an extension (sext, zext, or any) into a regular extended load,
-; i.e., dropping the indexed value.
-; <rdar://problem/16389332>
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
-
-%class.A = type { i64, i64 }
-%class.C = type { i64 }
-
-; CHECK-LABEL: XX:
-; CHECK: ldr
-define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) {
-entry:
- br i1 %tst, label %if.then, label %lor.rhs.i
-
-lor.rhs.i: ; preds = %entry
- %tmp = load i32* %addr, align 4
- %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
- %tmp1 = load i64* %y.i.i.i, align 8
- %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
- %div11.i = sdiv i32 %U.sroa.3.8.extract.trunc.i, 17
- %add12.i = add nsw i32 0, %div11.i
- %U.sroa.3.12.extract.shift.i = lshr i64 %tmp1, 32
- %U.sroa.3.12.extract.trunc.i = trunc i64 %U.sroa.3.12.extract.shift.i to i32
- %div15.i = sdiv i32 %U.sroa.3.12.extract.trunc.i, 13
- %add16.i = add nsw i32 %add12.i, %div15.i
- %rem.i.i = srem i32 %add16.i, %tmp
- %idxprom = sext i32 %rem.i.i to i64
- %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom
- %tobool533 = icmp eq %class.C* %pC, null
- br i1 %tobool533, label %while.end, label %while.body
-
-if.then: ; preds = %entry
- ret i32 42
-
-while.body: ; preds = %lor.rhs.i
- ret i32 5
-
-while.end: ; preds = %lor.rhs.i
- %tmp3 = load %class.C** %arrayidx, align 8
- ret i32 50
-}
diff --git a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
index 396e83816cc..41ea6137b3d 100644
--- a/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ b/llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -92,12 +92,10 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
; CHECK-LABEL: test_1double_misaligned:
-; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
-; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
+; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
; CHECK-DAG: movt [[ONEHI]], #16368
-; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
-; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
+; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
; CHECK-M4F-LABEL: test_1double_misaligned:
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
diff --git a/llvm/test/CodeGen/Mips/cmov.ll b/llvm/test/CodeGen/Mips/cmov.ll
index 0c13fb1adfb..48558a38e3a 100644
--- a/llvm/test/CodeGen/Mips/cmov.ll
+++ b/llvm/test/CodeGen/Mips/cmov.ll
@@ -757,24 +757,9 @@ define i32 @slti6(i32 %a) nounwind readnone {
; ALL-LABEL: slti6:
-; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMOV-NOT: movn
-
-; 32-CMP-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMP-DAG: xori [[R1]], [[R1]], 1
-; 32-CMP-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMP-NOT: seleqz
-; 32-CMP-NOT: selnez
-
-; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMOV-NOT: movn
-
-; 64-CMP-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMP-DAG: xori [[R1]], [[R1]], 1
-; 64-CMP-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMP-NOT: seleqz
-; 64-CMP-NOT: selnez
+; ALL-DAG: addiu [[R1:\$[0-9]+]], $zero, 6
+; ALL-DAG: slt [[R1]], [[R1]], $4
+; ALL-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; ALL-NOT: movn
+; ALL-NOT: seleqz
+; ALL-NOT: selnez
diff --git a/llvm/test/CodeGen/R600/add_i64.ll b/llvm/test/CodeGen/R600/add_i64.ll
index f733d904042..dac4f173e31 100644
--- a/llvm/test/CodeGen/R600/add_i64.ll
+++ b/llvm/test/CodeGen/R600/add_i64.ll
@@ -70,8 +70,8 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
}
; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI-NOT: ADDC
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index 3c3b475d077..a2b7e47f4d4 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -116,10 +116,10 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
}
; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
+; SI: S_OR_B32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = or i64 %b, %a
diff --git a/llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 5372bc52278..60025bfcdc8 100644
--- a/llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/llvm/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -7,7 +7,7 @@ entry:
%tmp1 = bitcast double %a to <8 x i8>
%tmp2 = bitcast double %b to <8 x i8>
%tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK: paddw
+; CHECK: paddb
store <8 x i8> %tmp3, <8 x i8>* null
ret void
}
@@ -18,7 +18,7 @@ entry:
%tmp1 = bitcast double %a to <4 x i16>
%tmp2 = bitcast double %b to <4 x i16>
%tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK: paddd
+; CHECK: paddw
store <4 x i16> %tmp3, <4 x i16>* null
ret void
}
@@ -29,7 +29,7 @@ entry:
%tmp1 = bitcast double %a to <2 x i32>
%tmp2 = bitcast double %b to <2 x i32>
%tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK: paddq
+; CHECK: paddd
store <2 x i32> %tmp3, <2 x i32>* null
ret void
}
diff --git a/llvm/test/CodeGen/X86/i8-umulo.ll b/llvm/test/CodeGen/X86/i8-umulo.ll
index ba846f3e9be..1d70f4a8754 100644
--- a/llvm/test/CodeGen/X86/i8-umulo.ll
+++ b/llvm/test/CodeGen/X86/i8-umulo.ll
@@ -3,7 +3,7 @@
declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
define i8 @testumulo(i32 %argc) {
-; CHECK: imulw
+; CHECK: imull
; CHECK: testb %{{.+}}, %{{.+}}
; CHECK: je [[NOOVERFLOWLABEL:.+]]
; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index d4174539f2f..dfa8aed4646 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -284,7 +284,7 @@ entry:
define i32 @func_test1(i32 %p1) nounwind uwtable {
entry:
; CHECK-LABEL: func_test1:
-; CHECK: testb
+; CHECK: andb
; CHECK: j
; CHECK: ret
%0 = load i32* @b, align 4
diff --git a/llvm/test/CodeGen/X86/lower-bitcast.ll b/llvm/test/CodeGen/X86/lower-bitcast.ll
index f47161e5520..edb8433ec30 100644
--- a/llvm/test/CodeGen/X86/lower-bitcast.ll
+++ b/llvm/test/CodeGen/X86/lower-bitcast.ll
@@ -68,13 +68,13 @@ define i64 @test4(i64 %A) {
%2 = bitcast <2 x i32> %add to i64
ret i64 %2
}
-; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
; Ideally, we should fold that sequence into a single paddd. This is fixed with
; the widening legalization.
;
; CHECK-LABEL: test4
; CHECK: pshufd
-; CHECK-NEXT: paddq
+; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK: ret
;
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
index c8aaf327a7d..b4dc5fd4716 100644
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -48,19 +48,22 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
; CHECK: test3
; CHECK: movzbl
-; CHECK: shrl
-; CHECK: andl $1
-; CHECK: andl $1
-; CHECK: vmovd
-; CHECK: pinsrd $1
-; CHECK: shrl $2
-; CHECK: andl $1
-; CHECK: pinsrd $2
-; CHECK: shrl $3
-; CHECK: andl $1
-; CHECK: pinsrd $3
-; CHECK: pslld
-; CHECK: psrad
-; CHECK: pmovsxdq
-; CHECK: pmovsxdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: vinsertf128
; CHECK: ret
diff --git a/llvm/test/CodeGen/X86/store-narrow.ll b/llvm/test/CodeGen/X86/store-narrow.ll
index 51f6fb0dbbe..e3cc2fa668e 100644
--- a/llvm/test/CodeGen/X86/store-narrow.ll
+++ b/llvm/test/CodeGen/X86/store-narrow.ll
@@ -34,7 +34,7 @@ entry:
; X64: movb %sil, 1(%rdi)
; X32-LABEL: test2:
-; X32: movzbl 8(%esp), %e[[REG:[abcd]]]x
+; X32: movb 8(%esp), %[[REG:[abcd]]]l
; X32: movb %[[REG]]l, 1(%{{.*}})
}
@@ -67,8 +67,8 @@ entry:
; X64: movw %si, 2(%rdi)
; X32-LABEL: test4:
-; X32: movl 8(%esp), %e[[REG:[abcd]x]]
-; X32: movw %[[REG]], 2(%{{.*}})
+; X32: movw 8(%esp), %[[REG:[abcd]]]x
+; X32: movw %[[REG]]x, 2(%{{.*}})
}
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
; X64: movw %si, 2(%rdi)
; X32-LABEL: test5:
-; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
-; X32: movw %[[REG]], 2(%{{.*}})
+; X32: movw 8(%esp), %[[REG:[abcd]]]x
+; X32: movw %[[REG]]x, 2(%{{.*}})
}
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
diff --git a/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll b/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
index d230f1f7e2c..b981871d94b 100644
--- a/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -32,7 +32,7 @@ define void @load_2_i16(<2 x i16>* %A) {
;CHECK-LABEL: load_2_i32:
;CHECK: pmovzxdq
-;CHECK: paddq
+;CHECK: paddd
;CHECK: pshufd
;CHECK: ret
define void @load_2_i32(<2 x i32>* %A) {
@@ -56,7 +56,7 @@ define void @load_4_i8(<4 x i8>* %A) {
;CHECK-LABEL: load_4_i16:
;CHECK: pmovzxwd
-;CHECK: paddd
+;CHECK: paddw
;CHECK: pshufb
;CHECK: ret
define void @load_4_i16(<4 x i16>* %A) {
@@ -68,7 +68,7 @@ define void @load_4_i16(<4 x i16>* %A) {
;CHECK-LABEL: load_8_i8:
;CHECK: pmovzxbw
-;CHECK: paddw
+;CHECK: paddb
;CHECK: pshufb
;CHECK: ret
define void @load_8_i8(<8 x i8>* %A) {
diff --git a/llvm/test/CodeGen/X86/vector-idiv.ll b/llvm/test/CodeGen/X86/vector-idiv.ll
index a3229073751..ec1ce3da5e1 100644
--- a/llvm/test/CodeGen/X86/vector-idiv.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv.ll
@@ -122,7 +122,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
; SSE41-LABEL: test8:
; SSE41: pmuldq
; SSE41: pshufd $49
-; SSE41-NOT: pshufd $49
+; SSE41: pshufd $49
; SSE41: pmuldq
; SSE41: shufps $-35
; SSE41: pshufd $-40
@@ -134,7 +134,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
; SSE-LABEL: test8:
; SSE: pmuludq
; SSE: pshufd $49
-; SSE-NOT: pshufd $49
+; SSE: pshufd $49
; SSE: pmuludq
; SSE: shufps $-35
; SSE: pshufd $-40
@@ -147,7 +147,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
; AVX-LABEL: test8:
; AVX: vpmuldq
; AVX: vpshufd $49
-; AVX-NOT: vpshufd $49
+; AVX: vpshufd $49
; AVX: vpmuldq
; AVX: vshufps $-35
; AVX: vpshufd $-40
@@ -162,10 +162,12 @@ define <8 x i32> @test9(<8 x i32> %a) {
ret <8 x i32> %div
; AVX-LABEL: test9:
-; AVX: vpalignr $4
; AVX: vpbroadcastd
+; AVX: vpalignr $4
+; AVX: vpalignr $4
; AVX: vpmuldq
; AVX: vpmuldq
+; AVX: vpalignr $4
; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
@@ -195,10 +197,12 @@ define <8 x i32> @test11(<8 x i32> %a) {
ret <8 x i32> %rem
; AVX-LABEL: test11:
-; AVX: vpalignr $4
; AVX: vpbroadcastd
+; AVX: vpalignr $4
+; AVX: vpalignr $4
; AVX: vpmuldq
; AVX: vpmuldq
+; AVX: vpalignr $4
; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
diff --git a/llvm/test/CodeGen/X86/widen_cast-1.ll b/llvm/test/CodeGen/X86/widen_cast-1.ll
index d115929f5aa..e0b861f29de 100644
--- a/llvm/test/CodeGen/X86/widen_cast-1.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-1.ll
@@ -2,12 +2,12 @@
; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
; CHECK: movl
-; CHECK: paddd
+; CHECK: paddw
; CHECK: movlpd
; Scheduler causes produce a different instruction order
; ATOM: movl
-; ATOM: paddd
+; ATOM: paddw
; ATOM: movlpd
; bitcast a v4i16 to v2i32
diff --git a/llvm/test/CodeGen/X86/widen_conv-1.ll b/llvm/test/CodeGen/X86/widen_conv-1.ll
index 9f6778cff59..3f54ab694c0 100644
--- a/llvm/test/CodeGen/X86/widen_conv-1.ll
+++ b/llvm/test/CodeGen/X86/widen_conv-1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: paddq
+; CHECK: paddd
; truncate v2i64 to v2i32
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index 9d298adcc81..0ec3574d69e 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -91,10 +91,9 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; CHECK-LABEL: add4i16:
-; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
-; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
-; CHECK-NEXT: paddd %[[R0]], %[[R1]]
-; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK: movq (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movq (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddw %[[R0]], %[[R1]]
; CHECK-NEXT: movq %[[R1]], (%{{.*}})
%a = load %i16vec4* %ap, align 16
%b = load %i16vec4* %bp, align 16
diff --git a/llvm/test/CodeGen/X86/x86-64-tls-1.ll b/llvm/test/CodeGen/X86/x86-64-tls-1.ll
index 641786f5a91..2879fb4e1e7 100644
--- a/llvm/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/llvm/test/CodeGen/X86/x86-64-tls-1.ll
@@ -1,10 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
@tm_nest_level = internal thread_local global i32 0
define i64 @z() nounwind {
-; FIXME: The codegen here is primitive at best and could be much better.
-; The add and the moves can be folded together.
-; CHECK-DAG: movq $tm_nest_level@TPOFF, %rcx
-; CHECK-DAG: movq %fs:0, %rax
-; CHECK: addl %ecx, %eax
+; CHECK: movq $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
+; CHECK-NEXT: addl %fs:0, %e[[R0]]x
+; CHECK-NEXT: andq $100, %r[[R0]]x
+
ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
}
diff --git a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 4fe6c664df5..4317d8ab6a2 100644
--- a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -2,10 +2,10 @@
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
; CHECK-LABEL: LCPI0_0:
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
; CHECK-LABEL: foo:
; CHECK: cmpeqps %xmm1, %xmm0
; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
@@ -59,10 +59,10 @@ define void @foo2(<4 x float>* noalias %result) nounwind {
; scalar value like what the zext creates.
define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind {
; CHECK-LABEL: LCPI3_0:
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
+; CHECK-NEXT: .long 0 ## 0x0
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
+; CHECK-NEXT: .long 0 ## 0x0
; CHECK-LABEL: foo3:
; CHECK: cmpeqps %xmm1, %xmm0
; CHECK-NEXT: andps LCPI3_0(%rip), %xmm0