author     Dimitry Andric <dimitry@andric.com>   2016-01-03 17:22:03 +0000
committer  Dimitry Andric <dimitry@andric.com>   2016-01-03 17:22:03 +0000
commit     227b928abce178fb1c3b70f510d5dc9ef7124758 (patch)
tree       b9726f1e74f13393af712d3975153e5769837511 /llvm/test/CodeGen
parent     569106fe997eb1ec3abf6eb373dd09170d42eb7a (diff)
Fix several accidental DOS line endings in source files
Summary:
There are a number of files in the tree which have been accidentally checked
in with DOS line endings. Convert these to native line endings. There are
also a few files which have DOS line endings on purpose, and I have set the
svn:eol-style property to 'CRLF' on those.

Reviewers: joerg, aaron.ballman

Subscribers: aaron.ballman, sanjoy, dsanders, llvm-commits

Differential Revision: http://reviews.llvm.org/D15848

llvm-svn: 256707
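
The kind of cleanup this commit records can also be scripted. The sketch below
is not part of the commit; it is a minimal, hypothetical Python helper (the
function name and the binary-file heuristic are illustrative assumptions, not
LLVM tooling) that finds text files containing CRLF sequences under a
directory and rewrites them with LF-only endings:

    #!/usr/bin/env python3
    # Hypothetical sketch: convert accidental CRLF line endings to LF.
    import sys
    from pathlib import Path

    def normalize_line_endings(root: Path) -> None:
        for path in root.rglob("*"):
            if not path.is_file():
                continue
            data = path.read_bytes()
            if b"\0" in data:          # crude binary check; skip such files
                continue
            if b"\r\n" not in data:    # already native (LF) endings
                continue
            path.write_bytes(data.replace(b"\r\n", b"\n"))
            print(f"converted {path}")

    if __name__ == "__main__":
        normalize_line_endings(Path(sys.argv[1] if len(sys.argv) > 1 else "."))

Files that keep CRLF endings on purpose would be excluded from such a sweep;
as the summary notes, those are instead marked with the Subversion
svn:eol-style property set to 'CRLF' so the endings are preserved.
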
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vector-ext.ll | 54
-rw-r--r--  llvm/test/CodeGen/ARM/debugtrap.ll | 34
-rw-r--r--  llvm/test/CodeGen/X86/2011-11-30-or.ll | 14
-rw-r--r--  llvm/test/CodeGen/X86/avx512cd-intrinsics.ll | 36
-rw-r--r--  llvm/test/CodeGen/X86/fpcmp-soft-fp.ll | 254
-rw-r--r--  llvm/test/CodeGen/X86/pku.ll | 50
-rw-r--r--  llvm/test/CodeGen/X86/pr21792.ll | 82
-rw-r--r--  llvm/test/CodeGen/X86/pr24139.ll | 296
-rw-r--r--  llvm/test/CodeGen/X86/statepoint-far-call.ll | 44
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll | 82
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll | 42
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll | 42
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll | 82
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-xsave.ll | 46
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll | 24
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll | 24
-rw-r--r--  llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll | 46
-rw-r--r--  llvm/test/CodeGen/X86/vec_partial.ll | 64
-rw-r--r--  llvm/test/CodeGen/X86/vec_reassociate.ll | 238
-rw-r--r--  llvm/test/CodeGen/X86/x86-32-intrcc.ll | 158
-rw-r--r--  llvm/test/CodeGen/X86/x86-64-intrcc.ll | 170
21 files changed, 941 insertions(+), 941 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
index 994a9956cf7..921cf6a6f0d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func30
-;CHECK: movi.4h v1, #0x1
-;CHECK: and.8b v0, v0, v1
-;CHECK: ushll.4s v0, v0, #0
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_30 = type <4 x i1>
-%T1_30 = type <4 x i32>
-define void @func30(%T0_30 %v0, %T1_30* %p1) {
- %r = zext %T0_30 %v0 to %T1_30
- store %T1_30 %r, %T1_30* %p1
- ret void
-}
-
-; Extend from v1i1 was crashing things (PR20791). Make sure we do something
-; sensible instead.
-define <1 x i32> @autogen_SD7918() {
-; CHECK-LABEL: autogen_SD7918
-; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: ret
- %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
- %ZE = zext <1 x i1> %I29 to <1 x i32>
- ret <1 x i32> %ZE
-}
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func30
+;CHECK: movi.4h v1, #0x1
+;CHECK: and.8b v0, v0, v1
+;CHECK: ushll.4s v0, v0, #0
+;CHECK: str q0, [x0]
+;CHECK: ret
+
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+ %r = zext %T0_30 %v0 to %T1_30
+ store %T1_30 %r, %T1_30* %p1
+ ret void
+}
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+ %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+ %ZE = zext <1 x i1> %I29 to <1 x i32>
+ ret <1 x i32> %ZE
+}
diff --git a/llvm/test/CodeGen/ARM/debugtrap.ll b/llvm/test/CodeGen/ARM/debugtrap.ll
index 9ce73939ce5..3d8cdea6cda 100644
--- a/llvm/test/CodeGen/ARM/debugtrap.ll
+++ b/llvm/test/CodeGen/ARM/debugtrap.ll
@@ -1,17 +1,17 @@
-; This test ensures the @llvm.debugtrap() call is not removed when generating
-; the 'pop' instruction to restore the callee saved registers on ARM.
-
-; RUN: llc < %s -mtriple=armv7 -O0 -filetype=asm | FileCheck %s
-
-declare void @llvm.debugtrap() nounwind
-declare void @foo() nounwind
-
-define void @test() nounwind {
-entry:
- ; CHECK: bl foo
- ; CHECK-NEXT: pop
- ; CHECK-NEXT: trap
- call void @foo()
- call void @llvm.debugtrap()
- ret void
-}
+; This test ensures the @llvm.debugtrap() call is not removed when generating
+; the 'pop' instruction to restore the callee saved registers on ARM.
+
+; RUN: llc < %s -mtriple=armv7 -O0 -filetype=asm | FileCheck %s
+
+declare void @llvm.debugtrap() nounwind
+declare void @foo() nounwind
+
+define void @test() nounwind {
+entry:
+ ; CHECK: bl foo
+ ; CHECK-NEXT: pop
+ ; CHECK-NEXT: trap
+ call void @foo()
+ call void @llvm.debugtrap()
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/2011-11-30-or.ll b/llvm/test/CodeGen/X86/2011-11-30-or.ll
index 4260e817b41..8378a022eab 100644
--- a/llvm/test/CodeGen/X86/2011-11-30-or.ll
+++ b/llvm/test/CodeGen/X86/2011-11-30-or.ll
@@ -2,13 +2,13 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "x86_64-apple-macosx10.6.6"
-
-; Test that the order of operands is correct
-; CHECK: select_func
-; CHECK: pblendvb {{LCPI0_[0-9]*}}(%rip), %xmm1
-; CHECK: ret
-
-define void @select_func(<8 x i16> %in) {
+
+; Test that the order of operands is correct
+; CHECK: select_func
+; CHECK: pblendvb {{LCPI0_[0-9]*}}(%rip), %xmm1
+; CHECK: ret
+
+define void @select_func(<8 x i16> %in) {
entry:
%c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
diff --git a/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
index 29f17bbc019..febd3d69dd1 100644
--- a/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
-
-define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
- ; CHECK: test_x86_vbroadcastmw_512
- ; CHECK: vpbroadcastmw2d %k0, %zmm0
- %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ;
- ret <16 x i32> %res
-}
-declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
-
-define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
- ; CHECK: test_x86_broadcastmb_512
- ; CHECK: vpbroadcastmb2q %k0, %zmm0
- %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ;
- ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
-
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
+
+define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
+ ; CHECK: test_x86_vbroadcastmw_512
+ ; CHECK: vpbroadcastmw2d %k0, %zmm0
+ %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ;
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
+
+define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
+ ; CHECK: test_x86_broadcastmb_512
+ ; CHECK: vpbroadcastmb2q %k0, %zmm0
+ %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ;
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
+
diff --git a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
index 58d57017d18..dac468e5cbf 100644
--- a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
+++ b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
@@ -1,127 +1,127 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s
-
-define i1 @test1(double %d) #0 {
-entry:
- %cmp = fcmp ule double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test1:
-; CHECK: calll __gtdf2
-; CHECK: setle
-; CHECK: retl
-
-define i1 @test2(double %d) #0 {
-entry:
- %cmp = fcmp ult double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test2:
-; CHECK: calll __gedf2
-; CHECK: sets
-; CHECK: retl
-
-define i1 @test3(double %d) #0 {
-entry:
- %cmp = fcmp ugt double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test3:
-; CHECK: calll __ledf2
-; CHECK: setg
-; CHECK: retl
-
-define i1 @test4(double %d) #0 {
-entry:
- %cmp = fcmp uge double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test4:
-; CHECK: calll __ltdf2
-; CHECK: setns
-; CHECK: retl
-
-define i1 @test5(double %d) #0 {
-entry:
- %cmp = fcmp ole double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test5:
-; CHECK: calll __ledf2
-; CHECK: setle
-; CHECK: retl
-
-define i1 @test6(double %d) #0 {
-entry:
- %cmp = fcmp olt double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test6:
-; CHECK: calll __ltdf2
-; CHECK: sets
-; CHECK: retl
-
-define i1 @test7(double %d) #0 {
-entry:
- %cmp = fcmp ogt double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test7:
-; CHECK: calll __gtdf2
-; CHECK: setg
-; CHECK: retl
-
-define i1 @test8(double %d) #0 {
-entry:
- %cmp = fcmp oge double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test8:
-; CHECK: calll __gedf2
-; CHECK: setns
-; CHECK: retl
-
-define i1 @test9(double %d) #0 {
-entry:
- %cmp = fcmp oeq double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test9:
-; CHECK: calll __eqdf2
-; CHECK: sete
-; CHECK: retl
-
-define i1 @test10(double %d) #0 {
-entry:
- %cmp = fcmp ueq double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test10:
-; CHECK: calll __eqdf2
-; CHECK: sete
-; CHECK: calll __unorddf2
-; CHECK: setne
-; CHECK: retl
-
-define i1 @test11(double %d) #0 {
-entry:
- %cmp = fcmp one double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test11:
-; CHECK: calll __gtdf2
-; CHECK: setg
-; CHECK: calll __ltdf2
-; CHECK: sets
-; CHECK: retl
-
-define i1 @test12(double %d) #0 {
-entry:
- %cmp = fcmp une double %d, 0.000000e+00
- ret i1 %cmp
-}
-; CHECK-LABEL: test12:
-; CHECK: calll __nedf2
-; CHECK: setne
-; CHECK: retl
-
-attributes #0 = { "use-soft-float"="true" }
+; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s
+
+define i1 @test1(double %d) #0 {
+entry:
+ %cmp = fcmp ule double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test1:
+; CHECK: calll __gtdf2
+; CHECK: setle
+; CHECK: retl
+
+define i1 @test2(double %d) #0 {
+entry:
+ %cmp = fcmp ult double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test2:
+; CHECK: calll __gedf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test3(double %d) #0 {
+entry:
+ %cmp = fcmp ugt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test3:
+; CHECK: calll __ledf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test4(double %d) #0 {
+entry:
+ %cmp = fcmp uge double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test4:
+; CHECK: calll __ltdf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test5(double %d) #0 {
+entry:
+ %cmp = fcmp ole double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test5:
+; CHECK: calll __ledf2
+; CHECK: setle
+; CHECK: retl
+
+define i1 @test6(double %d) #0 {
+entry:
+ %cmp = fcmp olt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test6:
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test7(double %d) #0 {
+entry:
+ %cmp = fcmp ogt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test7:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test8(double %d) #0 {
+entry:
+ %cmp = fcmp oge double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test8:
+; CHECK: calll __gedf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test9(double %d) #0 {
+entry:
+ %cmp = fcmp oeq double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test9:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: retl
+
+define i1 @test10(double %d) #0 {
+entry:
+ %cmp = fcmp ueq double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test10:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: calll __unorddf2
+; CHECK: setne
+; CHECK: retl
+
+define i1 @test11(double %d) #0 {
+entry:
+ %cmp = fcmp one double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test11:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test12(double %d) #0 {
+entry:
+ %cmp = fcmp une double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test12:
+; CHECK: calll __nedf2
+; CHECK: setne
+; CHECK: retl
+
+attributes #0 = { "use-soft-float"="true" }
diff --git a/llvm/test/CodeGen/X86/pku.ll b/llvm/test/CodeGen/X86/pku.ll
index cbb83ae5b02..8568cf43abc 100644
--- a/llvm/test/CodeGen/X86/pku.ll
+++ b/llvm/test/CodeGen/X86/pku.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
-declare i32 @llvm.x86.rdpkru()
-declare void @llvm.x86.wrpkru(i32)
-
-define void @test_x86_wrpkru(i32 %src) {
-; CHECK-LABEL: test_x86_wrpkru:
-; CHECK: ## BB#0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: wrpkru
-; CHECK-NEXT: retq
- call void @llvm.x86.wrpkru(i32 %src)
- ret void
-}
-
-define i32 @test_x86_rdpkru() {
-; CHECK-LABEL: test_x86_rdpkru:
-; CHECK: ## BB#0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: rdpkru
-; CHECK-NEXT: retq
- %res = call i32 @llvm.x86.rdpkru()
- ret i32 %res
-}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+declare i32 @llvm.x86.rdpkru()
+declare void @llvm.x86.wrpkru(i32)
+
+define void @test_x86_wrpkru(i32 %src) {
+; CHECK-LABEL: test_x86_wrpkru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: wrpkru
+; CHECK-NEXT: retq
+ call void @llvm.x86.wrpkru(i32 %src)
+ ret void
+}
+
+define i32 @test_x86_rdpkru() {
+; CHECK-LABEL: test_x86_rdpkru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: rdpkru
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.rdpkru()
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/pr21792.ll b/llvm/test/CodeGen/X86/pr21792.ll
index 59866c09054..f6dca609bc0 100644
--- a/llvm/test/CodeGen/X86/pr21792.ll
+++ b/llvm/test/CodeGen/X86/pr21792.ll
@@ -1,41 +1,41 @@
-; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
-; This fixes a missing cases in the MI scheduler's constrainLocalCopy exposed by
-; PR21792
-
-@stuff = external constant [256 x double], align 16
-
-define void @func(<4 x float> %vx) {
-entry:
- %tmp2 = bitcast <4 x float> %vx to <2 x i64>
- %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
- %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
- %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
- %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
- %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
- %tmp4 = bitcast i8* %add.ptr to double*
- %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
- %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
- %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
- %tmp5 = bitcast i8* %add.ptr6 to double*
- %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
- %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
- %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
- %tmp6 = bitcast i8* %add.ptr15 to double*
- %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
- %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
- %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
- %tmp7 = bitcast i8* %add.ptr20 to double*
- %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
- %tmp16 = bitcast i8* %add.ptr46 to double*
- %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
- %tmp17 = bitcast i8* %add.ptr51 to double*
- call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
- ret void
-; CHECK-LABEL: func:
-; CHECK: pextrq $1, %xmm0,
-; CHECK-NEXT: movd %xmm0, %r[[AX:..]]
-; CHECK-NEXT: movslq %e[[AX]],
-; CHECK-NEXT: sarq $32, %r[[AX]]
-}
-
-declare void @toto(double*, double*, double*, double*, double*, double*)
+; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
+; This fixes a missing cases in the MI scheduler's constrainLocalCopy exposed by
+; PR21792
+
+@stuff = external constant [256 x double], align 16
+
+define void @func(<4 x float> %vx) {
+entry:
+ %tmp2 = bitcast <4 x float> %vx to <2 x i64>
+ %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
+ %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
+ %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
+ %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
+ %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
+ %tmp4 = bitcast i8* %add.ptr to double*
+ %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
+ %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
+ %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
+ %tmp5 = bitcast i8* %add.ptr6 to double*
+ %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
+ %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
+ %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
+ %tmp6 = bitcast i8* %add.ptr15 to double*
+ %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
+ %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
+ %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
+ %tmp7 = bitcast i8* %add.ptr20 to double*
+ %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
+ %tmp16 = bitcast i8* %add.ptr46 to double*
+ %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
+ %tmp17 = bitcast i8* %add.ptr51 to double*
+ call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
+ ret void
+; CHECK-LABEL: func:
+; CHECK: pextrq $1, %xmm0,
+; CHECK-NEXT: movd %xmm0, %r[[AX:..]]
+; CHECK-NEXT: movslq %e[[AX]],
+; CHECK-NEXT: sarq $32, %r[[AX]]
+}
+
+declare void @toto(double*, double*, double*, double*, double*, double*)
diff --git a/llvm/test/CodeGen/X86/pr24139.ll b/llvm/test/CodeGen/X86/pr24139.ll
index fbe55abcbf7..ec56345ba64 100644
--- a/llvm/test/CodeGen/X86/pr24139.ll
+++ b/llvm/test/CodeGen/X86/pr24139.ll
@@ -1,148 +1,148 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-
-; Check that we do not get excessive spilling from splitting of constant live ranges.
-
-; CHECK-LABEL: PR24139:
-; CHECK: # 16-byte Spill
-; CHECK-NOT: # 16-byte Spill
-; CHECK: retq
-
-define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) {
- %tmp = bitcast <2 x double> %arg to <4 x float>
- %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
- %tmp4 = bitcast <2 x double> %arg to <4 x i32>
- %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
- %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float>
- %tmp8 = fadd <4 x float> %tmp3, %tmp7
- %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2
- %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
- %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2
- %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
- %tmp13 = fsub <4 x float> %tmp, %tmp12
- %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
- %tmp15 = fsub <4 x float> %tmp13, %tmp14
- %tmp16 = fmul <4 x float> %tmp15, %tmp15
- %tmp17 = fmul <4 x float> %tmp15, %tmp16
- %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
- %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
- %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
- %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
- %tmp22 = fmul <4 x float> %tmp16, %tmp19
- %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
- %tmp24 = fmul <4 x float> %tmp16, %tmp21
- %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
- %tmp26 = fmul <4 x float> %tmp16, %tmp23
- %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
- %tmp28 = fmul <4 x float> %tmp17, %tmp25
- %tmp29 = fadd <4 x float> %tmp15, %tmp28
- %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297>
- %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32>
- %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer
- %tmp33 = sext <4 x i1> %tmp32 to <4 x i32>
- %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float>
- %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2
- %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594>
- %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32>
- %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer
- %tmp39 = sext <4 x i1> %tmp38 to <4 x i32>
- %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32>
- %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float>
- %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float>
- %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2
- %tmp45 = bitcast <2 x double> %arg1 to <4 x float>
- %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
- %tmp47 = bitcast <2 x double> %arg1 to <4 x i32>
- %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
- %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float>
- %tmp51 = fadd <4 x float> %tmp46, %tmp50
- %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2
- %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64>
- %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2
- %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
- %tmp56 = fsub <4 x float> %tmp45, %tmp55
- %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
- %tmp58 = fsub <4 x float> %tmp56, %tmp57
- %tmp59 = fmul <4 x float> %tmp58, %tmp58
- %tmp60 = fmul <4 x float> %tmp58, %tmp59
- %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
- %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
- %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
- %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
- %tmp65 = fmul <4 x float> %tmp59, %tmp62
- %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
- %tmp67 = fmul <4 x float> %tmp59, %tmp64
- %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
- %tmp69 = fmul <4 x float> %tmp59, %tmp66
- %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
- %tmp71 = fmul <4 x float> %tmp60, %tmp68
- %tmp72 = fadd <4 x float> %tmp58, %tmp71
- %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297>
- %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32>
- %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer
- %tmp76 = sext <4 x i1> %tmp75 to <4 x i32>
- %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float>
- %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2
- %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594>
- %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32>
- %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer
- %tmp82 = sext <4 x i1> %tmp81 to <4 x i32>
- %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32>
- %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float>
- %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float>
- %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2
- %tmp88 = fadd <4 x float> %tmp44, %tmp87
- %tmp89 = bitcast <2 x double> %arg2 to <4 x float>
- %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
- %tmp91 = bitcast <2 x double> %arg2 to <4 x i32>
- %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
- %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float>
- %tmp95 = fadd <4 x float> %tmp90, %tmp94
- %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2
- %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64>
- %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2
- %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
- %tmp100 = fsub <4 x float> %tmp89, %tmp99
- %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
- %tmp102 = fsub <4 x float> %tmp100, %tmp101
- %tmp103 = fmul <4 x float> %tmp102, %tmp102
- %tmp104 = fmul <4 x float> %tmp102, %tmp103
- %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
- %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
- %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
- %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
- %tmp109 = fmul <4 x float> %tmp103, %tmp106
- %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
- %tmp111 = fmul <4 x float> %tmp103, %tmp108
- %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
- %tmp113 = fmul <4 x float> %tmp103, %tmp110
- %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
- %tmp115 = fmul <4 x float> %tmp104, %tmp112
- %tmp116 = fadd <4 x float> %tmp102, %tmp115
- %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297>
- %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32>
- %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer
- %tmp120 = sext <4 x i1> %tmp119 to <4 x i32>
- %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float>
- %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2
- %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594>
- %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32>
- %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer
- %tmp126 = sext <4 x i1> %tmp125 to <4 x i32>
- %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32>
- %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
- %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float>
- %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float>
- %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2
- %tmp132 = fadd <4 x float> %tmp88, %tmp131
- %tmp133 = bitcast <4 x float> %tmp132 to <2 x double>
- ret <2 x double> %tmp133
-}
-
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
-declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; Check that we do not get excessive spilling from splitting of constant live ranges.
+
+; CHECK-LABEL: PR24139:
+; CHECK: # 16-byte Spill
+; CHECK-NOT: # 16-byte Spill
+; CHECK: retq
+
+define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) {
+ %tmp = bitcast <2 x double> %arg to <4 x float>
+ %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp4 = bitcast <2 x double> %arg to <4 x i32>
+ %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float>
+ %tmp8 = fadd <4 x float> %tmp3, %tmp7
+ %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2
+ %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
+ %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2
+ %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp13 = fsub <4 x float> %tmp, %tmp12
+ %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp15 = fsub <4 x float> %tmp13, %tmp14
+ %tmp16 = fmul <4 x float> %tmp15, %tmp15
+ %tmp17 = fmul <4 x float> %tmp15, %tmp16
+ %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp22 = fmul <4 x float> %tmp16, %tmp19
+ %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp24 = fmul <4 x float> %tmp16, %tmp21
+ %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp26 = fmul <4 x float> %tmp16, %tmp23
+ %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp28 = fmul <4 x float> %tmp17, %tmp25
+ %tmp29 = fadd <4 x float> %tmp15, %tmp28
+ %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297>
+ %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32>
+ %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer
+ %tmp33 = sext <4 x i1> %tmp32 to <4 x i32>
+ %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float>
+ %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2
+ %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594>
+ %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32>
+ %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer
+ %tmp39 = sext <4 x i1> %tmp38 to <4 x i32>
+ %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32>
+ %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float>
+ %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float>
+ %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2
+ %tmp45 = bitcast <2 x double> %arg1 to <4 x float>
+ %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp47 = bitcast <2 x double> %arg1 to <4 x i32>
+ %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float>
+ %tmp51 = fadd <4 x float> %tmp46, %tmp50
+ %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2
+ %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64>
+ %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2
+ %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp56 = fsub <4 x float> %tmp45, %tmp55
+ %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp58 = fsub <4 x float> %tmp56, %tmp57
+ %tmp59 = fmul <4 x float> %tmp58, %tmp58
+ %tmp60 = fmul <4 x float> %tmp58, %tmp59
+ %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp65 = fmul <4 x float> %tmp59, %tmp62
+ %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp67 = fmul <4 x float> %tmp59, %tmp64
+ %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp69 = fmul <4 x float> %tmp59, %tmp66
+ %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp71 = fmul <4 x float> %tmp60, %tmp68
+ %tmp72 = fadd <4 x float> %tmp58, %tmp71
+ %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297>
+ %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32>
+ %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer
+ %tmp76 = sext <4 x i1> %tmp75 to <4 x i32>
+ %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float>
+ %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2
+ %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594>
+ %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32>
+ %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer
+ %tmp82 = sext <4 x i1> %tmp81 to <4 x i32>
+ %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32>
+ %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float>
+ %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float>
+ %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2
+ %tmp88 = fadd <4 x float> %tmp44, %tmp87
+ %tmp89 = bitcast <2 x double> %arg2 to <4 x float>
+ %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp91 = bitcast <2 x double> %arg2 to <4 x i32>
+ %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float>
+ %tmp95 = fadd <4 x float> %tmp90, %tmp94
+ %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2
+ %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64>
+ %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2
+ %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp100 = fsub <4 x float> %tmp89, %tmp99
+ %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp102 = fsub <4 x float> %tmp100, %tmp101
+ %tmp103 = fmul <4 x float> %tmp102, %tmp102
+ %tmp104 = fmul <4 x float> %tmp102, %tmp103
+ %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp109 = fmul <4 x float> %tmp103, %tmp106
+ %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp111 = fmul <4 x float> %tmp103, %tmp108
+ %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp113 = fmul <4 x float> %tmp103, %tmp110
+ %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp115 = fmul <4 x float> %tmp104, %tmp112
+ %tmp116 = fadd <4 x float> %tmp102, %tmp115
+ %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297>
+ %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32>
+ %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer
+ %tmp120 = sext <4 x i1> %tmp119 to <4 x i32>
+ %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float>
+ %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2
+ %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594>
+ %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32>
+ %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer
+ %tmp126 = sext <4 x i1> %tmp125 to <4 x i32>
+ %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32>
+ %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float>
+ %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float>
+ %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2
+ %tmp132 = fadd <4 x float> %tmp88, %tmp131
+ %tmp133 = bitcast <4 x float> %tmp132 to <2 x double>
+ ret <2 x double> %tmp133
+}
+
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/X86/statepoint-far-call.ll b/llvm/test/CodeGen/X86/statepoint-far-call.ll
index 2ebf38c5c01..dc49061f646 100644
--- a/llvm/test/CodeGen/X86/statepoint-far-call.ll
+++ b/llvm/test/CodeGen/X86/statepoint-far-call.ll
@@ -1,22 +1,22 @@
-; RUN: llc < %s | FileCheck %s
-; Test to check that Statepoints with X64 far-immediate targets
-; are lowered correctly to an indirect call via a scratch register.
-
-target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-win64"
-
-define void @test_far_call() gc "statepoint-example" {
-; CHECK-LABEL: test_far_call
-; CHECK: pushq %rax
-; CHECK: movabsq $140727162896504, %rax
-; CHECK: callq *%rax
-; CHECK: popq %rax
-; CHECK: retq
-
-entry:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)
- ret void
-}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-
+; RUN: llc < %s | FileCheck %s
+; Test to check that Statepoints with X64 far-immediate targets
+; are lowered correctly to an indirect call via a scratch register.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win64"
+
+define void @test_far_call() gc "statepoint-example" {
+; CHECK-LABEL: test_far_call
+; CHECK: pushq %rax
+; CHECK: movabsq $140727162896504, %rax
+; CHECK: callq *%rax
+; CHECK: popq %rax
+; CHECK: retq
+
+entry:
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)
+ ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
index feec9516220..62cd625e4f2 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
@@ -1,41 +1,41 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave | FileCheck %s
-
-define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsave
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsave (%rdi)
- call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsave(i8*, i32, i32)
-
-define void @test_xsave64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsave64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsave64 (%rdi)
- call void @llvm.x86.xsave64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsave64(i8*, i32, i32)
-
-define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstor
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xrstor (%rdi)
- call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstor(i8*, i32, i32)
-
-define void @test_xrstor64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstor64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xrstor64 (%rdi)
- call void @llvm.x86.xrstor64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstor64(i8*, i32, i32)
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsave (%rdi)
+ call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xsave64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsave64 (%rdi)
+ call void @llvm.x86.xsave64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave64(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstor (%rdi)
+ call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
+
+define void @test_xrstor64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstor64 (%rdi)
+ call void @llvm.x86.xrstor64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
index 06803488651..c1c5cbd0471 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
@@ -1,21 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
-
-define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsavec
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsavec (%rdi)
- call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsavec(i8*, i32, i32)
-
-define void @test_xsavec64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsavec64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsavec64 (%rdi)
- call void @llvm.x86.xsavec64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsavec64(i8*, i32, i32)
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsavec (%rdi)
+ call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
+
+define void @test_xsavec64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsavec64 (%rdi)
+ call void @llvm.x86.xsavec64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
index ee0a5360da8..49603d4e216 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
@@ -1,21 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsaveopt | FileCheck %s
-
-define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaveopt
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsaveopt (%rdi)
- call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaveopt(i8*, i32, i32)
-
-define void @test_xsaveopt64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaveopt64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsaveopt64 (%rdi)
- call void @llvm.x86.xsaveopt64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaveopt64(i8*, i32, i32)
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaveopt (%rdi)
+ call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
+
+define void @test_xsaveopt64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaveopt64 (%rdi)
+ call void @llvm.x86.xsaveopt64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
index 5c1c5be4e7e..08d90f5a5a8 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
@@ -1,41 +1,41 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
-
-define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaves
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsaves (%rdi)
- call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaves(i8*, i32, i32)
-
-define void @test_xsaves64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaves64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xsaves64 (%rdi)
- call void @llvm.x86.xsaves64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaves64(i8*, i32, i32)
-
-define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstors
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xrstors (%rdi)
- call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstors(i8*, i32, i32)
-
-define void @test_xrstors64(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstors64
-; CHECK: movl %edx, %eax
-; CHECK: movl %esi, %edx
-; CHECK: xrstors64 (%rdi)
- call void @llvm.x86.xrstors64(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstors64(i8*, i32, i32)
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaves (%rdi)
+ call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xsaves64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaves64 (%rdi)
+ call void @llvm.x86.xsaves64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves64(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstors (%rdi)
+ call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
+
+define void @test_xrstors64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstors64 (%rdi)
+ call void @llvm.x86.xrstors64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstors64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
index ff9fb7e247a..deaf1bec3a7 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave | FileCheck %s
-
-define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsave
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xsave (%ecx)
- call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsave(i8*, i32, i32)
-
-define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstor
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xrstor (%ecx)
- call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstor(i8*, i32, i32)
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsave (%ecx)
+ call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xrstor (%ecx)
+ call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
index 4a55ea9531b..a4576078f84 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
-
-define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsavec
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xsavec (%ecx)
- call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsavec(i8*, i32, i32)
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsavec (%ecx)
+ call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
index f9bd7acd5a7..4bef3fd40ab 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaveopt | FileCheck %s
-
-define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaveopt
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xsaveopt (%ecx)
- call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaveopt(i8*, i32, i32)
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsaveopt (%ecx)
+ call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
index ca1c5c1a9ed..840bbbced2c 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
-
-define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xsaves
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xsaves (%ecx)
- call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xsaves(i8*, i32, i32)
-
-define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
-; CHECK-LABEL: test_xrstors
-; CHECK: movl 8(%esp), %edx
-; CHECK: movl 12(%esp), %eax
-; CHECK: movl 4(%esp), %ecx
-; CHECK: xrstors (%ecx)
- call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
- ret void;
-}
-declare void @llvm.x86.xrstors(i8*, i32, i32)
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsaves (%ecx)
+ call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xrstors (%ecx)
+ call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+ ret void
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/vec_partial.ll b/llvm/test/CodeGen/X86/vec_partial.ll
index 709f326e502..469667a28a7 100644
--- a/llvm/test/CodeGen/X86/vec_partial.ll
+++ b/llvm/test/CodeGen/X86/vec_partial.ll
@@ -1,32 +1,32 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-; PR11580
-define <3 x float> @addf3(<3 x float> %x) {
-; CHECK-LABEL: addf3
-; CHECK: # BB#0:
-; CHECK-NEXT: addps .LCPI0_0(%rip), %xmm0
-; CHECK-NEXT: retq
-entry:
- %add = fadd <3 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
- ret <3 x float> %add
-}
-
-; PR11580
-define <4 x float> @cvtf3_f4(<3 x float> %x) {
-; CHECK-LABEL: cvtf3_f4
-; CHECK: # BB#0:
-; CHECK-NEXT: retq
-entry:
- %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
- ret <4 x float> %extractVec
-}
-
-; PR11580
-define <3 x float> @cvtf4_f3(<4 x float> %x) {
-; CHECK-LABEL: cvtf4_f3
-; CHECK: # BB#0:
-; CHECK-NEXT: retq
-entry:
- %extractVec = shufflevector <4 x float> %x, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %extractVec
-}
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; PR11580
+define <3 x float> @addf3(<3 x float> %x) {
+; CHECK-LABEL: addf3
+; CHECK: # BB#0:
+; CHECK-NEXT: addps .LCPI0_0(%rip), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %add = fadd <3 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ ret <3 x float> %add
+}
+
+; PR11580
+define <4 x float> @cvtf3_f4(<3 x float> %x) {
+; CHECK-LABEL: cvtf3_f4
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+entry:
+ %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x float> %extractVec
+}
+
+; PR11580
+define <3 x float> @cvtf4_f3(<4 x float> %x) {
+; CHECK-LABEL: cvtf4_f3
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+entry:
+ %extractVec = shufflevector <4 x float> %x, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %extractVec
+}
diff --git a/llvm/test/CodeGen/X86/vec_reassociate.ll b/llvm/test/CodeGen/X86/vec_reassociate.ll
index bf2053f7842..0d3373528f5 100644
--- a/llvm/test/CodeGen/X86/vec_reassociate.ll
+++ b/llvm/test/CodeGen/X86/vec_reassociate.ll
@@ -1,119 +1,119 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s
-
-define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @add_4i32
- ;CHECK: # BB#0:
- ;CHECK-NEXT: paddd %xmm1, %xmm0
- ;CHECK-NEXT: retq
- %1 = add <4 x i32> %a0, <i32 1, i32 -2, i32 3, i32 -4>
- %2 = add <4 x i32> %a1, <i32 -1, i32 2, i32 -3, i32 4>
- %3 = add <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @add_4i32_commute
- ;CHECK: # BB#0:
- ;CHECK-NEXT: paddd %xmm1, %xmm0
- ;CHECK-NEXT: retq
- %1 = add <4 x i32> <i32 1, i32 -2, i32 3, i32 -4>, %a0
- %2 = add <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, %a1
- %3 = add <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @mul_4i32
- ;CHECK: # BB#0:
- ;CHECK-NEXT: pmulld %xmm1, %xmm0
- ;CHECK-NEXT: pmulld .LCPI2_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
- %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
- %3 = mul <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @mul_4i32_commute
- ;CHECK: # BB#0:
- ;CHECK-NEXT: pmulld %xmm1, %xmm0
- ;CHECK-NEXT: pmulld .LCPI3_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
- %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
- %3 = mul <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @and_4i32
- ;CHECK: # BB#0:
- ;CHECK-NEXT: andps %xmm1, %xmm0
- ;CHECK-NEXT: andps .LCPI4_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
- %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
- %3 = and <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @and_4i32_commute
- ;CHECK: # BB#0:
- ;CHECK-NEXT: andps %xmm1, %xmm0
- ;CHECK-NEXT: andps .LCPI5_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = and <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
- %2 = and <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
- %3 = and <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @or_4i32
- ;CHECK: # BB#0:
- ;CHECK-NEXT: orps %xmm1, %xmm0
- ;CHECK-NEXT: orps .LCPI6_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
- %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
- %3 = or <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @or_4i32_commute
- ;CHECK: # BB#0:
- ;CHECK-NEXT: orps %xmm1, %xmm0
- ;CHECK-NEXT: orps .LCPI7_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = or <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
- %2 = or <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
- %3 = or <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @xor_4i32
- ;CHECK: # BB#0:
- ;CHECK-NEXT: xorps %xmm1, %xmm0
- ;CHECK-NEXT: xorps .LCPI8_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
- %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
- %3 = xor <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
- ;CHECK-LABEL: @xor_4i32_commute
- ;CHECK: # BB#0:
- ;CHECK-NEXT: xorps %xmm1, %xmm0
- ;CHECK-NEXT: xorps .LCPI9_0(%rip), %xmm0
- ;CHECK-NEXT: retq
- %1 = xor <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
- %2 = xor <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
- %3 = xor <4 x i32> %1, %2
- ret <4 x i32> %3
-}
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s
+
+define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> %a0, <i32 1, i32 -2, i32 3, i32 -4>
+ %2 = add <4 x i32> %a1, <i32 -1, i32 2, i32 -3, i32 4>
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> <i32 1, i32 -2, i32 3, i32 -4>, %a0
+ %2 = add <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, %a1
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI2_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
+ %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI3_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
+ %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI4_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI5_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = and <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI6_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI7_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = or <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI8_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI9_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 908da3d1120..99d0044c6de 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -1,79 +1,79 @@
-; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s
-; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
-
-%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
-
-@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*)], section "llvm.metadata"
-
-; Spills eax, putting original esp at +4.
-; No stack adjustment if declared with no error code
-define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
- ; CHECK-LABEL: test_isr_no_ecode:
- ; CHECK: pushl %eax
- ; CHECK: movl 12(%esp), %eax
- ; CHECK: popl %eax
- ; CHECK: iretl
- ; CHECK0-LABEL: test_isr_no_ecode:
- ; CHECK0: pushl %eax
- ; CHECK0: leal 4(%esp), %eax
- ; CHECK0: movl 8(%eax), %eax
- ; CHECK0: popl %eax
- ; CHECK0: iretl
- %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
- %flags = load i32, i32* %pflags, align 4
- call void asm sideeffect "", "r"(i32 %flags)
- ret void
-}
-
-; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
-; before return, popping the error code.
-define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %ecode) {
- ; CHECK-LABEL: test_isr_ecode
- ; CHECK: pushl %ecx
- ; CHECK: pushl %eax
- ; CHECK: movl 8(%esp), %eax
- ; CHECK: movl 20(%esp), %ecx
- ; CHECK: popl %eax
- ; CHECK: popl %ecx
- ; CHECK: addl $4, %esp
- ; CHECK: iretl
- ; CHECK0-LABEL: test_isr_ecode
- ; CHECK0: pushl %ecx
- ; CHECK0: pushl %eax
- ; CHECK0: movl 8(%esp), %eax
- ; CHECK0: leal 12(%esp), %ecx
- ; CHECK0: movl 8(%ecx), %ecx
- ; CHECK0: popl %eax
- ; CHECK0: popl %ecx
- ; CHECK0: addl $4, %esp
- ; CHECK0: iretl
- %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
- %flags = load i32, i32* %pflags, align 4
- call x86_fastcallcc void asm sideeffect "", "r,r"(i32 %flags, i32 %ecode)
- ret void
-}
-
-; All clobbered registers must be saved
-define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {
- call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
- ; CHECK-LABEL: test_isr_clobbers
- ; CHECK-SSE-NEXT: pushl %ebp
- ; CHECK-SSE-NEXT: pushl %ebx
- ; CHECK-SSE-NEXT; pushl %eax
- ; CHECK-SSE-NEXT: popl %eax
- ; CHECK-SSE-NEXT: popl %ebx
- ; CHECK-SSE-NEXT: popl %ebp
- ; CHECK-SSE-NEXT: addl $4, %esp
- ; CHECK-SSE-NEXT: iretl
- ; CHECK0-LABEL: test_isr_clobbers
- ; CHECK0-SSE-NEXT: pushl %ebp
- ; CHECK0-SSE-NEXT: pushl %ebx
- ; CHECK0-SSE-NEXT; pushl %eax
- ; CHECK0-SSE-NEXT: popl %eax
- ; CHECK0-SSE-NEXT: popl %ebx
- ; CHECK0-SSE-NEXT: popl %ebp
- ; CHECK0-SSE-NEXT: addl $4, %esp
- ; CHECK0-SSE-NEXT: iretl
- ret void
-}
-
+; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*)], section "llvm.metadata"
+
+; Spills eax, putting original esp at +4.
+; No stack adjustment if declared with no error code
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+ ; CHECK-LABEL: test_isr_no_ecode:
+ ; CHECK: pushl %eax
+ ; CHECK: movl 12(%esp), %eax
+ ; CHECK: popl %eax
+ ; CHECK: iretl
+ ; CHECK0-LABEL: test_isr_no_ecode:
+ ; CHECK0: pushl %eax
+ ; CHECK0: leal 4(%esp), %eax
+ ; CHECK0: movl 8(%eax), %eax
+ ; CHECK0: popl %eax
+ ; CHECK0: iretl
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i32, i32* %pflags, align 4
+ call void asm sideeffect "", "r"(i32 %flags)
+ ret void
+}
+
+; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
+; before return, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %ecode) {
+ ; CHECK-LABEL: test_isr_ecode
+ ; CHECK: pushl %ecx
+ ; CHECK: pushl %eax
+ ; CHECK: movl 8(%esp), %eax
+ ; CHECK: movl 20(%esp), %ecx
+ ; CHECK: popl %eax
+ ; CHECK: popl %ecx
+ ; CHECK: addl $4, %esp
+ ; CHECK: iretl
+ ; CHECK0-LABEL: test_isr_ecode
+ ; CHECK0: pushl %ecx
+ ; CHECK0: pushl %eax
+ ; CHECK0: movl 8(%esp), %eax
+ ; CHECK0: leal 12(%esp), %ecx
+ ; CHECK0: movl 8(%ecx), %ecx
+ ; CHECK0: popl %eax
+ ; CHECK0: popl %ecx
+ ; CHECK0: addl $4, %esp
+ ; CHECK0: iretl
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i32, i32* %pflags, align 4
+ call x86_fastcallcc void asm sideeffect "", "r,r"(i32 %flags, i32 %ecode)
+ ret void
+}
+
+; All clobbered registers must be saved
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {
+ call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
+ ; CHECK-LABEL: test_isr_clobbers
+ ; CHECK-SSE-NEXT: pushl %ebp
+ ; CHECK-SSE-NEXT: pushl %ebx
+ ; CHECK-SSE-NEXT: pushl %eax
+ ; CHECK-SSE-NEXT: popl %eax
+ ; CHECK-SSE-NEXT: popl %ebx
+ ; CHECK-SSE-NEXT: popl %ebp
+ ; CHECK-SSE-NEXT: addl $4, %esp
+ ; CHECK-SSE-NEXT: iretl
+ ; CHECK0-LABEL: test_isr_clobbers
+ ; CHECK0-SSE-NEXT: pushl %ebp
+ ; CHECK0-SSE-NEXT: pushl %ebx
+ ; CHECK0-SSE-NEXT: pushl %eax
+ ; CHECK0-SSE-NEXT: popl %eax
+ ; CHECK0-SSE-NEXT: popl %ebx
+ ; CHECK0-SSE-NEXT: popl %ebp
+ ; CHECK0-SSE-NEXT: addl $4, %esp
+ ; CHECK0-SSE-NEXT: iretl
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
index 8f70b391fa1..429209c063c 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
@@ -1,86 +1,86 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
-
-%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }
-
-@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*)], section "llvm.metadata"
-
-; Spills rax, putting original esp at +8.
-; No stack adjustment if declared with no error code
-define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
- ; CHECK-LABEL: test_isr_no_ecode:
- ; CHECK: pushq %rax
- ; CHECK: movq 24(%rsp), %rax
- ; CHECK: popq %rax
- ; CHECK: iretq
- ; CHECK0-LABEL: test_isr_no_ecode:
- ; CHECK0: pushq %rax
- ; CHECK0: leaq 8(%rsp), %rax
- ; CHECK0: movq 16(%rax), %rax
- ; CHECK0: popq %rax
- ; CHECK0: iretq
- %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
- %flags = load i64, i64* %pflags, align 4
- call void asm sideeffect "", "r"(i64 %flags)
- ret void
-}
-
-; Spills rax and rcx, putting original rsp at +16. Stack is adjusted up another 8 bytes
-; before return, popping the error code.
-define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %ecode) {
- ; CHECK-LABEL: test_isr_ecode
- ; CHECK: pushq %rax
- ; CHECK: pushq %rcx
- ; CHECK: movq 16(%rsp), %rax
- ; CHECK: movq 40(%rsp), %rcx
- ; CHECK: popq %rcx
- ; CHECK: popq %rax
- ; CHECK: addq $8, %rsp
- ; CHECK: iretq
- ; CHECK0-LABEL: test_isr_ecode
- ; CHECK0: pushq %rax
- ; CHECK0: pushq %rcx
- ; CHECK0: movq 16(%rsp), %rax
- ; CHECK0: leaq 24(%rsp), %rcx
- ; CHECK0: movq 16(%rcx), %rcx
- ; CHECK0: popq %rcx
- ; CHECK0: popq %rax
- ; CHECK0: addq $8, %rsp
- ; CHECK0: iretq
- %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
- %flags = load i64, i64* %pflags, align 4
- call void asm sideeffect "", "r,r"(i64 %flags, i64 %ecode)
- ret void
-}
-
-; All clobbered registers must be saved
-define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {
- call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"()
- ; CHECK-LABEL: test_isr_clobbers
- ; CHECK-SSE-NEXT: pushq %rax
- ; CHECK-SSE-NEXT; pushq %r11
- ; CHECK-SSE-NEXT: pushq %rbp
- ; CHECK-SSE-NEXT: pushq %rbx
- ; CHECK-SSE-NEXT: movaps %xmm0
- ; CHECK-SSE-NEXT: movaps %xmm0
- ; CHECK-SSE-NEXT: popq %rbx
- ; CHECK-SSE-NEXT: popq %rbp
- ; CHECK-SSE-NEXT: popq %r11
- ; CHECK-SSE-NEXT: popq %rax
- ; CHECK-SSE-NEXT: addq $8, %rsp
- ; CHECK-SSE-NEXT: iretq
- ; CHECK0-LABEL: test_isr_clobbers
- ; CHECK0-SSE-NEXT: pushq %rax
- ; CHECK0-SSE-NEXT; pushq %r11
- ; CHECK0-SSE-NEXT: pushq %rbp
- ; CHECK0-SSE-NEXT: pushq %rbx
- ; CHECK0-SSE-NEXT: movaps %xmm0
- ; CHECK0-SSE-NEXT: movaps %xmm0
- ; CHECK0-SSE-NEXT: popq %rbx
- ; CHECK0-SSE-NEXT: popq %rbp
- ; CHECK0-SSE-NEXT: popq %r11
- ; CHECK0-SSE-NEXT: popq %rax
- ; CHECK0-SSE-NEXT: addq $8, %rsp
- ; CHECK0-SSE-NEXT: iretq
- ret void
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*)], section "llvm.metadata"
+
+; Spills rax, putting original rsp at +8.
+; No stack adjustment if declared with no error code
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+ ; CHECK-LABEL: test_isr_no_ecode:
+ ; CHECK: pushq %rax
+ ; CHECK: movq 24(%rsp), %rax
+ ; CHECK: popq %rax
+ ; CHECK: iretq
+ ; CHECK0-LABEL: test_isr_no_ecode:
+ ; CHECK0: pushq %rax
+ ; CHECK0: leaq 8(%rsp), %rax
+ ; CHECK0: movq 16(%rax), %rax
+ ; CHECK0: popq %rax
+ ; CHECK0: iretq
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i64, i64* %pflags, align 4
+ call void asm sideeffect "", "r"(i64 %flags)
+ ret void
+}
+
+; Spills rax and rcx, putting original rsp at +16. Stack is adjusted up another 8 bytes
+; before return, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %ecode) {
+ ; CHECK-LABEL: test_isr_ecode
+ ; CHECK: pushq %rax
+ ; CHECK: pushq %rcx
+ ; CHECK: movq 16(%rsp), %rax
+ ; CHECK: movq 40(%rsp), %rcx
+ ; CHECK: popq %rcx
+ ; CHECK: popq %rax
+ ; CHECK: addq $8, %rsp
+ ; CHECK: iretq
+ ; CHECK0-LABEL: test_isr_ecode
+ ; CHECK0: pushq %rax
+ ; CHECK0: pushq %rcx
+ ; CHECK0: movq 16(%rsp), %rax
+ ; CHECK0: leaq 24(%rsp), %rcx
+ ; CHECK0: movq 16(%rcx), %rcx
+ ; CHECK0: popq %rcx
+ ; CHECK0: popq %rax
+ ; CHECK0: addq $8, %rsp
+ ; CHECK0: iretq
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i64, i64* %pflags, align 4
+ call void asm sideeffect "", "r,r"(i64 %flags, i64 %ecode)
+ ret void
+}
+
+; All clobbered registers must be saved
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {
+ call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"()
+ ; CHECK-LABEL: test_isr_clobbers
+ ; CHECK-SSE-NEXT: pushq %rax
+ ; CHECK-SSE-NEXT: pushq %r11
+ ; CHECK-SSE-NEXT: pushq %rbp
+ ; CHECK-SSE-NEXT: pushq %rbx
+ ; CHECK-SSE-NEXT: movaps %xmm0
+ ; CHECK-SSE-NEXT: movaps %xmm0
+ ; CHECK-SSE-NEXT: popq %rbx
+ ; CHECK-SSE-NEXT: popq %rbp
+ ; CHECK-SSE-NEXT: popq %r11
+ ; CHECK-SSE-NEXT: popq %rax
+ ; CHECK-SSE-NEXT: addq $8, %rsp
+ ; CHECK-SSE-NEXT: iretq
+ ; CHECK0-LABEL: test_isr_clobbers
+ ; CHECK0-SSE-NEXT: pushq %rax
+ ; CHECK0-SSE-NEXT: pushq %r11
+ ; CHECK0-SSE-NEXT: pushq %rbp
+ ; CHECK0-SSE-NEXT: pushq %rbx
+ ; CHECK0-SSE-NEXT: movaps %xmm0
+ ; CHECK0-SSE-NEXT: movaps %xmm0
+ ; CHECK0-SSE-NEXT: popq %rbx
+ ; CHECK0-SSE-NEXT: popq %rbp
+ ; CHECK0-SSE-NEXT: popq %r11
+ ; CHECK0-SSE-NEXT: popq %rax
+ ; CHECK0-SSE-NEXT: addq $8, %rsp
+ ; CHECK0-SSE-NEXT: iretq
+ ret void
 }
\ No newline at end of file
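For reference, a minimal sketch of the normalization these hunks perform, written in Python rather than taken from the commit; the tree root, the *.ll glob, and the in-place rewrite are illustrative assumptions, and files that are meant to keep CRLF endings would need to be excluded by hand:

import sys
from pathlib import Path

# Walk a test tree and rewrite CRLF line endings as LF, in place.
# This sketch converts every .ll file it finds; pass a different
# root directory as the first command-line argument if needed.
root = Path(sys.argv[1] if len(sys.argv) > 1 else "llvm/test/CodeGen")
for path in root.rglob("*.ll"):
    data = path.read_bytes()
    if b"\r\n" in data:
        path.write_bytes(data.replace(b"\r\n", b"\n"))
        print("converted", path)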