diff options
| author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-11-07 23:08:21 +0000 | 
|---|---|---|
| committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-11-07 23:08:21 +0000 | 
| commit | a70e9417fb72407ad458b18c3eeaae77ca079611 (patch) | |
| tree | db72e20262b46ad923ff875266496cd7f70c48b8 /llvm | |
| parent | 7a4be01ac819f76f6a051efb4a6f96853bffebc3 (diff) | |
| download | bcm5719-llvm-a70e9417fb72407ad458b18c3eeaae77ca079611.tar.gz bcm5719-llvm-a70e9417fb72407ad458b18c3eeaae77ca079611.zip  | |
Kill and collapse outstanding DomainValues.
DomainValues that are only used by "don't care" instructions are now
collapsed to the first possible execution domain after all basic blocks
have been processed.  This typically means the PS domain on x86.
For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are
completely collapsed to the PS domain instead of containing a mix of
execution domains created by isel.
llvm-svn: 144037
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/ExecutionDepsFix.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-logic.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/nontemporal.ll | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-align-3.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-blend.ll | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle.ll | 15 | 
8 files changed, 58 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 3d6f256dd8e..bd77f655c14 100644 --- a/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {      leaveBasicBlock(MBB);    } -  // Clear the LiveOuts vectors. Should we also collapse any remaining -  // DomainValues? -  for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); -         i != e; ++i) -    delete[] i->second; +  // Clear the LiveOuts vectors and collapse any remaining DomainValues. +  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator +         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { +    LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); +    if (FI == LiveOuts.end()) +      continue; +    assert(FI->second && "Null entry"); +    // The DomainValue is collapsed when the last reference is killed. +    LiveRegs = FI->second; +    for (unsigned i = 0, e = NumRegs; i != e; ++i) +      if (LiveRegs[i]) +        Kill(i); +    delete[] LiveRegs; +  }    LiveOuts.clear();    Avail.clear();    Allocator.DestroyAll(); diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 276209ea756..3fa1d95bf2f 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone  define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { +  ; CHECK: test_x86_sse2_movnt_dq    ; CHECK: movl    ; CHECK: vmovntdq -  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) +  ; add operation forces the execution domain. +  %a2 = add <2 x i64> %a1, <i64 1, i64 1> +  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)    ret void  }  declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind  define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { +  ; CHECK test_x86_sse2_movnt_pd    ; CHECK: movl    ; CHECK: vmovntpd -  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) +  ; fadd operation forces the execution domain. +  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> +  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)    ret void  }  declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind  define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { +  ; CHECK: test_x86_sse2_mul_sd    ; CHECK: vmulsd    %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]    ret <2 x double> %res @@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone  define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { +  ; CHECK: test_x86_sse2_storel_dq    ; CHECK: movl    ; CHECK: vmovq    call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) @@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind  define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { +  ; CHECK: test_x86_sse2_storeu_dq    ; CHECK: movl    ; CHECK: vmovdqu    call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) @@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind  define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { +  ; CHECK: test_x86_sse2_storeu_pd    ; CHECK: movl    ; CHECK: vmovupd -  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) +  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> +  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)    ret void  }  declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind  define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { +  ; CHECK: test_x86_sse2_sub_sd    ; CHECK: vsubsd    %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]    ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/avx-logic.ll b/llvm/test/CodeGen/X86/avx-logic.ll index 518c09c8695..cd371355828 100644 --- a/llvm/test/CodeGen/X86/avx-logic.ll +++ b/llvm/test/CodeGen/X86/avx-logic.ll @@ -165,7 +165,9 @@ entry:  ; CHECK: vpandn  %xmm  define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {  entry: -  %y = xor <2 x i64> %a, <i64 -1, i64 -1> +  ; Force the execution domain with an add. +  %a2 = add <2 x i64> %a, <i64 1, i64 1> +  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>    %x = and <2 x i64> %a, %y    ret <2 x i64> %x  } @@ -173,7 +175,9 @@ entry:  ; CHECK: vpand %xmm  define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {  entry: -  %x = and <2 x i64> %a, %b +  ; Force the execution domain with an add. +  %a2 = add <2 x i64> %a, <i64 1, i64 1> +  %x = and <2 x i64> %a2, %b    ret <2 x i64> %x  } diff --git a/llvm/test/CodeGen/X86/nontemporal.ll b/llvm/test/CodeGen/X86/nontemporal.ll index 1d095359b61..ae04435ac39 100644 --- a/llvm/test/CodeGen/X86/nontemporal.ll +++ b/llvm/test/CodeGen/X86/nontemporal.ll @@ -3,13 +3,16 @@  define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {  ; CHECK: movntps    %cast = bitcast i8* %B to <4 x float>* -  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0 +  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000> +  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0  ; CHECK: movntdq    %cast1 = bitcast i8* %B to <2 x i64>* -  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0 +  %E2 = add <2 x i64> %E, <i64 1, i64 2> +  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0  ; CHECK: movntpd    %cast2 = bitcast i8* %B to <2 x double>* -  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0 +  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000> +  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0  ; CHECK: movnti    %cast3 = bitcast i8* %B to i32*    store i32 %D, i32* %cast3, align 16, !nontemporal !0 diff --git a/llvm/test/CodeGen/X86/sse-align-3.ll b/llvm/test/CodeGen/X86/sse-align-3.ll index 04f216176c3..b6b0471e913 100644 --- a/llvm/test/CodeGen/X86/sse-align-3.ll +++ b/llvm/test/CodeGen/X86/sse-align-3.ll @@ -1,8 +1,8 @@  ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s  ; CHECK-NOT:     movapd  ; CHECK:     movaps -; CHECK-NOT:     movaps -; CHECK:     movapd +; CHECK-NOT:     movapd +; CHECK:     movaps  ; CHECK-NOT:     movap  define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { diff --git a/llvm/test/CodeGen/X86/sse2-blend.ll b/llvm/test/CodeGen/X86/sse2-blend.ll index 4ff1d035e4d..2f4317bf294 100644 --- a/llvm/test/CodeGen/X86/sse2-blend.ll +++ b/llvm/test/CodeGen/X86/sse2-blend.ll @@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {    ret void  } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain.  ; CHECK: vsel_i64  ; CHECK: xorps -; CHECK: pand +; CHECK: andps  ; CHECK: andnps  ; CHECK: orps  ; CHECK: ret @@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {    ret void  } -; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the -; mixed domains here. +; Without forcing instructions, fall back to the preferred PS domain.  ; CHECK: vsel_double  ; CHECK: xorps -; CHECK: pand +; CHECK: andps  ; CHECK: andnps  ; CHECK: orps  ; CHECK: ret -  define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {    %A = load <4 x double>* %v1    %B = load <4 x double>* %v2 diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll index d520d5c1e31..1d74af2ba36 100644 --- a/llvm/test/CodeGen/X86/sse2.ll +++ b/llvm/test/CodeGen/X86/sse2.ll @@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {  	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]  	ret <2 x double> %tmp7  ; CHECK: test11: -; CHECK: movapd	4(%esp), %xmm0 +; CHECK: movaps	4(%esp), %xmm0  }  define void @test12() nounwind { diff --git a/llvm/test/CodeGen/X86/vec_shuffle.ll b/llvm/test/CodeGen/X86/vec_shuffle.ll index 2a48de22098..d20b3e7b1f4 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle.ll @@ -1,9 +1,8 @@ -; RUN: llc < %s -march=x86 -mcpu=core2 -o %t -; RUN: grep movq    %t | count 1 -; RUN: grep pshufd  %t | count 1 -; RUN: grep movupd  %t | count 1 -; RUN: grep pshufhw %t | count 1 +; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s +; CHECK: test_v4sf +; CHECK: movq 8(%esp) +; CHECK: pshufd $80  define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {  	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]  	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1] @@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {  	ret void  } +; CHECK: test_v2sd +; CHECK: movups	8(%esp) +; CHECK: movaps  define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {  	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]  	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1] @@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {  	ret void  } +; CHECK: test_v8i16 +; CHECK: pshufhw $-58 +; CHECK: movdqa  define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {  	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]  	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]  | 

