diff options
| author | Chris Lattner <sabre@nondot.org> | 2010-08-26 21:55:42 +0000 | 
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-08-26 21:55:42 +0000 | 
| commit | d4ebd6df5a9605709e0acbd4c2a46570543fba70 (patch) | |
| tree | 2884f00ccdd3f159d30c8edd81142cf5522d355b /llvm | |
| parent | 2771d7bf49528533c1be210b5c0a5ae9fd0bc12a (diff) | |
| download | bcm5719-llvm-d4ebd6df5a9605709e0acbd4c2a46570543fba70.tar.gz bcm5719-llvm-d4ebd6df5a9605709e0acbd4c2a46570543fba70.zip  | |
Optimize bitcast(trunc(bitcast(x))), where the result is a float and 'x'
is a vector, into a vector element extraction.  This allows clang to
compile:
struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }
into:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movapd	%xmm1, %xmm3
	addss	%xmm2, %xmm3
	movd	%xmm1, %rax
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm3, %xmm0
	ret
instead of:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	movd	%eax, %xmm0
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movd	%xmm1, %rax
	movd	%eax, %xmm1
	addss	%xmm2, %xmm1
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm1, %xmm0
	ret
... eliminating half of the horribleness.
llvm-svn: 112227
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 34 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/bitcast.ll | 22 | 
2 files changed, 56 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index dbe5200d000..ef5bbc4798d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1335,6 +1335,35 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
   return new ShuffleVectorInst(InVal, V2, Mask);
 }
 
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast.  The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) {
+  Value *Src = CI.getOperand(0);
+
+  // If this is a bitcast from int to float, check to see if the int is an
+  // extraction from a vector.
+  Value *VecInput = 0;
+  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    const Type *DestTy = CI.getType();
+    
+    // If the element type of the vector doesn't match the result type, but the
+    // vector type's size is a multiple of the result type, bitcast it to be a
+    // vector type we can extract from.
+    if (VecTy->getElementType() != DestTy &&
+        VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) {
+      VecTy = VectorType::get(DestTy,
+            VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits());
+      VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+    }
+    
+    if (VecTy->getElementType() == DestTy)
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+  }
+  
+  return 0;
+}
 
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
@@ -1386,6 +1415,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
                                               ((Instruction*)NULL));
     }
   }
+  
+  // Try to optimize int -> float bitcasts.
+  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+      return I;
 
   if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
     if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll
index 88fa9a43e54..c248b5e4bc8 100644
--- a/llvm/test/Transforms/InstCombine/bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast.ll
@@ -13,3 +13,25 @@ define i32 @test1(i64 %a) {
 ; CHECK: ret i32 0
 }
 
+; Optimize bitcasts that are extracting low element of vector.  This happens
+; because of SRoA.
+; rdar://7892780
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64  ; <i64> [#uses=2]
+  %tmp23 = trunc i64 %tmp28 to i32                ; <i32> [#uses=1]
+  %tmp24 = bitcast i32 %tmp23 to float            ; <float> [#uses=1]
+
+  %tmp = bitcast <2 x i32> %B to i64
+  %tmp2 = trunc i64 %tmp to i32                ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp2 to float            ; <float> [#uses=1]
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+  
+; CHECK: @test2
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 0
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}

