|  |  |  |
| --- | --- | --- |
| author | Chris Lattner <sabre@nondot.org> | 2010-08-27 18:31:05 +0000 |
| committer | Chris Lattner <sabre@nondot.org> | 2010-08-27 18:31:05 +0000 |
| commit | 90cd746e63594f7e4653f2c44d713c849227d7d9 (patch) |  |
| tree | 9edb02f0f36f4ee8148590474b2058a53c77b655 /llvm/lib/Transforms/InstCombine |  |
| parent | 5082c5fdf6a628bb216f789eb24df42a76a2307b (diff) |  |
Add an instcombine to clean up a common pattern produced
by the SRoA "promote to large integer" code, eliminating
some type conversions like this:
%94 = zext i16 %93 to i32 ; <i32> [#uses=2]
%96 = lshr i32 %94, 8 ; <i32> [#uses=1]
%101 = trunc i32 %96 to i8 ; <i8> [#uses=1]
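After the combine, the intermediate i32 value disappears and the shift happens directly in A's own type, leaving something like this (an illustrative rewrite; the value names are hypothetical):
%96 = lshr i16 %93, 8 ; <i16> [#uses=1]
%101 = trunc i16 %96 to i8 ; <i8> [#uses=1]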
This also unblocks other xforms from happening; now clang is able to compile:
struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }
into:
_foo: ## @foo
## BB#0: ## %entry
pshufd $1, %xmm0, %xmm2
addss %xmm0, %xmm2
movdqa %xmm1, %xmm3
addss %xmm2, %xmm3
pshufd $1, %xmm1, %xmm0
addss %xmm3, %xmm0
ret
on x86-64, instead of:
_foo: ## @foo
## BB#0: ## %entry
movd %xmm0, %rax
shrq $32, %rax
movd %eax, %xmm2
addss %xmm0, %xmm2
movapd %xmm1, %xmm3
addss %xmm2, %xmm3
movd %xmm1, %rax
shrq $32, %rax
movd %eax, %xmm0
addss %xmm3, %xmm0
ret
This seems pretty close to optimal to me, at least without
using horizontal adds. This also triggers in lots of other
code, including SPEC.
llvm-svn: 112278
Diffstat (limited to 'llvm/lib/Transforms/InstCombine')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 23 |
1 file changed, 23 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 82c359194f7..1372a1891fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -454,6 +454,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
+
+  // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+  Value *A = 0; ConstantInt *Cst = 0;
+  if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      Src->hasOneUse()) {
+    // We have three types to worry about here, the type of A, the source of
+    // the truncate (MidSize), and the destination of the truncate. We know
+    // that ASize < MidSize and MidSize > ResultSize, but don't know the
+    // relation between ASize and ResultSize.
+    unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+    // If the shift amount is larger than the size of A, then the result is
+    // known to be zero because all the input bits got shifted out.
+    if (Cst->getZExtValue() >= ASize)
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+    // Since we're doing an lshr and a zero extend, and know that the shift
+    // amount is smaller than ASize, it is always safe to do the shift in A's
+    // type, then zero extend or truncate to the result.
+    Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+    Shift->takeName(Src);
+    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+  }
 
   return 0;
 }
```
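As a quick sanity check of the reasoning in the new comments, the following standalone C++ program (not part of the commit) exhaustively verifies the equivalence for the i16/i32/i8 case from the example above: when the shift amount is smaller than ASize, shifting in A's type and then casting matches the original zext + lshr + trunc. The Cst >= ASize case is handled separately in the patch by folding the whole expression to zero.

```cpp
// Exhaustive check of the transform's equivalence claim for
// ASize = 16, MidSize = 32, ResultSize = 8.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a <= 0xFFFF; ++a) {      // every 16-bit value of A
    uint16_t narrow = static_cast<uint16_t>(a);
    for (unsigned cst = 0; cst < 16; ++cst) {   // shift amounts < ASize
      // Original pattern: trunc(lshr(zext A to i32, cst)) to i8.
      uint8_t before =
          static_cast<uint8_t>(static_cast<uint32_t>(narrow) >> cst);
      // Combined form: lshr in A's type, then cast to the result type.
      uint8_t after = static_cast<uint8_t>(narrow >> cst);
      assert(before == after);
    }
  }
  return 0;
}
```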

