Commit message log

This fixes PR5997.
These transforms were disabled because codegen couldn't deal with other
uses of trunc(x). This is now handled by the peephole pass.
This causes no regressions on x86-64.
llvm-svn: 159003

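The log does not say which transforms these are; as a rough, hypothetical sketch of the situation the message describes (a trunc whose result has more than one use, so a fold cannot simply discard it):

```llvm
declare void @use(i32)

define i1 @multi_use_trunc(i64 %x) {
  %t = trunc i64 %x to i32
  call void @use(i32 %t)       ; a second use of trunc(x) keeps the narrow value live
  %c = icmp eq i32 %t, 0       ; a fold of this compare has to cope with the call above
  ret i1 %c
}
```
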
[…] the GEP offset is known to be constant.
With this change, we avoid relying on the IR Builder to constant fold the operations.
No functionality change intended.
llvm-svn: 158829

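As a worked illustration of a GEP whose offset is a compile-time constant (hypothetical struct, typed-pointer IR syntax of this era; the offsets in the comment assume the obvious layout):

```llvm
%struct.S = type { i32, i32, [4 x i16] }

define i16* @field(%struct.S* %p) {
  ; Every index is a constant, so the byte offset is a constant:
  ; 0 * sizeof(%struct.S) + 8 (start of field 2) + 3 * 2 = 14 bytes.
  %g = getelementptr inbounds %struct.S* %p, i64 0, i32 2, i64 3
  ret i16* %g
}
```
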
[…] case when the alloca's size is calculated within an "add/sub/... nsw".
Also added a fix to the 2011-06-13-nsw-alloca.ll test.
llvm-svn: 156231

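A minimal sketch of the situation named above, with hypothetical names and the typed-pointer syntax of the era: the alloca's byte count is computed with an nsw operation.

```llvm
define void @nsw_alloca(i32 %n) {
  %bytes = mul nsw i32 %n, 8         ; the byte count carries the no-signed-wrap flag
  %p = alloca i8, i32 %bytes
  %d = bitcast i8* %p to double*     ; instcombine may want to re-type the alloca through this cast
  store double 0.0, double* %d
  ret void
}
```
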
This allows us to keep passing reduced masks to SimplifyDemandedBits, but
know about all the bits if SimplifyDemandedBits fails. This allows instcombine
to simplify cases like the one in the included testcase.
llvm-svn: 154011

[…] instead of always using ConstantVector.
llvm-svn: 149912

llvm-svn: 148578

[…] "half precision" floating-point with a first-class type.
This patch adds basic IR support (but not codegen support).
llvm-svn: 146786

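A minimal IR sketch using the new first-class type; 0xH3C00 is the IEEE-754 half encoding of 1.0:

```llvm
define half @inc(half %x) {
  %r = fadd half %x, 0xH3C00   ; x + 1.0 in half precision
  ret half %r
}
```
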
[…] where it appeared beneficial to pass.
More of rdar://10500969
llvm-svn: 145630

llvm-svn: 145470

[…] the (fptrunc (sqrt (fpext x))) -> (sqrtf x) transformation if -fno-builtin is specified.
rdar://10466410
llvm-svn: 145460

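The pattern in question, as a self-contained sketch; with builtins available, instcombine can shrink the whole chain to a call to the float-typed sqrtf:

```llvm
declare double @sqrt(double)

define float @sqrt_of_float(float %x) {
  %ext  = fpext float %x to double
  %call = call double @sqrt(double %ext)
  %res  = fptrunc double %call to float  ; shrinkable to: call float @sqrtf(float %x)
  ret float %res
}
```
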
[…] to be uniqued, without any benefit.
If someone prefers %tmp42 to %42, run instnamer.
llvm-svn: 140634

Optimize chained bitcasts of the form A->B->A.
Undo r138722 and change isEliminableCastPair to allow this case.
llvm-svn: 138756

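A small sketch of the A->B->A shape with hypothetical types: the two bitcasts round-trip to the original type, so the chain folds away:

```llvm
define <4 x i32> @round_trip(<4 x i32> %v) {
  %b = bitcast <4 x i32> %v to <2 x i64>   ; A -> B
  %c = bitcast <2 x i64> %b to <4 x i32>   ; B -> A, so %c is just %v
  ret <4 x i32> %c
}
```
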
llvm-svn: 138722

llvm-svn: 135904

[…] ArrayRef.
llvm-svn: 135761

[…] it's used and not included where it isn't.
llvm-svn: 135628

llvm-svn: 135375

[…] another use of sqrt. rdar://9763193
llvm-svn: 135058

This tightens up checking for overflow in alloca sizes, based on feedback
from Duncan and John about the change in r132926.
llvm-svn: 134749

llvm-svn: 133301

Follow-up to r132926.  rdar://problem/9265821
llvm-svn: 133285

[…] might overflow.  Re-typing the alloca to a larger type (e.g. double)
hoists a shift into the alloca, potentially exposing overflow in the
expression.  rdar://problem/9265821
llvm-svn: 132926

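A hypothetical shape of the hazard described above (the exact pattern is not shown in the log; typed-pointer syntax of the era): the i8 alloca's byte count is a shifted expression, and re-typing the alloca to double folds that scaling into the element size.

```llvm
define void @retype(i32 %n) {
  %bytes = shl i32 %n, 3              ; %n * 8, which may wrap for large %n
  %p = alloca i8, i32 %bytes
  %d = bitcast i8* %p to double*
  store double 0.0, double* %d
  ret void
}
; Rewriting this as "alloca double, i32 %n" is only safe if the shift above
; is known not to overflow.
```
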
[…] InsertNewInstWith, and use setDebugLoc for the cases which can't be easily handled by the automated mechanisms.
llvm-svn: 132167

llvm-svn: 131596

[…] instcombine.
llvm-svn: 131512

It's possible to craft an input that hits the recursion limits in a way
that SimplifyDemandedBits doesn't simplify the icmp but ComputeMaskedBits
can infer which bits are zero.
No test case, as it depends on too many other things. Fixes PR9609.
llvm-svn: 128777

llvm-svn: 128745

- Localize the check for whether an icmp has one use to a place where we know we're
  introducing something that's likely more expensive than a sext from i1.
- Add an assert to make sure a case that would lead to a miscompilation is
  folded away earlier.
- Fix a typo.
llvm-svn: 128744

llvm-svn: 128733

[…] only one unknown bit.
int test1(unsigned x) { return (x&8) ? 0 : -1; }
int test3(unsigned x) { return (x&8) ? -1 : 0; }
before (x86_64):
_test1:
	andl	$8, %edi
	cmpl	$1, %edi
	sbbl	%eax, %eax
	ret
_test3:
	andl	$8, %edi
	cmpl	$1, %edi
	sbbl	%eax, %eax
	notl	%eax
	ret
after:
_test1:
	shrl	$3, %edi
	andl	$1, %edi
	leal	-1(%rdi), %eax
	ret
_test3:
	shll	$28, %edi
	movl	%edi, %eax
	sarl	$31, %eax
	ret
llvm-svn: 128732

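For reference, the IR clang emits for test1 above looks roughly like this; the select is fed by a single unknown bit (bit 3 of %x), which is what lets instcombine lower it to the shift/and/add sequence in the "after" code:

```llvm
define i32 @test1(i32 %x) {
  %and = and i32 %x, 8
  %tobool = icmp ne i32 %and, 0
  %sel = select i1 %tobool, i32 0, i32 -1   ; roughly foldable to ((%x >> 3) & 1) - 1
  ret i32 %sel
}
```
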
[…] functionality change.
llvm-svn: 128731

[…] PHINode::Create() giving the (known or expected) number of operands.
llvm-svn: 128537

llvm-svn: 128535

llvm-svn: 127282

llvm-svn: 125537

[…] builders unhappy.
llvm-svn: 125504

[…] idiom.  Change various clients to simplify their code.
llvm-svn: 125487

[…] and-with-constant operations.
This fixes rdar://8808586, which observed that we used to compile:
union xy {
        struct x { _Bool b[15]; } x;
        __attribute__((packed))
        struct y {
                __attribute__((packed)) unsigned long b0to7;
                __attribute__((packed)) unsigned int b8to11;
                __attribute__((packed)) unsigned short b12to13;
                __attribute__((packed)) unsigned char b14;
        } y;
};
struct x
foo(union xy *xy)
{
        return xy->x;
}
into:
_foo:                                   ## @foo
	movq	(%rdi), %rax
	movabsq	$1095216660480, %rcx    ## imm = 0xFF00000000
	andq	%rax, %rcx
	movabsq	$-72057594037927936, %rdx ## imm = 0xFF00000000000000
	andq	%rax, %rdx
	movzbl	%al, %esi
	orq	%rdx, %rsi
	movq	%rax, %rdx
	andq	$65280, %rdx            ## imm = 0xFF00
	orq	%rsi, %rdx
	movq	%rax, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rdx, %rsi
	movl	%eax, %edx
	andl	$-16777216, %edx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rdx
	orq	%rcx, %rdx
	movabsq	$280375465082880, %rcx  ## imm = 0xFF0000000000
	movq	%rax, %rsi
	andq	%rcx, %rsi
	orq	%rdx, %rsi
	movabsq	$71776119061217280, %r8 ## imm = 0xFF000000000000
	andq	%r8, %rax
	orq	%rsi, %rax
	movzwl	12(%rdi), %edx
	movzbl	14(%rdi), %esi
	shlq	$16, %rsi
	orl	%edx, %esi
	movq	%rsi, %r9
	shlq	$32, %r9
	movl	8(%rdi), %edx
	orq	%r9, %rdx
	andq	%rdx, %rcx
	movzbl	%sil, %esi
	shlq	$32, %rsi
	orq	%rcx, %rsi
	movl	%edx, %ecx
	andl	$-16777216, %ecx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rcx
	movq	%rdx, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rcx, %rsi
	movq	%rdx, %rcx
	andq	$65280, %rcx            ## imm = 0xFF00
	orq	%rsi, %rcx
	movzbl	%dl, %esi
	orq	%rcx, %rsi
	andq	%r8, %rdx
	orq	%rsi, %rdx
	ret
We now compile this into:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movzwl	12(%rdi), %eax
	movzbl	14(%rdi), %ecx
	shlq	$16, %rcx
	orl	%eax, %ecx
	shlq	$32, %rcx
	movl	8(%rdi), %edx
	orq	%rcx, %rdx
	movq	(%rdi), %rax
	ret
A small improvement :-)
llvm-svn: 123520

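The subject line of this commit is missing from the log; as a minimal sketch of the general idea only (an or of and-with-constant pieces of the same value collapsing into one mask), not necessarily the exact fold added here:

```llvm
define i64 @merge_bytes(i64 %x) {
  %lo = and i64 %x, 255      ; byte 0
  %hi = and i64 %x, 65280    ; byte 1
  %r  = or i64 %hi, %lo      ; foldable to: and i64 %x, 65535
  ret i64 %r
}
```
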
llvm-svn: 122110

[…] matching psign & pblend operations to the IR produced by clang/gcc for their C idioms.
llvm-svn: 122105

[…] turning (fptrunc (sqrt (fpext x))) -> (sqrtf x) is great, but we have
to delete the original sqrt as well.  Not doing so causes us to do
two sqrt calls when building with -fmath-errno (the default on Linux).
llvm-svn: 113260

llvm-svn: 112351

[…] like this:
struct S { float A, B, C, D; };
struct S g;
struct S bar() { 
  struct S A = g;
  ++A.B;
  A.A = 42;
  return A;
}
we now generate:
_bar:                                   ## @bar
## BB#0:                                ## %entry
	movq	_g@GOTPCREL(%rip), %rax
	movss	12(%rax), %xmm0
	pshufd	$16, %xmm0, %xmm0
	movss	4(%rax), %xmm2
	movss	8(%rax), %xmm1
	pshufd	$16, %xmm1, %xmm1
	unpcklps	%xmm0, %xmm1
	addss	LCPI1_0(%rip), %xmm2
	pshufd	$16, %xmm2, %xmm2
	movss	LCPI1_1(%rip), %xmm0
	pshufd	$16, %xmm0, %xmm0
	unpcklps	%xmm2, %xmm0
	ret
instead of:
_bar:                                   ## @bar
## BB#0:                                ## %entry
	movq	_g@GOTPCREL(%rip), %rax
	movss	12(%rax), %xmm0
	pshufd	$16, %xmm0, %xmm0
	movss	4(%rax), %xmm2
	movss	8(%rax), %xmm1
	pshufd	$16, %xmm1, %xmm1
	unpcklps	%xmm0, %xmm1
	addss	LCPI1_0(%rip), %xmm2
	movd	%xmm2, %eax
	shlq	$32, %rax
	addq	$1109917696, %rax       ## imm = 0x42280000
	movd	%rax, %xmm0
	ret
llvm-svn: 112345

[…] element insertion from the pieces that feed into the vector.
This handles a pattern that occurs frequently due to code
generated for the x86-64 ABI.  We now compile something like
this:
struct S { float A, B, C, D; };
struct S g;
struct S bar() { 
  struct S A = g;
  ++A.A;
  ++A.C;
  return A;
}
into all nice vector operations:
_bar:                                   ## @bar
## BB#0:                                ## %entry
	movq	_g@GOTPCREL(%rip), %rax
	movss	LCPI1_0(%rip), %xmm1
	movss	(%rax), %xmm0
	addss	%xmm1, %xmm0
	pshufd	$16, %xmm0, %xmm0
	movss	4(%rax), %xmm2
	movss	12(%rax), %xmm3
	pshufd	$16, %xmm2, %xmm2
	unpcklps	%xmm2, %xmm0
	addss	8(%rax), %xmm1
	pshufd	$16, %xmm1, %xmm1
	pshufd	$16, %xmm3, %xmm2
	unpcklps	%xmm2, %xmm1
	ret
instead of icky integer operations:
_bar:                                   ## @bar
	movq	_g@GOTPCREL(%rip), %rax
	movss	LCPI1_0(%rip), %xmm1
	movss	(%rax), %xmm0
	addss	%xmm1, %xmm0
	movd	%xmm0, %ecx
	movl	4(%rax), %edx
	movl	12(%rax), %esi
	shlq	$32, %rdx
	addq	%rcx, %rdx
	movd	%rdx, %xmm0
	addss	8(%rax), %xmm1
	movd	%xmm1, %eax
	shlq	$32, %rsi
	addq	%rax, %rsi
	movd	%rsi, %xmm1
	ret
This resolves rdar://8360454
llvm-svn: 112343

[…] framework, which is good at ripping through bitfield
operations.  This generalizes a bunch of the existing
xforms that instcombine does, such as
  (x << c) >> c -> and
to handle intermediate logical nodes.  This is useful for
ripping up the "promote to large integer" code produced by
SRoA.
llvm-svn: 112304

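A concrete instance of the xform quoted above, as a sketch: shifting left and then right by the same amount keeps only the low bits, so the pair reduces to a mask.

```llvm
define i32 @low_byte(i32 %x) {
  %s = shl i32 %x, 24
  %r = lshr i32 %s, 24     ; (x << 24) >> 24 == and i32 %x, 255
  ret i32 %r
}
```
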
[…] computation can be truncated if it is fed by a sext/zext that doesn't
have to be exactly equal to the truncation result type.
llvm-svn: 112285

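A sketch of the case this enables, assuming the usual evaluate-in-a-narrower-type reasoning: the add below is fed by a zext from i8, which is not the i16 truncation result type, yet the whole computation can still be redone in i16.

```llvm
define i16 @narrow_add(i8 %a, i16 %b) {
  %aw  = zext i8 %a to i32       ; widened from i8, not from i16
  %bw  = zext i16 %b to i32
  %sum = add i32 %aw, %bw
  %r   = trunc i32 %sum to i16   ; equivalent to: add i16 (zext i8 %a to i16), %b
  ret i16 %r
}
```
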
[…] by the SRoA "promote to large integer" code, eliminating
some type conversions like this:
   %94 = zext i16 %93 to i32                       ; <i32> [#uses=2]
   %96 = lshr i32 %94, 8                           ; <i32> [#uses=1]
   %101 = trunc i32 %96 to i8                      ; <i8> [#uses=1]
This also unblocks other xforms from happening; now clang is able to compile:
struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }
into:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	pshufd	$1, %xmm0, %xmm2
	addss	%xmm0, %xmm2
	movdqa	%xmm1, %xmm3
	addss	%xmm2, %xmm3
	pshufd	$1, %xmm1, %xmm0
	addss	%xmm3, %xmm0
	ret
on x86-64, instead of:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movapd	%xmm1, %xmm3
	addss	%xmm2, %xmm3
	movd	%xmm1, %rax
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm3, %xmm0
	ret
This seems pretty close to optimal to me, at least without
using horizontal adds.  This also triggers in lots of other
code, including SPEC.
llvm-svn: 112278

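For the zext/lshr/trunc chain quoted at the top of this message, the simplified form is roughly a shift in the original narrow type plus a trunc; a minimal sketch:

```llvm
define i8 @high_byte(i16 %x) {
  %shift = lshr i16 %x, 8          ; do the shift in the original narrow type
  %byte  = trunc i16 %shift to i8  ; the intermediate zext to i32 disappears
  ret i8 %byte
}
```
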
[…] by SRoA.  This is part of rdar://7892780, but needs another xform to
expose this.
llvm-svn: 112232

[…] is a vector to be a vector element extraction.  This allows clang to
compile:
struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }
into:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movapd	%xmm1, %xmm3
	addss	%xmm2, %xmm3
	movd	%xmm1, %rax
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm3, %xmm0
	ret
instead of:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	movd	%eax, %xmm0
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movd	%xmm1, %rax
	movd	%eax, %xmm1
	addss	%xmm2, %xmm1
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm1, %xmm0
	ret
... eliminating half of the horribleness.
llvm-svn: 112227

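A sketch of the kind of pattern being recognized (hypothetical function; little-endian element layout assumed): scalar bit-twiddling on an integer bitcast from a vector can be rewritten as an element extraction.

```llvm
define float @second_elt(<2 x float> %v) {
  %bits = bitcast <2 x float> %v to i64
  %hi   = lshr i64 %bits, 32
  %t    = trunc i64 %hi to i32
  %f    = bitcast i32 %t to float   ; recognizable as: extractelement <2 x float> %v, i32 1
  ret float %f
}
```
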
llvm-svn: 108736