From b078350872693f37726e78caa1c413dd736cff4e Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 1 Apr 2012 19:31:22 +0000 Subject: This commit contains a few changes that had to go in together. 1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) (and also scalar_to_vector). 2. Xor/and/or are indifferent to the swizzle operation (shuffle of one src). Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)) 3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y). 4. Fix an X86ISelLowering optimization which was very bitcast-sensitive. Code which was previously compiled to this: movd (%rsi), %xmm0 movdqa .LCPI0_0(%rip), %xmm2 pshufb %xmm2, %xmm0 movd (%rdi), %xmm1 pshufb %xmm2, %xmm1 pxor %xmm0, %xmm1 pshufb .LCPI0_1(%rip), %xmm1 movd %xmm1, (%rdi) ret Now compiles to this: movl (%rsi), %eax xorl %eax, (%rdi) ret llvm-svn: 153848 --- llvm/test/CodeGen/ARM/reg_sequence.ll | 2 +- llvm/test/CodeGen/CellSPU/rotate_ops.ll | 2 +- llvm/test/CodeGen/X86/2011-10-27-tstore.ll | 12 ++++++------ llvm/test/CodeGen/X86/SwizzleShuff.ll | 14 ++++++++++++++ llvm/test/CodeGen/X86/vec_shuffle-37.ll | 10 +++++----- llvm/test/CodeGen/X86/widen_shuffle-1.ll | 2 +- 6 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/SwizzleShuff.ll (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll index 78b4e7ea84c..05794e4ebdd 100644 --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: ; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000 -; CHECK: vmul.f32 q8, q8, d0[0] +; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]] ; CHECK: vadd.f32 q8, q8, q8 %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/CellSPU/rotate_ops.ll b/llvm/test/CodeGen/CellSPU/rotate_ops.ll index 97709352760..8b7af20b4a9 100644 --- a/llvm/test/CodeGen/CellSPU/rotate_ops.ll +++ b/llvm/test/CodeGen/CellSPU/rotate_ops.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 86 +; RUN: grep rot %t1.s | count 85 ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 diff --git a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll index 016e02c3d5d..1712f345653 100644 --- a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll +++ b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll @@ -3,14 +3,14 @@ target triple = "x86_64-unknown-linux-gnu" ;CHECK: ltstore -;CHECK: pshufd -;CHECK: pshufd -;CHECK: ret -define void @ltstore() { +;CHECK: movq +;CHECK-NEXT: movq +;CHECK-NEXT: ret +define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) { entry: - %in = load <4 x i32>* undef + %in = load <4 x i32>* %pIn %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> - store <2 x i32> %j, <2 x i32>* undef + store <2 x i32> %j, <2 x i32>* %pOut ret void } diff --git a/llvm/test/CodeGen/X86/SwizzleShuff.ll b/llvm/test/CodeGen/X86/SwizzleShuff.ll new file mode 100644 index 00000000000..11b702e3d1b --- /dev/null +++ b/llvm/test/CodeGen/X86/SwizzleShuff.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; Check that we perform a scalar XOR on i32. + +; CHECK: pull_bitcast +; CHECK: xorl +; CHECK: ret +define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) { + %A = load <4 x i8>* %pA + %B = load <4 x i8>* %pB + %C = xor <4 x i8> %A, %B + store <4 x i8> %C, <4 x i8>* %pA + ret void +} diff --git a/llvm/test/CodeGen/X86/vec_shuffle-37.ll b/llvm/test/CodeGen/X86/vec_shuffle-37.ll index 06083989382..65486cb80c9 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-37.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-37.ll @@ -27,11 +27,11 @@ entry: define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { entry: ; CHECK: t02 -; CHECK: movaps -; CHECK: shufps -; CHECK: pshufd -; CHECK: movq -; CHECK: ret +; CHECK: mov +; CHECK-NEXT: mov +; CHECK-NEXT: mov +; CHECK-NEXT: mov +; CHECK-NEXT: ret %0 = bitcast <8 x i32>* %source to <4 x i32>* %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 %tmp2 = load <4 x i32>* %arrayidx, align 16 diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll index 7bebb274f6e..94200537168 100644 --- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll +++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll @@ -33,7 +33,7 @@ entry: define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: shuf3: -; CHECK: shufps +; CHECK: shufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> -- cgit v1.2.3