diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2014-10-03 01:06:32 +0000 | 
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-10-03 01:06:32 +0000 | 
| commit | b264e4a3251d4c2870fc29061bc78e94581df2d8 (patch) | |
| tree | 50253d31cf88c957459857ec7448cb93e6afd788 | |
| parent | 9df29f3a6f9ebef9b85c61e19470b57dffb91913 (diff) | |
| download | bcm5719-llvm-b264e4a3251d4c2870fc29061bc78e94581df2d8.tar.gz bcm5719-llvm-b264e4a3251d4c2870fc29061bc78e94581df2d8.zip | |
[x86] Regenerate a number of FileCheck assertions with my script for
test cases that will change with the new vector shuffle lowering. This
gives us a nice baseline to compute deltas against. I've checked and removed
the cases where weird register usage was being pinned down, and
all of these are extremely pin-pointed tests, so fully checking them
seems very appropriate.
llvm-svn: 218941
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-or.ll | 205 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp-load-trunc.ll | 96 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp-trunc.ll | 90 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/movgs.ll | 86 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/palignr.ll | 140 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr14161.ll | 23 | 
6 files changed, 426 insertions, 214 deletions
| diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index df3b9015add..42a50b65905 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -5,277 +5,296 @@  ; instruction which performs a blend operation.  define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test1: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm0, %xmm1 +; CHECK-NEXT:    movaps %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test1 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK-NOT: orps -; CHECK: ret  define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test2: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test2 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK: ret  define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test3: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test3 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK-NEXT: ret  define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test4: +; CHECK:       # BB#0: +; CHECK-NEXT:    movss %xmm0, %xmm1 +; CHECK-NEXT:    movaps %xmm1, %xmm0 +; CHECK-NEXT:    
retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test4 -; CHECK-NOT: xorps -; CHECK: movss -; CHECK-NOT: orps -; CHECK: ret  define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test5: +; CHECK:       # BB#0: +; CHECK-NEXT:    movss %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test5 -; CHECK-NOT: xorps -; CHECK: movss -; CHECK-NEXT: ret  define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test6: +; CHECK:       # BB#0: +; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test6 -; CHECK-NOT: xorps -; CHECK: blendps $12 -; CHECK-NEXT: ret  define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test7: +; CHECK:       # BB#0: +; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; CHECK-NEXT:    retq    %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>    %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>    %or = or <4 x i32> %and1, %and2    ret <4 x i32> %or  } -; CHECK-LABEL: test7 -; CHECK-NOT: xorps -; CHECK: blendps $12 -; CHECK-NEXT: ret  define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test8: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm0, 
%xmm1 +; CHECK-NEXT:    movaps %xmm1, %xmm0 +; CHECK-NEXT:    retq    %and1 = and <2 x i64> %a, <i64 -1, i64 0>    %and2 = and <2 x i64> %b, <i64 0, i64 -1>    %or = or <2 x i64> %and1, %and2    ret <2 x i64> %or  } -; CHECK-LABEL: test8 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK-NOT: orps -; CHECK: ret  define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test9: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm1, %xmm0 +; CHECK-NEXT:    retq    %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>    %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>    %or = or <4 x i32> %and1, %and2    ret <4 x i32> %or  } -; CHECK-LABEL: test9 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK: ret  define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test10: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm1, %xmm0 +; CHECK-NEXT:    retq    %and1 = and <2 x i64> %a, <i64 0, i64 -1>    %and2 = and <2 x i64> %b, <i64 -1, i64 0>    %or = or <2 x i64> %and1, %and2    ret <2 x i64> %or  } -; CHECK-LABEL: test10 -; CHECK-NOT: xorps -; CHECK: movsd -; CHECK-NEXT: ret  define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test11: +; CHECK:       # BB#0: +; CHECK-NEXT:    movss %xmm0, %xmm1 +; CHECK-NEXT:    movaps %xmm1, %xmm0 +; CHECK-NEXT:    retq    %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>    %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>    %or = or <4 x i32> %and1, %and2    ret <4 x i32> %or  } -; CHECK-LABEL: test11 -; CHECK-NOT: xorps -; CHECK: movss -; CHECK-NOT: orps -; CHECK: ret  define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test12: +; CHECK:       # BB#0: +; CHECK-NEXT:    movss %xmm1, %xmm0 +; CHECK-NEXT:    retq    %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>    %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>    %or = or <4 x i32> %and1, %and2    ret <4 x i32> %or  } -; CHECK-LABEL: test12 -; CHECK-NOT: xorps -; CHECK: movss -; 
CHECK-NEXT: ret  ; Verify that the following test cases are folded into single shuffles.  define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test13: +; CHECK:       # BB#0: +; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test13 -; CHECK-NOT: xorps -; CHECK: shufps -; CHECK-NEXT: ret  define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test14: +; CHECK:       # BB#0: +; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:    retq    %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test14 -; CHECK-NOT: pslldq -; CHECK-NOT: por -; CHECK: punpcklqdq -; CHECK-NEXT: ret  define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test15: +; CHECK:       # BB#0: +; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1] +; CHECK-NEXT:    movaps %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test15 -; CHECK-NOT: xorps -; CHECK: shufps -; CHECK-NOT: shufps -; CHECK-NOT: orps -; CHECK: ret  define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test16: +; CHECK:       # BB#0: +; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retq    
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test16 -; CHECK-NOT: pslldq -; CHECK-NOT: por -; CHECK: punpcklqdq -; CHECK: ret  ; Verify that the dag-combiner does not fold a OR of two shuffles into a single  ; shuffle instruction when the shuffle indexes are not compatible.  define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test17: +; CHECK:       # BB#0: +; CHECK-NEXT:    xorps %xmm2, %xmm2 +; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,0] +; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1] +; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0] +; CHECK-NEXT:    por %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test17 -; CHECK: por -; CHECK-NEXT: ret  define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test18: +; CHECK:       # BB#0: +; CHECK-NEXT:    xorps %xmm2, %xmm2 +; CHECK-NEXT:    xorps %xmm3, %xmm3 +; CHECK-NEXT:    movss %xmm0, %xmm3 +; CHECK-NEXT:    shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[0,0] +; CHECK-NEXT:    movss %xmm1, %xmm2 +; CHECK-NEXT:    orps %xmm3, %xmm2 +; CHECK-NEXT:    movaps %xmm2, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test18 -; CHECK: orps -; CHECK: ret  define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: 
test19: +; CHECK:       # BB#0: +; CHECK-NEXT:    xorps %xmm2, %xmm2 +; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,0] +; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1] +; CHECK-NEXT:    movdqa %xmm1, %xmm2 +; CHECK-NEXT:    pslldq $8, %xmm2 +; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2] +; CHECK-NEXT:    por %xmm2, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>    %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>    %or = or <4 x i32> %shuf1, %shuf2    ret <4 x i32> %or  } -; CHECK-LABEL: test19 -; CHECK: por -; CHECK-NEXT: ret  define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test20: +; CHECK:       # BB#0: +; CHECK-NEXT:    orps %xmm1, %xmm0 +; CHECK-NEXT:    movq %xmm0, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test20 -; CHECK-NOT: xorps -; CHECK: orps -; CHECK-NEXT: movq -; CHECK-NEXT: ret  define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test21: +; CHECK:       # BB#0: +; CHECK-NEXT:    por %xmm1, %xmm0 +; CHECK-NEXT:    pslldq $8, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>    %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>    %or = or <2 x i64> %shuf1, %shuf2    ret <2 x i64> %or  } -; CHECK-LABEL: test21 -; CHECK: por -; CHECK-NEXT: pslldq -; CHECK-NEXT: ret  ; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle  ; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to  ; handle legal vector value types.  
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_crash: +; CHECK:       # BB#0: +; CHECK-NEXT:    movsd %xmm1, %xmm0 +; CHECK-NEXT:    retq    %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>    %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>    %or = or <4 x i8> %shuf1, %shuf2    ret <4 x i8> %or  } -; CHECK-LABEL: test_crash -; CHECK: movsd -; CHECK: ret diff --git a/llvm/test/CodeGen/X86/fp-load-trunc.ll b/llvm/test/CodeGen/X86/fp-load-trunc.ll index a973befdafe..7e78dd046c3 100644 --- a/llvm/test/CodeGen/X86/fp-load-trunc.ll +++ b/llvm/test/CodeGen/X86/fp-load-trunc.ll @@ -2,57 +2,87 @@  ; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX  define <1 x float> @test1(<1 x double>* %p) nounwind { -; CHECK: test1 -; CHECK: cvtsd2ss -; CHECK: ret -; AVX:   test1 -; AVX:   vcvtsd2ss -; AVX:   ret +; CHECK-LABEL: test1: +; CHECK:       # BB#0: +; CHECK-NEXT:    pushl %eax +; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:    movsd (%eax), %xmm0 +; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT:    movss %xmm0, (%esp) +; CHECK-NEXT:    flds (%esp) +; CHECK-NEXT:    popl %eax +; CHECK-NEXT:    retl +; +; AVX-LABEL: test1: +; AVX:       # BB#0: +; AVX-NEXT:    pushl %eax +; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax +; AVX-NEXT:    vmovsd (%eax), %xmm0 +; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT:    vmovss %xmm0, (%esp) +; AVX-NEXT:    flds (%esp) +; AVX-NEXT:    popl %eax +; AVX-NEXT:    retl    %x = load <1 x double>* %p    %y = fptrunc <1 x double> %x to <1 x float>    ret <1 x float> %y  }  define <2 x float> @test2(<2 x double>* %p) nounwind { -; CHECK: test2 -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: ret -; AVX:   test2 -; AVX:   vcvtpd2psx {{[0-9]*}}(%{{.*}}) -; AVX:   ret +; CHECK-LABEL: test2: +; CHECK:       # BB#0: +; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: 
   cvtpd2ps (%eax), %xmm0 +; CHECK-NEXT:    retl +; +; AVX-LABEL: test2: +; AVX:       # BB#0: +; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax +; AVX-NEXT:    vcvtpd2psx (%eax), %xmm0 +; AVX-NEXT:    retl    %x = load <2 x double>* %p    %y = fptrunc <2 x double> %x to <2 x float>    ret <2 x float> %y  }  define <4 x float> @test3(<4 x double>* %p) nounwind { -; CHECK: test3 -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: movlhps -; CHECK: ret -; AVX:   test3 -; AVX:   vcvtpd2psy {{[0-9]*}}(%{{.*}}) -; AVX:   ret +; CHECK-LABEL: test3: +; CHECK:       # BB#0: +; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:    cvtpd2ps 16(%eax), %xmm1 +; CHECK-NEXT:    cvtpd2ps (%eax), %xmm0 +; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:    retl +; +; AVX-LABEL: test3: +; AVX:       # BB#0: +; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax +; AVX-NEXT:    vcvtpd2psy (%eax), %xmm0 +; AVX-NEXT:    retl    %x = load <4 x double>* %p    %y = fptrunc <4 x double> %x to <4 x float>    ret <4 x float> %y  }  define <8 x float> @test4(<8 x double>* %p) nounwind { -; CHECK: test4 -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: movlhps -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}}) -; CHECK: movlhps -; CHECK: ret -; AVX:   test4 -; AVX:   vcvtpd2psy -; AVX:   vcvtpd2psy -; AVX:   vinsertf128 -; AVX:   ret +; CHECK-LABEL: test4: +; CHECK:       # BB#0: +; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:    cvtpd2ps 16(%eax), %xmm1 +; CHECK-NEXT:    cvtpd2ps (%eax), %xmm0 +; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:    cvtpd2ps 48(%eax), %xmm2 +; CHECK-NEXT:    cvtpd2ps 32(%eax), %xmm1 +; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT:    retl +; +; AVX-LABEL: test4: +; AVX:       # BB#0: +; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax +; AVX-NEXT:    vcvtpd2psy (%eax), %xmm0 +; AVX-NEXT:    vcvtpd2psy 
32(%eax), %xmm1 +; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT:    retl    %x = load <8 x double>* %p    %y = fptrunc <8 x double> %x to <8 x float>    ret <8 x float> %y diff --git a/llvm/test/CodeGen/X86/fp-trunc.ll b/llvm/test/CodeGen/X86/fp-trunc.ll index 25442fcadd2..4f6ce937aa8 100644 --- a/llvm/test/CodeGen/X86/fp-trunc.ll +++ b/llvm/test/CodeGen/X86/fp-trunc.ll @@ -2,55 +2,77 @@  ; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX  define <1 x float> @test1(<1 x double> %x) nounwind { -; CHECK: test1 -; CHECK: cvtsd2ss -; CHECK: ret -; AVX:   test1 -; AVX:   vcvtsd2ss -; AVX:   ret +; CHECK-LABEL: test1: +; CHECK:       # BB#0: +; CHECK-NEXT:    pushl %eax +; CHECK-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT:    movss %xmm0, (%esp) +; CHECK-NEXT:    flds (%esp) +; CHECK-NEXT:    popl %eax +; CHECK-NEXT:    retl +; +; AVX-LABEL: test1: +; AVX:       # BB#0: +; AVX-NEXT:    pushl %eax +; AVX-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 +; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT:    vmovss %xmm0, (%esp) +; AVX-NEXT:    flds (%esp) +; AVX-NEXT:    popl %eax +; AVX-NEXT:    retl    %y = fptrunc <1 x double> %x to <1 x float>    ret <1 x float> %y  }  define <2 x float> @test2(<2 x double> %x) nounwind { -; CHECK: test2 -; CHECK: cvtpd2ps -; CHECK: ret -; AVX:   test2 -; AVX-NOT:  vcvtpd2psy -; AVX:   vcvtpd2ps -; AVX:   ret +; CHECK-LABEL: test2: +; CHECK:       # BB#0: +; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0 +; CHECK-NEXT:    retl +; +; AVX-LABEL: test2: +; AVX:       # BB#0: +; AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0 +; AVX-NEXT:    retl    %y = fptrunc <2 x double> %x to <2 x float>    ret <2 x float> %y  }  define <4 x float> @test3(<4 x double> %x) nounwind { -; CHECK: test3 -; CHECK: cvtpd2ps -; CHECK: cvtpd2ps -; CHECK: movlhps -; CHECK: ret -; AVX:   test3 -; AVX:   vcvtpd2psy -; AVX:   ret +; CHECK-LABEL: test3: +; CHECK:       # BB#0: +; CHECK-NEXT:    
cvtpd2ps %xmm1, %xmm1 +; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0 +; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:    retl +; +; AVX-LABEL: test3: +; AVX:       # BB#0: +; AVX-NEXT:    vcvtpd2psy %ymm0, %xmm0 +; AVX-NEXT:    vzeroupper +; AVX-NEXT:    retl    %y = fptrunc <4 x double> %x to <4 x float>    ret <4 x float> %y  }  define <8 x float> @test4(<8 x double> %x) nounwind { -; CHECK: test4 -; CHECK: cvtpd2ps -; CHECK: cvtpd2ps -; CHECK: movlhps -; CHECK: cvtpd2ps -; CHECK: cvtpd2ps -; CHECK: movlhps -; CHECK: ret -; AVX:   test4 -; AVX:   vcvtpd2psy -; AVX:   vcvtpd2psy -; AVX:   vinsertf128 -; AVX:   ret +; CHECK-LABEL: test4: +; CHECK:       # BB#0: +; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1 +; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0 +; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:    cvtpd2ps %xmm3, %xmm3 +; CHECK-NEXT:    cvtpd2ps %xmm2, %xmm1 +; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; CHECK-NEXT:    retl +; +; AVX-LABEL: test4: +; AVX:       # BB#0: +; AVX-NEXT:    vcvtpd2psy %ymm0, %xmm0 +; AVX-NEXT:    vcvtpd2psy %ymm1, %xmm1 +; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT:    retl    %y = fptrunc <8 x double> %x to <8 x float>    ret <8 x float> %y  } diff --git a/llvm/test/CodeGen/X86/movgs.ll b/llvm/test/CodeGen/X86/movgs.ll index 71b0723c429..96c5dbb8ea9 100644 --- a/llvm/test/CodeGen/X86/movgs.ll +++ b/llvm/test/CodeGen/X86/movgs.ll @@ -3,40 +3,58 @@  ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64  define i32 @test1() nounwind readonly { +; X32-LABEL: test1: +; X32:       # BB#0: # %entry +; X32-NEXT:    movl %gs:196, %eax +; X32-NEXT:    movl (%eax), %eax +; X32-NEXT:    retl +; +; X64-LABEL: test1: +; X64:       # BB#0: # %entry +; X64-NEXT:    movq %gs:320, %rax +; X64-NEXT:    movl (%rax), %eax +; X64-NEXT:    retq  entry:  	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* 
addrspace(256)*), i32 31)		; <i32*> [#uses=1]  	%tmp1 = load i32* %tmp		; <i32> [#uses=1]  	ret i32 %tmp1  } -; X32-LABEL: test1: -; X32: 	movl	%gs:196, %eax -; X32: 	movl	(%eax), %eax -; X32: 	ret - -; X64-LABEL: test1: -; X64: 	movq	%gs:320, %rax -; X64: 	movl	(%rax), %eax -; X64: 	ret  define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind { +; X32-LABEL: test2: +; X32:       # BB#0: # %entry +; X32-NEXT:    subl $12, %esp +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    calll *%gs:(%eax) +; X32-NEXT:    xorl %eax, %eax +; X32-NEXT:    xorl %edx, %edx +; X32-NEXT:    addl $12, %esp +; X32-NEXT:    retl +; +; X64-LABEL: test2: +; X64:       # BB#0: # %entry +; X64-NEXT:    {{(subq.*%rsp|pushq)}} +; X64-NEXT:    callq *%gs:(%{{(rcx|rdi)}}) +; X64-NEXT:    xorl %eax, %eax +; X64-NEXT:    {{(addq.*%rsp|popq)}} +; X64-NEXT:    retq  entry:    %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8    tail call void %tmp9(i8* undef) nounwind optsize    ret i64 0  } -; rdar://8453210 -; X32-LABEL: test2: -; X32: movl	{{.*}}(%esp), %eax -; X32: calll	*%gs:(%eax) - -; X64-LABEL: test2: -; X64: callq	*%gs:([[A0:%rdi|%rcx]]) - - - -  define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly { +; X32-LABEL: pmovsxwd_1: +; X32:       # BB#0: # %entry +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    pmovsxwd %gs:(%eax), %xmm0 +; X32-NEXT:    retl +; +; X64-LABEL: pmovsxwd_1: +; X64:       # BB#0: # %entry +; X64-NEXT:    pmovsxwd %gs:(%{{(rcx|rdi)}}), %xmm0 +; X64-NEXT:    retq  entry:    %0 = load i64 addrspace(256)* %p    %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 @@ -44,20 +62,26 @@ entry:    %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone    %3 = bitcast <4 x i32> %2 to <2 x i64>    ret <2 x i64> %3 -   -; X32-LABEL: pmovsxwd_1: -; X32: 	movl	4(%esp), %eax -; X32: 	pmovsxwd	%gs:(%eax), %xmm0 -; X32: 	ret - -; X64-LABEL: pmovsxwd_1: -; X64:	pmovsxwd	%gs:([[A0]]), %xmm0 -; X64:	
ret  }  ; The two loads here both look identical to selection DAG, except for their  ; address spaces.  Make sure they aren't CSE'd.  define i32 @test_no_cse() nounwind readonly { +; X32-LABEL: test_no_cse: +; X32:       # BB#0: # %entry +; X32-NEXT:    movl %gs:196, %eax +; X32-NEXT:    movl (%eax), %eax +; X32-NEXT:    movl %fs:196, %ecx +; X32-NEXT:    addl (%ecx), %eax +; X32-NEXT:    retl +; +; X64-LABEL: test_no_cse: +; X64:       # BB#0: # %entry +; X64-NEXT:    movq %gs:320, %rax +; X64-NEXT:    movl (%rax), %eax +; X64-NEXT:    movq %fs:320, %rcx +; X64-NEXT:    addl (%rcx), %eax +; X64-NEXT:    retq  entry:  	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]  	%tmp1 = load i32* %tmp		; <i32> [#uses=1] @@ -66,9 +90,5 @@ entry:  	%tmp4 = add i32 %tmp1, %tmp3  	ret i32 %tmp4  } -; X32-LABEL: test_no_cse: -; X32: 	movl	%gs:196 -; X32: 	movl	%fs:196 -; X32: 	ret  declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/palignr.ll b/llvm/test/CodeGen/X86/palignr.ll index ec6564d7e2e..5c2dd05cb29 100644 --- a/llvm/test/CodeGen/X86/palignr.ll +++ b/llvm/test/CodeGen/X86/palignr.ll @@ -3,58 +3,162 @@  define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {  ; CHECK-LABEL: test1: -; CHECK: pshufd -; CHECK-YONAH: pshufd +; CHECK:       # BB#0: +; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0] +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test1: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0] +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >  	ret <4 x i32> %C  }  define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {  ; CHECK-LABEL: test2: -; CHECK: palignr -; CHECK-YONAH: shufps +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 
+; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test2: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >  	ret <4 x i32> %C  }  define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {  ; CHECK-LABEL: test3: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test3: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[0,0] +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >  	ret <4 x i32> %C  }  define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {  ; CHECK-LABEL: test4: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test4: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; CHECK-YONAH-NEXT:    movaps %xmm1, %xmm0 +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >  	ret <4 x i32> %C  }  define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {  ; CHECK-LABEL: test5: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test5: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; CHECK-YONAH-NEXT:    movaps %xmm1, %xmm0 +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <4 x 
float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >  	ret <4 x float> %C  }  define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {  ; CHECK-LABEL: test6: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test6: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    movapd %xmm0, %xmm2 +; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] +; CHECK-YONAH-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,2,4,5,6,7] +; CHECK-YONAH-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,5,6] +; CHECK-YONAH-NEXT:    pextrw $3, %xmm0, %eax +; CHECK-YONAH-NEXT:    pinsrw $0, %eax, %xmm1 +; CHECK-YONAH-NEXT:    pextrw $7, %xmm0, %eax +; CHECK-YONAH-NEXT:    pinsrw $4, %eax, %xmm1 +; CHECK-YONAH-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >  	ret <8 x i16> %C  }  define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {  ; CHECK-LABEL: test7: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test7: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; CHECK-YONAH-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7] +; CHECK-YONAH-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; CHECK-YONAH-NEXT:    movd %xmm1, %eax +; CHECK-YONAH-NEXT:    pinsrw $3, %eax, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $4, %xmm1, %eax +; CHECK-YONAH-NEXT:    pinsrw $7, %eax, %xmm0 +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >  	ret <8 x i16> 
%C  }  define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {  ; CHECK-LABEL: test8: -; CHECK: palignr +; CHECK:       # BB#0: +; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test8: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    pushl %esi +; CHECK-YONAH-NEXT:    movdqa %xmm0, %xmm2 +; CHECK-YONAH-NEXT:    pextrw $4, %xmm2, %eax +; CHECK-YONAH-NEXT:    pextrw $5, %xmm2, %ecx +; CHECK-YONAH-NEXT:    shrdw $8, %cx, %ax +; CHECK-YONAH-NEXT:    pextrw $2, %xmm2, %edx +; CHECK-YONAH-NEXT:    pextrw $3, %xmm2, %esi +; CHECK-YONAH-NEXT:    shrdw $8, %si, %dx +; CHECK-YONAH-NEXT:   # kill: XMM0<def> XMM2<kill> +; CHECK-YONAH-NEXT:    pinsrw $0, %edx, %xmm0 +; CHECK-YONAH-NEXT:    shrl $8, %esi +; CHECK-YONAH-NEXT:    pinsrw $1, %esi, %xmm0 +; CHECK-YONAH-NEXT:    pinsrw $2, %eax, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $6, %xmm2, %eax +; CHECK-YONAH-NEXT:    shrdw $8, %ax, %cx +; CHECK-YONAH-NEXT:    pinsrw $3, %ecx, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $7, %xmm2, %ecx +; CHECK-YONAH-NEXT:    shrdw $8, %cx, %ax +; CHECK-YONAH-NEXT:    pinsrw $4, %eax, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $8, %xmm1, %eax +; CHECK-YONAH-NEXT:    shrdw $8, %ax, %cx +; CHECK-YONAH-NEXT:    pinsrw $5, %ecx, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $9, %xmm1, %ecx +; CHECK-YONAH-NEXT:    shrdw $8, %cx, %ax +; CHECK-YONAH-NEXT:    pinsrw $6, %eax, %xmm0 +; CHECK-YONAH-NEXT:    pextrw $10, %xmm1, %eax +; CHECK-YONAH-NEXT:    shldw $8, %cx, %ax +; CHECK-YONAH-NEXT:    pinsrw $7, %eax, %xmm0 +; CHECK-YONAH-NEXT:    popl %esi +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >  	ret <16 x i8> %C  } @@ -65,8 +169,20 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {  ; was an UNDEF.)  
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {  ; CHECK-LABEL: test9: -; CHECK-NOT: palignr -; CHECK: pshufb +; CHECK:       # BB#0: +; CHECK-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,xmm1[4,5,6,7,8,9,10,11,12,13,14,15,0,1] +; CHECK-NEXT:    movdqa %xmm1, %xmm0 +; CHECK-NEXT:    retl +; +; CHECK-YONAH-LABEL: test9: +; CHECK-YONAH:       # BB#0: +; CHECK-YONAH-NEXT:    pextrw $4, %xmm1, %eax +; CHECK-YONAH-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,3,0,4,5,6,7] +; CHECK-YONAH-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; CHECK-YONAH-NEXT:    pinsrw $3, %eax, %xmm0 +; CHECK-YONAH-NEXT:    movd %xmm1, %eax +; CHECK-YONAH-NEXT:    pinsrw $7, %eax, %xmm0 +; CHECK-YONAH-NEXT:    retl    %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >  	ret <8 x i16> %C  } diff --git a/llvm/test/CodeGen/X86/pr14161.ll b/llvm/test/CodeGen/X86/pr14161.ll index ff4532eac3a..c2bb8d3df8f 100644 --- a/llvm/test/CodeGen/X86/pr14161.ll +++ b/llvm/test/CodeGen/X86/pr14161.ll @@ -3,6 +3,12 @@  declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)  define <2 x i16> @good(<4 x i32>*, <4 x i8>*) { +; CHECK-LABEL: good: +; CHECK:       # BB#0: # %entry +; CHECK-NEXT:    movdqa (%rdi), %xmm0 +; CHECK-NEXT:    pminud {{.*}}(%rip), %xmm0 +; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0 +; CHECK-NEXT:    retq  entry:    %2 = load <4 x i32>* %0, align 16    %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) @@ -13,13 +19,17 @@ entry:    %8 = bitcast i32 %4 to <2 x i16>    %9 = bitcast i32 %5 to <2 x i16>    ret <2 x i16> %8 -; CHECK: good -; CHECK: pminud -; CHECK-NEXT: pmovzxwq -; CHECK: ret  }  define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) { +; CHECK-LABEL: bad: +; CHECK:       # BB#0: # %entry +; CHECK-NEXT:    movdqa (%rdi), %xmm0 +; CHECK-NEXT:    pminud {{.*}}(%rip), %xmm0 +; CHECK-NEXT:    pextrd $1, %xmm0, %eax +; CHECK-NEXT:    movd %eax, %xmm0 +; 
CHECK-NEXT:    pmovzxwq %xmm0, %xmm0 +; CHECK-NEXT:    retq  entry:    %2 = load <4 x i32>* %0, align 16    %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>) @@ -30,9 +40,4 @@ entry:    %8 = bitcast i32 %4 to <2 x i16>    %9 = bitcast i32 %5 to <2 x i16>    ret <2 x i16> %9 -; CHECK: bad -; CHECK: pminud -; CHECK: pextrd -; CHECK: pmovzxwq -; CHECK: ret  } | 

