diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-24 11:46:13 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-24 11:46:13 +0000 |
| commit | e1ec9072f638b4d3aede8e840cb61e26553fd42b (patch) | |
| tree | 5080592ddb4e12d30543c1de9eed4a746ce04263 /llvm/test | |
| parent | 4b1dfbbc78845805e5205f5655d6f444ff05bd1b (diff) | |
| download | bcm5719-llvm-e1ec9072f638b4d3aede8e840cb61e26553fd42b.tar.gz bcm5719-llvm-e1ec9072f638b4d3aede8e840cb61e26553fd42b.zip | |
[X86][SSE] Add support for constant folding vector arithmetic shift by immediates
llvm-svn: 292919
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/pmul.ll | 47 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 3 |
3 files changed, 38 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 7d9ef28a090..ac83653efbd 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -8,19 +8,18 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind { ; SSE2-LABEL: mul_v16i8c: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: pmullw %xmm1, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117] +; SSE2-NEXT: pmullw %xmm2, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pand %xmm3, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 -; SSE2-NEXT: pmullw %xmm1, %xmm0 +; SSE2-NEXT: pmullw %xmm2, %xmm0 ; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: mul_v16i8c: @@ -382,29 +381,28 @@ entry: define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind { ; SSE2-LABEL: mul_v32i8c: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: psraw $8, %xmm3 -; SSE2-NEXT: pmullw %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [117,117,117,117,117,117,117,117] +; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] -; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 -; SSE2-NEXT: pmullw %xmm2, %xmm0 +; SSE2-NEXT: pmullw %xmm3, %xmm0 ; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: packuswb %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: psraw $8, %xmm3 -; SSE2-NEXT: pmullw %xmm2, %xmm3 -; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE2-NEXT: psraw $8, %xmm2 +; SSE2-NEXT: pmullw %xmm3, %xmm2 +; SSE2-NEXT: pand %xmm4, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: pmullw %xmm2, %xmm1 +; SSE2-NEXT: pmullw %xmm3, %xmm1 ; SSE2-NEXT: pand %xmm4, %xmm1 -; SSE2-NEXT: packuswb %xmm3, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSE41-LABEL: mul_v32i8c: @@ -771,11 +769,10 @@ entry: define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE2-LABEL: mul_v64i8c: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] -; SSE2-NEXT: psraw $8, %xmm4 ; SSE2-NEXT: movdqa %xmm0, %xmm6 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm6 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117] ; SSE2-NEXT: pmullw %xmm4, %xmm6 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm5, %xmm6 diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll index dfbfe06678f..933359fa084 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -175,19 +175,18 @@ define <8 x i16> @test_div7_8i16(<8 x i16> %a) nounwind { define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: test_div7_16i8: ; SSE2: # BB#0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: psraw $8, %xmm3 -; SSE2-NEXT: pmullw %xmm2, %xmm3 -; SSE2-NEXT: psrlw $8, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427] +; SSE2-NEXT: pmullw %xmm3, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: pmullw %xmm2, %xmm1 +; SSE2-NEXT: pmullw %xmm3, %xmm1 ; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: packuswb %xmm3, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: psrlw $2, %xmm0 @@ -483,19 +482,18 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind { define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: test_rem7_16i8: ; SSE2: # BB#0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: psraw $8, %xmm3 -; SSE2-NEXT: pmullw %xmm2, %xmm3 -; SSE2-NEXT: psrlw $8, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427] +; SSE2-NEXT: pmullw %xmm3, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: pmullw %xmm2, %xmm1 +; SSE2-NEXT: pmullw %xmm3, %xmm1 ; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: packuswb %xmm3, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrlw $2, %xmm2 @@ -509,8 +507,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; SSE2-NEXT: psraw $8, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7] ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm4, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index c4ecaa4a1bc..7857e585dca 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -486,8 +486,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; SSE2-NEXT: psraw $8, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7] ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm4, %xmm2 |

