diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-10-21 20:13:29 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-21 20:13:29 +0000 |
commit | e439cc2745323d2ac0e61de1e04efa0098f4b1a8 (patch) | |
tree | 6365076050c6dc131a967da4aec0508cd91b0f4b /llvm/test/CodeGen | |
parent | e967a12733565fff0beb16865bd21e381b75b250 (diff) | |
download | bcm5719-llvm-e439cc2745323d2ac0e61de1e04efa0098f4b1a8.tar.gz bcm5719-llvm-e439cc2745323d2ac0e61de1e04efa0098f4b1a8.zip |
[DAGCombiner] reduce insert+bitcast+extract vector ops to truncate (PR39016)
This is a late backend subset of the IR transform added with:
D52439
We can confirm that the conversion to a 'trunc' is correct by running:
$ opt -instcombine -data-layout="e"
(assuming the IR transforms are correct; change "e" to "E" for big-endian)
As discussed in PR39016:
https://bugs.llvm.org/show_bug.cgi?id=39016
...the pattern may emerge during legalization, so that's why we are waiting for an
insertelement to become a scalar_to_vector in the pattern matching here.
The DAG allows for fun variations that are not possible in IR. Result types for
extracts and scalar_to_vector don't necessarily match input types, so that means
we have to be a bit more careful in the transform (see code comments).
The tests show that we don't handle cases that require a shift (as we did in the
IR version). I've left that as a potential follow-up because I'm not sure if
that's a real concern at this late stage.
Differential Revision: https://reviews.llvm.org/D53201
llvm-svn: 344872
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AArch64/extract-insert.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/extract-insert.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/mmx-coalescing.ll | 9 |
3 files changed, 32 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AArch64/extract-insert.ll b/llvm/test/CodeGen/AArch64/extract-insert.ll index 91f6518edd8..077e5f3d042 100644 --- a/llvm/test/CodeGen/AArch64/extract-insert.ll +++ b/llvm/test/CodeGen/AArch64/extract-insert.ll @@ -12,8 +12,7 @@ define i32 @trunc_i64_to_i32_le(i64 %x) { ; ; LE-LABEL: trunc_i64_to_i32_le: ; LE: // %bb.0: -; LE-NEXT: fmov d0, x0 -; LE-NEXT: fmov w0, s0 +; LE-NEXT: // kill: def $w0 killed $w0 killed $x0 ; LE-NEXT: ret %ins = insertelement <2 x i64> undef, i64 %x, i32 0 %bc = bitcast <2 x i64> %ins to <4 x i32> @@ -24,9 +23,7 @@ define i32 @trunc_i64_to_i32_le(i64 %x) { define i32 @trunc_i64_to_i32_be(i64 %x) { ; BE-LABEL: trunc_i64_to_i32_be: ; BE: // %bb.0: -; BE-NEXT: fmov d0, x0 -; BE-NEXT: rev64 v0.4s, v0.4s -; BE-NEXT: mov w0, v0.s[1] +; BE-NEXT: // kill: def $w0 killed $w0 killed $x0 ; BE-NEXT: ret ; ; LE-LABEL: trunc_i64_to_i32_be: @@ -50,8 +47,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) { ; ; LE-LABEL: trunc_i64_to_i16_le: ; LE: // %bb.0: -; LE-NEXT: fmov d0, x0 -; LE-NEXT: umov w0, v0.h[0] +; LE-NEXT: // kill: def $w0 killed $w0 killed $x0 ; LE-NEXT: ret %ins = insertelement <2 x i64> undef, i64 %x, i32 0 %bc = bitcast <2 x i64> %ins to <8 x i16> @@ -62,9 +58,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) { define i16 @trunc_i64_to_i16_be(i64 %x) { ; BE-LABEL: trunc_i64_to_i16_be: ; BE: // %bb.0: -; BE-NEXT: fmov d0, x0 -; BE-NEXT: rev64 v0.8h, v0.8h -; BE-NEXT: umov w0, v0.h[3] +; BE-NEXT: // kill: def $w0 killed $w0 killed $x0 ; BE-NEXT: ret ; ; LE-LABEL: trunc_i64_to_i16_be: @@ -88,8 +82,6 @@ define i8 @trunc_i32_to_i8_le(i32 %x) { ; ; LE-LABEL: trunc_i32_to_i8_le: ; LE: // %bb.0: -; LE-NEXT: fmov s0, w0 -; LE-NEXT: umov w0, v0.b[0] ; LE-NEXT: ret %ins = insertelement <4 x i32> undef, i32 %x, i32 0 %bc = bitcast <4 x i32> %ins to <16 x i8> @@ -100,9 +92,6 @@ define i8 @trunc_i32_to_i8_le(i32 %x) { define i8 @trunc_i32_to_i8_be(i32 %x) { ; BE-LABEL: trunc_i32_to_i8_be: ; BE: // %bb.0: -; BE-NEXT: fmov s0, w0 -; 
BE-NEXT: rev32 v0.16b, v0.16b -; BE-NEXT: umov w0, v0.b[3] ; BE-NEXT: ret ; ; LE-LABEL: trunc_i32_to_i8_be: @@ -116,3 +105,22 @@ define i8 @trunc_i32_to_i8_be(i32 %x) { ret i8 %ext } +; Weird type (non-power-of-2 vector) is ok. + +define i8 @trunc_i64_to_i8_be(i64 %x) { +; BE-LABEL: trunc_i64_to_i8_be: +; BE: // %bb.0: +; BE-NEXT: // kill: def $w0 killed $w0 killed $x0 +; BE-NEXT: ret +; +; LE-LABEL: trunc_i64_to_i8_be: +; LE: // %bb.0: +; LE-NEXT: fmov d0, x0 +; LE-NEXT: umov w0, v0.b[7] +; LE-NEXT: ret + %ins = insertelement <3 x i64> undef, i64 %x, i32 0 + %bc = bitcast <3 x i64> %ins to <24 x i8> + %ext = extractelement <24 x i8> %bc, i32 7 + ret i8 %ext +} + diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll index 2393e32ebf6..be5f9ed24fb 100644 --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -68,8 +68,8 @@ define i32 @trunc_i64_to_i32_le(i64 %x) { ; ; X64-LABEL: trunc_i64_to_i32_le: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %xmm0 -; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %ins = insertelement <2 x i64> undef, i64 %x, i32 0 %bc = bitcast <2 x i64> %ins to <4 x i32> @@ -86,9 +86,8 @@ define i16 @trunc_i64_to_i16_le(i64 %x) { ; ; X64-LABEL: trunc_i64_to_i16_le: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %xmm0 -; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $ax killed $ax killed $rax ; X64-NEXT: retq %ins = insertelement <2 x i64> undef, i64 %x, i32 0 %bc = bitcast <2 x i64> %ins to <8 x i16> diff --git a/llvm/test/CodeGen/X86/mmx-coalescing.ll b/llvm/test/CodeGen/X86/mmx-coalescing.ll index 8f9204a4a85..8cd57aa8c53 100644 --- a/llvm/test/CodeGen/X86/mmx-coalescing.ll +++ b/llvm/test/CodeGen/X86/mmx-coalescing.ll @@ -16,16 +16,17 @@ define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) { ; 
CHECK-NEXT: # %bb.2: # %if.B ; CHECK-NEXT: pshufw $238, %mm0, %mm0 # mm0 = mm0[2,3,2,3] ; CHECK-NEXT: movq %mm0, %rax -; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne .LBB0_4 ; CHECK-NEXT: .LBB0_1: # %if.A +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movd %edx, %mm1 ; CHECK-NEXT: psllq %mm1, %mm0 ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: jne .LBB0_4 -; CHECK-NEXT: .LBB0_3: # %if.C -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: # %bb.3: # %if.C +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: .LBB0_4: # %merge |