From cf2da96c82e6488d382ba320b4749e1c5d6c62f8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 14 Mar 2017 21:26:58 +0000 Subject: [SelectionDAG] Add a signed integer absolute ISD node Reduced version of D26357 - based on the discussion on llvm-dev about canonicalization of UMIN/UMAX/SMIN/SMAX as well as ABS, I've reduced that patch to just the ABS ISD node (with x86/sse support) to improve basic combines and lowering. ARM/AArch64, Hexagon, PowerPC and NVPTX all have similar instructions allowing us to make this a generic opcode and move away from the hard-coded tablegen patterns which make it tricky to match more complex patterns. At the moment this patch doesn't attempt legalization as we only create an ABS node if it's legal/custom. Differential Revision: https://reviews.llvm.org/D29639 llvm-svn: 297780 --- llvm/test/CodeGen/X86/combine-abs.ll | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'llvm/test/CodeGen/X86/combine-abs.ll') diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 109738dcd9b..2d71e4f2f27 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -1,13 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -; FIXME: Various missed opportunities to simplify integer absolute instructions. 
- ; fold (abs c1) -> c2 define <4 x i32> @combine_v4i32_abs_constant() { ; CHECK-LABEL: combine_v4i32_abs_constant: ; CHECK: # BB#0: -; CHECK-NEXT: vpabsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] ; CHECK-NEXT: retq %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> ) ret <4 x i32> %1 @@ -16,7 +14,7 @@ define <4 x i32> @combine_v4i32_abs_constant() { define <16 x i16> @combine_v16i16_abs_constant() { ; CHECK-LABEL: combine_v16i16_abs_constant: ; CHECK: # BB#0: -; CHECK-NEXT: vpabsw {{.*}}(%rip), %ymm0 +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] ; CHECK-NEXT: retq %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> ) ret <16 x i16> %1 @@ -27,7 +25,6 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) { ; CHECK-LABEL: combine_v8i16_abs_abs: ; CHECK: # BB#0: ; CHECK-NEXT: vpabsw %xmm0, %xmm0 -; CHECK-NEXT: vpabsw %xmm0, %xmm0 ; CHECK-NEXT: retq %a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a) %n2 = sub <8 x i16> zeroinitializer, %a1 @@ -40,7 +37,6 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { ; CHECK-LABEL: combine_v32i8_abs_abs: ; CHECK: # BB#0: ; CHECK-NEXT: vpabsb %ymm0, %ymm0 -; CHECK-NEXT: vpabsb %ymm0, %ymm0 ; CHECK-NEXT: retq %n1 = sub <32 x i8> zeroinitializer, %a %b1 = icmp slt <32 x i8> %a, zeroinitializer -- cgit v1.2.3