From c4cc7febb0fb4f1b2484bd6b406bc2177b85649e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 4 Aug 2014 13:13:57 +0000 Subject: [PowerPC] Fix and improve vector comparisons This patch refactors code generation of vector comparisons. This fixes a wrong code-gen bug for ISD::SETGE for floating-point types, and improves generated code for vector comparisons in general. Specifically, the patch moves all logic deciding how to implement vector comparisons into getVCmpInst, which gets two extra boolean outputs indicating to its caller whether it needs to swap the input operands and/or negate the result of the comparison. Apart from implementing these two modifications as directed by getVCmpInst, there is no need to ever implement vector comparisons in any other manner; in particular, there is never a need to perform two separate comparisons (e.g. one for equal and one for greater-than, as code used to do before this patch). Reviewed by Bill Schmidt. llvm-svn: 214714 --- llvm/test/CodeGen/PowerPC/vec_cmp.ll | 108 ++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 39 deletions(-) (limited to 'llvm/test/CodeGen/PowerPC') diff --git a/llvm/test/CodeGen/PowerPC/vec_cmp.ll b/llvm/test/CodeGen/PowerPC/vec_cmp.ll index 2733089fcb1..516b2dd58b9 100644 --- a/llvm/test/CodeGen/PowerPC/vec_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/vec_cmp.ll @@ -63,9 +63,8 @@ entry: ret <16 x i8> %sext } ; CHECK-LABEL: v16si8_cmp_le: -; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtsb [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { entry: @@ -74,9 +73,8 @@ entry: ret <16 x i8> %sext } ; CHECK-LABEL: v16ui8_cmp_le: -; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtub [[RET:[0-9]+]], 
2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { entry: @@ -121,9 +119,8 @@ entry: ret <16 x i8> %sext } ; CHECK-LABEL: v16si8_cmp_ge: -; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtsb [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { entry: @@ -132,9 +129,8 @@ entry: ret <16 x i8> %sext } ; CHECK-LABEL: v16ui8_cmp_ge: -; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtub [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone { @@ -193,9 +189,8 @@ entry: ret <8 x i16> %sext } ; CHECK-LABEL: v8si16_cmp_le: -; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtsh [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { entry: @@ -204,9 +199,8 @@ entry: ret <8 x i16> %sext } ; CHECK-LABEL: v8ui16_cmp_le: -; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtuh [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { entry: @@ -251,9 +245,8 @@ entry: ret <8 x i16> %sext } ; CHECK-LABEL: v8si16_cmp_ge: -; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtsh [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x 
i16> %y) nounwind readnone { entry: @@ -262,9 +255,8 @@ entry: ret <8 x i16> %sext } ; CHECK-LABEL: v8ui16_cmp_ge: -; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtuh [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone { @@ -326,9 +318,8 @@ entry: ret <4 x i32> %sext } ; CHECK-LABEL: v4si32_cmp_le: -; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtsw [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -337,9 +328,8 @@ entry: ret <4 x i32> %sext } ; CHECK-LABEL: v4ui32_cmp_le: -; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgtuw [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -384,9 +374,8 @@ entry: ret <4 x i32> %sext } ; CHECK-LABEL: v4si32_cmp_ge: -; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtsw [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -395,9 +384,8 @@ entry: ret <4 x i32> %sext } ; CHECK-LABEL: v4ui32_cmp_ge: -; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3 -; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] +; CHECK: vcmpgtuw [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone { @@ -480,9 +468,7 @@ entry: ret <4 x float> %0 } ; CHECK-LABEL: 
v4f32_cmp_le: -; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 -; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 -; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] +; CHECK: vcmpgefp 2, 3, 2 define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: @@ -514,6 +500,50 @@ entry: ; CHECK-LABEL: v4f32_cmp_gt: ; CHECK: vcmpgtfp 2, 2, 3 +define <4 x float> @v4f32_cmp_ule(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ule <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK-LABEL: v4f32_cmp_ule: +; CHECK: vcmpgtfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_ult(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ult <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK-LABEL: v4f32_cmp_ult: +; CHECK: vcmpgefp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_uge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp uge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK-LABEL: v4f32_cmp_uge: +; CHECK: vcmpgtfp [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_ugt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ugt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK-LABEL: v4f32_cmp_ugt: +; CHECK: vcmpgefp [[RET:[0-9]+]], 3, 2 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone { entry: -- cgit v1.2.3