diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-07-28 13:13:46 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-07-28 13:13:46 +0000 |
commit | d5bc4b09f11b794deecdfd2aabe5f870369c45ef (patch) | |
tree | 9e71d7be1ebb37701b4f93b54ef954219bc54a6d | |
parent | 45d910de994098142d6e8ffae94963df0f649dc2 (diff) | |
download | bcm5719-llvm-d5bc4b09f11b794deecdfd2aabe5f870369c45ef.tar.gz bcm5719-llvm-d5bc4b09f11b794deecdfd2aabe5f870369c45ef.zip |
[NFC][InstCombine] Shift amount reassociation: can have trunc between shl's
https://rise4fun.com/Alive/OQbM
Not so simple for lshr/ashr, so those may come later.
https://bugs.llvm.org/show_bug.cgi?id=42391
llvm-svn: 367189
-rw-r--r-- | llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll | 289 |
1 file changed, 289 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll new file mode 100644 index 00000000000..0eab031b959 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -instcombine -S | FileCheck %s + +; Given pattern: +; (trunc (x << Q) to iDst) << K +; we should rewrite it as +; (trunc (x << (Q+K)) to iDst) iff (Q+K) u< iDst +; This is only valid for shl. +; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/`nsw` FLAGS! + +; Basic scalar test + +define i16 @t0(i32 %x, i16 %y) { +; CHECK-LABEL: @t0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -2 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = shl i16 %t3, %t4 + ret i16 %t5 +} + +define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { +; CHECK-LABEL: @t1_vec_splat( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -2, i16 -2> +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t0 = sub <2 x i16> <i16 32, i16 32>, %y + %t1 = zext <2 x i16> %t0 to <2 x i32> + %t2 = shl <2 x i32> %x, %t1 + %t3 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = add <2 x i16> %y, <i16 -2, i16 -2> + %t5 = shl 
<2 x i16> %t3, %t4 + ret <2 x i16> %t5 +} + +define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) { +; CHECK-LABEL: @t2_vec_nonsplat( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> <i16 32, i16 30>, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -2, i16 0> +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t0 = sub <2 x i16> <i16 32, i16 30>, %y + %t1 = zext <2 x i16> %t0 to <2 x i32> + %t2 = shl <2 x i32> %x, %t1 + %t3 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = add <2 x i16> %y, <i16 -2, i16 0> + %t5 = shl <2 x i16> %t3, %t4 + ret <2 x i16> %t5 +} + +; Basic vector tests + +define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_undef0( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 -2, i16 -2> +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = shl <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, <i16 -2, i16 -2, i16 -2> + %t5 = shl <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] 
= trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 undef, i16 -2> +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = shl <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, <i16 -2, i16 undef, i16 -2> + %t5 = shl <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 undef, i16 -2> +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = shl <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, <i16 -2, i16 undef, i16 -2> + %t5 = shl <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +; One-use tests + +declare void @use16(i16) +declare void @use32(i32) + +define i16 @t6_extrause0(i32 %x, i16 %y) { +; CHECK-LABEL: @t6_extrause0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: call void @use32(i32 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -2 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + call void @use32(i32 %t2) + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + 
%t5 = shl i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t7_extrause1(i32 %x, i16 %y) { +; CHECK-LABEL: @t7_extrause1( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -2 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + call void @use16(i16 %t3) + %t4 = add i16 %y, -2 + %t5 = shl i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t8_extrause2(i32 %x, i16 %y) { +; CHECK-LABEL: @t8_extrause2( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: call void @use32(i32 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -2 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + call void @use32(i32 %t2) + %t3 = trunc i32 %t2 to i16 + call void @use16(i16 %t3) + %t4 = add i16 %y, -2 + %t5 = shl i16 %t3, %t4 + ret i16 %t5 +} + +; Special test + +; New shift amount is less than bitwidth after truncation, so we could pre-truncated. 
+define i16 @t9_pretrunc(i32 %x, i16 %y) { +; CHECK-LABEL: @t9_pretrunc( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -18 + %t5 = shl i16 %t3, %t4 + ret i16 %t3 +} + +; No 'nuw'/'nsw' flags are to be propagated! +define i16 @t10_no_flags(i32 %x, i16 %y) { +; CHECK-LABEL: @t10_no_flags( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl nuw nsw i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl nuw nsw i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = shl nuw nsw i16 %t3, %t4 + ret i16 %t3 +} + +; Negative tests + +; As-is this is not correct for other shift opcodes. 
+define i16 @t11_shl(i32 %x, i16 %y) { +; CHECK-LABEL: @t11_shl( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = lshr i16 %t3, %t4 + ret i16 %t3 +} +define i16 @t12_ashr(i32 %x, i16 %y) { +; CHECK-LABEL: @t12_ashr( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = ashr i16 %t3, %t4 + ret i16 %t3 +} + +; Can't fold, total shift would be 32 +define i16 @n13(i32 %x, i16 %y) { +; CHECK-LABEL: @n13( +; CHECK-NEXT: [[T0:%.*]] = sub i16 30, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 30, %y + %t1 = zext i16 %t0 to i32 + %t2 = shl i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = shl i16 %t3, %t4 + ret i16 %t3 +} |