| author | Sanjay Patel <spatel@rotateright.com> | 2018-11-12 22:52:25 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-12 22:52:25 +0000 |
| commit | 35b1c2d19db5ba9e8340b4049f0f5283ed022279 (patch) | |
| tree | b2c671c6b328ff3292351900283a5bab50fafabe | |
| parent | 22dc538618a76c61117512afb83a08d7fd7f4995 (diff) | |
| download | bcm5719-llvm-35b1c2d19db5ba9e8340b4049f0f5283ed022279.tar.gz bcm5719-llvm-35b1c2d19db5ba9e8340b4049f0f5283ed022279.zip | |
[InstCombine] narrow width of rotate patterns, part 3
This is a longer variant of the pattern handled in rL346713; this one includes zexts.
Eventually, we should canonicalize all rotate patterns
to the funnel shift intrinsics, but we need a bit more
infrastructure to make sure the vectorizers handle those
intrinsics as well as the shift+logic ops.
https://rise4fun.com/Alive/FMn
Name: narrow rotateright
%neg = sub i8 0, %shamt
%rshamt = and i8 %shamt, 7
%rshamtconv = zext i8 %rshamt to i32
%lshamt = and i8 %neg, 7
%lshamtconv = zext i8 %lshamt to i32
%conv = zext i8 %x to i32
%shr = lshr i32 %conv, %rshamtconv
%shl = shl i32 %conv, %lshamtconv
%or = or i32 %shl, %shr
%r = trunc i32 %or to i8
=>
%maskedShAmt2 = and i8 %shamt, 7
%negShAmt2 = sub i8 0, %shamt
%maskedNegShAmt2 = and i8 %negShAmt2, 7
%shl2 = lshr i8 %x, %maskedShAmt2
%shr2 = shl i8 %x, %maskedNegShAmt2
%r = or i8 %shl2, %shr2
llvm-svn: 346716
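
For context (this sketch is not part of the commit), below is the kind of C++ source that produces the wide pattern shown in the proof: the `uint8_t` value and the masked shift amounts are promoted to `int`, so the emitted IR zexts the shift amounts and truncates the final `or`, which is the shape the new matcher narrows back to an 8-bit rotate. The function name `rotate_right_8` and the test constants are illustrative only.

```cpp
// Hand-written rotate-right on a narrow type. Integer promotion widens the
// uint8_t operands and the masked shift amounts to int (i32 in IR), so the
// shifts and the or happen in the wide type and the result is truncated back
// to 8 bits -- the pattern this patch teaches narrowRotate() to recognize.
#include <cstdint>
#include <cstdio>

uint8_t rotate_right_8(uint8_t x, uint8_t shamt) {
  // Masking both shift amounts with 7 keeps the shifts in range (UB-free).
  uint8_t r = shamt & 7;
  uint8_t l = static_cast<uint8_t>(0 - shamt) & 7;
  return static_cast<uint8_t>((x >> r) | (x << l));
}

int main() {
  // 0xB4 (0b10110100) rotated right by 3 is 0x96 (0b10010110).
  std::printf("0x%02x\n", rotate_right_8(0xB4, 3));
  return 0;
}
```

With this patch, instcombine should see through the zexts of the masked shift amounts and keep the whole computation in i8 rather than i32.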
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 5 |
|---|---|---|
| -rw-r--r-- | llvm/test/Transforms/InstCombine/rotate.ll | 71 |

2 files changed, 32 insertions, 44 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8729ecc911b..a934e0aa68e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -530,6 +530,11 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
         match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
       return X;
 
+    // Same as above, but the shift amount may be extended after masking:
+    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+      return X;
+
     return nullptr;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 2d0fcde50ac..3ee7d6586dd 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -3,10 +3,11 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
-; These are UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
-; See PR34046 and PR16726 for motivating examples:
+; These may be UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
+; See PR34046, PR16726, and PR39624 for motivating examples:
 ; https://bugs.llvm.org/show_bug.cgi?id=34046
 ; https://bugs.llvm.org/show_bug.cgi?id=16726
+; https://bugs.llvm.org/show_bug.cgi?id=39624
 
 define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
 ; CHECK-LABEL: @rotate_left_16bit(
@@ -122,22 +123,17 @@ define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
   ret i8 %ret
 }
 
-; FIXME:
 ; We should narrow (v << (s & 15)) | (v >> (-s & 15))
 ; when both v and s have been promoted.
 
 define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
 ; CHECK-LABEL: @rotateleft_16_neg_mask(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[LSHAMT:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i16 [[LSHAMT]] to i32
-; CHECK-NEXT:    [[RSHAMT:%.*]] = and i16 [[NEG]], 15
-; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i16 [[RSHAMT]] to i32
-; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[SHAMT]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP1]], 15
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i16 [[V:%.*]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i16 [[V]], [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = or i16 [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %neg = sub i16 0, %shamt
@@ -155,16 +151,12 @@ define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
 
 define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
 ; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[LSHAMT:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i16 [[LSHAMT]] to i32
-; CHECK-NEXT:    [[RSHAMT:%.*]] = and i16 [[NEG]], 15
-; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i16 [[RSHAMT]] to i32
-; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
-; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[SHAMT]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP1]], 15
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i16 [[V:%.*]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i16 [[V]], [[TMP3]]
+; CHECK-NEXT:    [[RET:%.*]] = or i16 [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %neg = sub i16 0, %shamt
@@ -182,16 +174,12 @@ define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
 
 define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
 ; CHECK-LABEL: @rotateright_8_neg_mask(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[RSHAMT:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i8 [[RSHAMT]] to i32
-; CHECK-NEXT:    [[LSHAMT:%.*]] = and i8 [[NEG]], 7
-; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i8 [[LSHAMT]] to i32
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[SHAMT]], 7
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i8 [[V:%.*]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i8 [[V]], [[TMP3]]
+; CHECK-NEXT:    [[RET:%.*]] = or i8 [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %neg = sub i8 0, %shamt
@@ -209,16 +197,12 @@ define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
 
 define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
 ; CHECK-LABEL: @rotateright_8_neg_mask_commute(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[RSHAMT:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i8 [[RSHAMT]] to i32
-; CHECK-NEXT:    [[LSHAMT:%.*]] = and i8 [[NEG]], 7
-; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i8 [[LSHAMT]] to i32
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
-; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[SHAMT]], 7
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 [[V:%.*]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 [[V]], [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = or i8 [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %neg = sub i8 0, %shamt
@@ -234,7 +218,6 @@ define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
   ret i8 %ret
 }
 
-; FIXME:
 ; The shift amount may already be in the wide type,
 ; so we need to truncate it going into the rotate pattern.
 

