summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp18
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll30
2 files changed, 12 insertions, 36 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bdd9a43be27..c1872a1c9ce 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3479,22 +3479,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
- // TODO: Also emit sub if only width is constant.
- if (!CWidth && COffset && Offset == 0) {
- Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
- Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
- ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
-
- Value *Shl = Builder.CreateShl(Src, ShiftVal);
- Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
- : Builder.CreateLShr(Shl, ShiftVal);
- RightShift->takeName(II);
- return replaceInstUsesWith(*II, RightShift);
- }
-
if (!CWidth || !COffset)
break;
+ // The case of Width == 0 is handled above, which makes this transformation
+ // safe. If Width == 0, then the ashr and lshr instructions would produce
+ // poison values since the shift amount would equal the bit size.
+ assert(Width != 0);
+
// TODO: This allows folding to undef when the hardware has specific
// behavior?
if (Offset + Width < IntSize) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 22f96be694d..0a179d1f96d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -934,32 +934,23 @@ define i32 @ubfe_offset_33(i32 %src, i32 %width) {
}
; CHECK-LABEL: @ubfe_offset_0(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = lshr i32 -1, %1
-; CHECK-NEXT: %bfe = and i32 %2, %src
-; CHECK-NEXT: ret i32 %bfe
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
define i32 @ubfe_offset_0(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
ret i32 %bfe
}
; CHECK-LABEL: @ubfe_offset_32(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = lshr i32 -1, %1
-; CHECK-NEXT: %bfe = and i32 %2, %src
-; CHECK-NEXT: ret i32 %bfe
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
define i32 @ubfe_offset_32(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
ret i32 %bfe
}
; CHECK-LABEL: @ubfe_offset_31(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = lshr i32 -1, %1
-; CHECK-NEXT: %bfe = and i32 %2, %src
-; CHECK-NEXT: ret i32 %bfe
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
define i32 @ubfe_offset_31(i32 %src, i32 %width) {
- %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
ret i32 %bfe
}
@@ -1040,11 +1031,7 @@ define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
}
; CHECK-LABEL: @ubfe_offset_0_i64(
-; CHECK-NEXT: %1 = sub i32 64, %width
-; CHECK-NEXT: %2 = zext i32 %1 to i64
-; CHECK-NEXT: %3 = lshr i64 -1, %2
-; CHECK-NEXT: %bfe = and i64 %3, %src
-; CHECK-NEXT: ret i64 %bfe
+; CHECK-NEXT: %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
%bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
ret i64 %bfe
@@ -1066,12 +1053,9 @@ declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
; CHECK-LABEL: @sbfe_offset_31(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = ashr i32 %2, %1
-; CHECK-NEXT: ret i32 %bfe
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
define i32 @sbfe_offset_31(i32 %src, i32 %width) {
- %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 32, i32 %width)
+ %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
ret i32 %bfe
}
OpenPOWER on IntegriCloud