author | Craig Topper <craig.topper@intel.com> | 2017-07-31 18:52:13 +0000
committer | Craig Topper <craig.topper@intel.com> | 2017-07-31 18:52:13 +0000
commit | 8324003818518c45526280e5acc5772dfb9d5354
tree | a08eae2d45cf0198502520e893575ce4cba1ffdf
parent | 1b4e9ae3840143927b298ec38175e9fb994d18da
[X86][InstCombine] Add basic simplification support for BEXTR/BEXTRI intrinsics.
This patch adds simplification support for the BEXTR/BEXTRI intrinsics, matching gcc's behavior. It only handles cases that fold to 0 or can be fully constant folded. Theoretically we could also convert to an AND if the shift part is unused, or to only a shift if the mask doesn't modify any bits beyond what an equivalent shl already clears, but gcc doesn't do those transformations either.
I put this in InstCombine, but it could be done in InstSimplify. It would be the first target-specific intrinsic handled in InstSimplify.
Differential Revision: https://reviews.llvm.org/D36063
llvm-svn: 309603
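For reference, the control operand of these intrinsics packs the starting bit position in bits 7:0 and the extract length in bits 15:8, which is exactly what the new InstCombine code decodes. The standalone C++ sketch below is illustrative only (the function name, `main` demo, and chosen values are not part of the patch); it models the fold logic the patch implements:

```cpp
// Illustrative model of the BEXTR/BEXTRI fold added by this patch.
// Control word: bits 7:0 = start position (shift), bits 15:8 = length.
#include <cstdint>
#include <cstdio>

static uint64_t bextrModel(uint64_t Src, uint64_t Control, unsigned BitWidth) {
  uint64_t Shift = Control & 0xff;
  uint64_t Length = (Control >> 8) & 0xff;
  if (Length == 0 || Shift >= BitWidth)
    return 0;                              // the "fold to zero" cases
  if (Length > BitWidth)
    Length = BitWidth;                     // length saturates at the type width
  uint64_t Mask = Length == 64 ? ~0ULL : ((1ULL << Length) - 1);
  return (Src >> Shift) & Mask;            // the fully constant-folded result
}

int main() {
  // Control 4112 = shift 16, length 16; extracting from 0xDEADBEEF yields
  // 0xDEAD (57005), matching test_x86_tbm_bextri_u32_constfold below.
  printf("%llu\n", (unsigned long long)bextrModel(0xDEADBEEFULL, 4112, 32));
  return 0;
}
```

The same decoding explains the zero-length and large-shift tests in the new test file: a control of 1 has length 0, and controls 288/320 shift past the operand width, so both fold to 0.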
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 26
-rw-r--r-- | llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll | 203
2 files changed, 229 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3046f9ba6cd..5a72373921e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2248,6 +2248,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;
 
+  case Intrinsic::x86_bmi_bextr_32:
+  case Intrinsic::x86_bmi_bextr_64:
+  case Intrinsic::x86_tbm_bextri_u32:
+  case Intrinsic::x86_tbm_bextri_u64:
+    // If the RHS is a constant we can try some simplifications.
+    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      uint64_t Shift = C->getZExtValue();
+      uint64_t Length = (Shift >> 8) & 0xff;
+      Shift &= 0xff;
+      unsigned BitWidth = II->getType()->getIntegerBitWidth();
+      // If the length is 0 or the shift is out of range, replace with zero.
+      if (Length == 0 || Shift >= BitWidth)
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      // If the LHS is also a constant, we can completely constant fold this.
+      if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Result = InC->getZExtValue() >> Shift;
+        if (Length > BitWidth)
+          Length = BitWidth;
+        Result &= maskTrailingOnes<uint64_t>(Length);
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+      // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
+      // are only masking bits that a shift already cleared?
+    }
+    break;
+
   case Intrinsic::x86_vcvtph2ps_128:
   case Intrinsic::x86_vcvtph2ps_256: {
     auto Arg = II->getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
new file mode 100644
index 00000000000..f090461fb04
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
+
+define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.tbm.bextri.u32(i32 [[A:%.*]], i32 1296)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1296)
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_zero_length(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_zero_length(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1)
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_large_shift(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_large_shift(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 288)
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.tbm.bextri.u64(i64 [[A:%.*]], i64 1312)
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1312)
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_zero_length(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_zero_length(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1)
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_large_shift(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_large_shift(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 320)
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold(
+; CHECK-NEXT:    ret i32 57005
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold2(
+; CHECK-NEXT:    ret i32 233495534
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold3(
+; CHECK-NEXT:    ret i32 233495534
+;
+  %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold(
+; CHECK-NEXT:    ret i64 57005
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold2(
+; CHECK-NEXT:    ret i64 233495534
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold3(
+; CHECK-NEXT:    ret i64 233495534
+;
+  %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
+  ret i64 %1
+}
+
+define i32 @test_x86_bmi_bextri_32(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bextr.32(i32 [[A:%.*]], i32 1296)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1296)
+  ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_zero_length(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_zero_length(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1)
+  ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_large_shift(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_large_shift(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 288)
+  ret i32 %1
+}
+
+define i64 @test_x86_bmi_bextri_64(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bextr.64(i64 [[A:%.*]], i64 1312)
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1312)
+  ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_zero_length(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_zero_length(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1)
+  ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_large_shift(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_large_shift(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 320)
+  ret i64 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold(
+; CHECK-NEXT:    ret i32 57005
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold2(
+; CHECK-NEXT:    ret i32 233495534
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold3(
+; CHECK-NEXT:    ret i32 233495534
+;
+  %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
+  ret i32 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold(
+; CHECK-NEXT:    ret i64 57005
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
+  ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold2(
+; CHECK-NEXT:    ret i64 233495534
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
+  ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold3(
+; CHECK-NEXT:    ret i64 233495534
+;
+  %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
+  ret i64 %1
+}