diff options
author | Craig Topper <craig.topper@intel.com> | 2018-09-30 03:01:46 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-09-30 03:01:46 +0000 |
commit | 1709829fede301a390163480ef51f0e140b0d028 (patch) | |
tree | aed77e311f7ef31b3d20068937e902f9671af3a2 | |
parent | 6e6d545d2488b070c32a9c7e21cf4a1e5c6d4683 (diff) | |
download | bcm5719-llvm-1709829fede301a390163480ef51f0e140b0d028.tar.gz bcm5719-llvm-1709829fede301a390163480ef51f0e140b0d028.zip |
[X86] Disable BMI BEXTR in X86DAGToDAGISel::matchBEXTRFromAnd unless we're compiling for a CPU with single uop BEXTR
Summary:
This function turns (X >> C1) & C2 into a BMI BEXTR or TBM BEXTRI instruction. For BMI BEXTR we have to materialize an immediate into a register to feed to the BEXTR instruction.
The BMI BEXTR instruction is 2 uops on Intel CPUs. It looks like on SKL it's one port 0/6 uop and one port 1/5 uop, despite what Agner's tables say. I know one of the uops is a regular shift uop so it would have to go through the port 0/6 shifter unit. So that's the same or worse execution-wise than the shift+and, which is one 0/6 uop and one 0/1/5/6 uop. The move immediate into register is an additional 0/1/5/6 uop.
For now I've limited this transform to AMD CPUs which have a single uop BEXTR. It may also make sense if we can fold a load, or if the and immediate is larger than 32 bits and can't be encoded as a sign-extended 32-bit value, or if LICM or CSE can hoist the move immediate and share it. But we'd need to look more carefully at that. In the regression I looked at, it doesn't look like load folding or large immediates were occurring, so the regression isn't caused by the loss of those. So we could try to be smarter here if we find a compelling case.
Reviewers: RKSimon, spatel, lebedev.ri, andreadb
Reviewed By: RKSimon
Subscribers: llvm-commits, andreadb, RKSimon
Differential Revision: https://reviews.llvm.org/D52570
llvm-svn: 343399
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/bmi-x86_64.ll | 64 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/bmi.ll | 85 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/extract-bits.ll | 20 |
6 files changed, 136 insertions, 55 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index e5d5d929be9..2c48b54c380 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -400,6 +400,10 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", "Support movdir64b instruction">; +def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true", + "Indicates that the BEXTR instruction is implemented as a single uop " + "with good throughput.">; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -987,6 +991,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast15ByteNOP, + FeatureFastBEXTR, FeatureFastPartialYMMorZMMWrite ]>; @@ -1042,6 +1047,7 @@ def : Proc<"bdver2", [ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1074,6 +1080,7 @@ def : Proc<"bdver3", [ FeatureFSGSBase, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1105,6 +1112,7 @@ def : Proc<"bdver4", [ FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, + FeatureFastBEXTR, FeatureFast11ByteNOP, FeatureMWAITX, FeatureMacroFusion @@ -1130,6 +1138,7 @@ def: ProcessorModel<"znver1", Znver1Model, [ FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, + FeatureFastBEXTR, FeatureFast15ByteNOP, FeatureMacroFusion, FeatureMMX, diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 32ad262e558..a0ef4b61263 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2590,7 +2590,14 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!Subtarget->hasBMI() && 
!Subtarget->hasTBM()) + // If we have TBM we can use an immediate for the control. If we have BMI + // we should only do this if the BEXTR instruction is implemented well. + // Otherwise moving the control into a register makes this more costly. + // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM + // hoisting the move immediate would make it worthwhile with a less optimal + // BEXTR? + if (!Subtarget->hasTBM() && + !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR())) return false; // Must have a shift right. diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 82ff9420b17..5dd406b1400 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -385,6 +385,9 @@ protected: /// Processor supports PCONFIG instruction bool HasPCONFIG = false; + /// Processor has a single uop BEXTR implementation. + bool HasFastBEXTR = false; + /// Use a retpoline thunk rather than indirect calls to block speculative /// execution. 
bool UseRetpolineIndirectCalls = false; @@ -629,6 +632,7 @@ public: bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasFastBEXTR() const { return HasFastBEXTR; } bool hasMacroFusion() const { return HasMacroFusion; } bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll index 970dafdde4b..81ac3132987 100644 --- a/llvm/test/CodeGen/X86/bmi-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI1,BMI1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2,BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI1,BMI1-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI2,BMI2-FAST declare i64 @llvm.x86.bmi.bextr.64(i64, i64) @@ -14,11 +16,18 @@ define i64 @bextr64(i64 %x, i64 %y) { } define i64 @bextr64b(i64 %x) uwtable ssp { -; CHECK-LABEL: bextr64b: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, %edi, %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movq %rdi, %rax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: 
retq +; +; BEXTR-FAST-LABEL: bextr64b: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax +; BEXTR-FAST-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -37,11 +46,18 @@ define i64 @bextr64_subreg(i64 %x) uwtable ssp { } define i64 @bextr64b_load(i64* %x) { -; CHECK-LABEL: bextr64b_load: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, (%rdi), %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b_load: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movl (%rdi), %eax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64b_load: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax +; BEXTR-FAST-NEXT: retq %1 = load i64, i64* %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 @@ -61,11 +77,25 @@ define i64 @bextr64c(i64 %x, i32 %y) { } define i64 @bextr64d(i64 %a) { -; CHECK-LABEL: bextr64d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $8450, %eax # imm = 0x2102 -; CHECK-NEXT: bextrq %rax, %rdi, %rax -; CHECK-NEXT: retq +; BMI1-SLOW-LABEL: bextr64d: +; BMI1-SLOW: # %bb.0: # %entry +; BMI1-SLOW-NEXT: shrq $2, %rdi +; BMI1-SLOW-NEXT: movl $8448, %eax # imm = 0x2100 +; BMI1-SLOW-NEXT: bextrq %rax, %rdi, %rax +; BMI1-SLOW-NEXT: retq +; +; BMI2-SLOW-LABEL: bextr64d: +; BMI2-SLOW: # %bb.0: # %entry +; BMI2-SLOW-NEXT: shrq $2, %rdi +; BMI2-SLOW-NEXT: movb $33, %al +; BMI2-SLOW-NEXT: bzhiq %rax, %rdi, %rax +; BMI2-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64d: +; BEXTR-FAST: # %bb.0: # %entry +; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102 +; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax +; BEXTR-FAST-NEXT: retq entry: %shr = lshr i64 %a, 2 %and = and i64 %shr, 8589934591 diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index 945ff822dea..be6f193414a 100644 --- 
a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-BEXTR define i32 @andn32(i32 %x, i32 %y) { ; X86-LABEL: andn32: @@ -342,17 +344,31 @@ define i32 @bextr32_load(i32* %x, i32 %y) { } define i32 @bextr32b(i32 %x) uwtable ssp { -; X86-LABEL: bextr32b: -; X86: # %bb.0: -; X86-NEXT: movl $3076, %eax # imm = 0xC04 -; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, %edi, %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; 
X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl %edi, %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax +; X64-FAST-BEXTR-NEXT: retq %1 = lshr i32 %x, 4 %2 = and i32 %1, 4095 ret i32 %2 @@ -376,18 +392,33 @@ define i32 @bextr32_subreg(i32 %x) uwtable ssp { } define i32 @bextr32b_load(i32* %x) uwtable ssp { -; X86-LABEL: bextr32b_load: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $3076, %ecx # imm = 0xC04 -; X86-NEXT: bextrl %ecx, (%eax), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b_load: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, (%rdi), %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b_load: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b_load: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b_load: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax +; 
X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b_load: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax +; X64-FAST-BEXTR-NEXT: retq %1 = load i32, i32* %x %2 = lshr i32 %1, 4 %3 = and i32 %2, 4095 diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll index 6f7fffb5d7c..e9a3cecc1ae 100644 --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s 
--check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s 
--check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll |