summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-06-06 10:52:10 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-06-06 10:52:10 +0000
commit3d1415889103642e9c6cc943345d36ac53886337 (patch)
tree5319697ab7f46666303b34d4a97f352cbeb22f41
parent1b8bfd7e7d63704eb665922d331c7317ae8b1786 (diff)
downloadbcm5719-llvm-3d1415889103642e9c6cc943345d36ac53886337.tar.gz
bcm5719-llvm-3d1415889103642e9c6cc943345d36ac53886337.zip
[X86][BMI][TBM] Only demand bottom 16-bits of the BEXTR control op (PR34042)
Only the bottom 16 bits of BEXTR's control operand are required (bits 7:0 INDEX, bits 15:8 LENGTH). Differential Revision: https://reviews.llvm.org/D47690. llvm-svn: 334083
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp8
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp34
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td11
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td70
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h4
-rw-r--r--llvm/test/CodeGen/X86/bmi-x86_64.ll4
-rw-r--r--llvm/test/CodeGen/X86/bmi.ll3
-rw-r--r--llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll10
8 files changed, 99 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 392583dea8a..c7e91678374 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1780,10 +1780,10 @@ bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
}
// In static codegen with small code model, we can get the address of a label
- // into a register with 'movl'. TableGen has already made sure we're looking
- // at a label of some kind.
- assert(N->getOpcode() == X86ISD::Wrapper &&
- "Unexpected node type for MOV32ri64");
+ // into a register with 'movl'
+ if (N->getOpcode() != X86ISD::Wrapper)
+ return false;
+
N = N.getOperand(0);
// At least GNU as does not accept 'movl' for TPOFF relocations.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8d5a5e53dfd..2e454fafafd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36834,6 +36834,39 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ unsigned NumBits = VT.getSizeInBits();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+
+ // TODO - Constant folding of the whole BEXTR node is not implemented yet.
+ if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // Constant control (Op1): if any bit above bit 15 is set, rebuild the node with the constant masked to 0xFFFF.
+ // NOTE: This is done by hand because SimplifyDemandedBits won't do this for constants.
+ const APInt &Val1 = Cst1->getAPIntValue();
+ APInt MaskedVal1 = Val1 & 0xFFFF;
+ if (MaskedVal1 != Val1)
+ return DAG.getNode(X86ISD::BEXTR, SDLoc(N), VT, Op0,
+ DAG.getConstant(MaskedVal1, SDLoc(N), VT));
+ }
+
+ // Only the low 16 bits of the control operand (Op1) are demanded; Op0 is not simplified here.
+ KnownBits Known;
+ APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
+ if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
static bool isNullFPScalarOrVectorConst(SDValue V) {
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
@@ -39220,6 +39253,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
+ case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index e455349e0d8..c863bac9722 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -2040,14 +2040,3 @@ let Predicates = [HasBMI, NoTBM] in {
(MOV32ri64 mov64imm32:$src2),
sub_32bit))>;
} // HasBMI, NoTBM
-
-let Predicates = [HasTBM] in {
- def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
- (BEXTRI32ri GR32:$src1, imm:$src2)>;
- def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
- (BEXTRI32mi addr:$src1, imm:$src2)>;
- def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
- (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
- def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
- (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
-}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 8a03b48e34b..4da5af69b77 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2357,16 +2357,16 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
-multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic Int,
- PatFrag ld_frag, X86FoldableSchedWrite Sched> {
+multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag, X86FoldableSchedWrite Sched> {
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
T8PS, VEX, Sched<[Sched]>;
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
(implicit EFLAGS)]>, T8PS, VEX,
Sched<[Sched.Folded,
// x86memop:$src1
@@ -2377,17 +2377,36 @@ multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
- defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
- int_x86_bmi_bextr_32, loadi32, WriteBEXTR>;
- defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
- int_x86_bmi_bextr_64, loadi64, WriteBEXTR>, VEX_W;
+ defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
+ X86bextr, loadi32, WriteBEXTR>;
+ defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
+ X86bextr, loadi64, WriteBEXTR>, VEX_W;
+}
+
+multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag, X86FoldableSchedWrite Sched> {
+ def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8PS, VEX, Sched<[Sched]>;
+ def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8PS, VEX,
+ Sched<[Sched.Folded,
+ // x86memop:$src1 - five ReadDefault slots (presumably one per x86 address component; confirm)
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2 - the register operand of the load-folded form, scheduled with ReadAfterLd
+ ReadAfterLd]>;
}
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
- defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
- defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
+ defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
+ int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
+ defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
+ int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -2507,31 +2526,30 @@ let Predicates = [HasBMI2] in {
//
let Predicates = [HasTBM], Defs = [EFLAGS] in {
-multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- Intrinsic Int, Operand immtype,
- SDPatternOperator immoperator,
- X86FoldableSchedWrite Sched> {
+multiclass tbm_ternary_imm<bits<8> opc, RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ SDNode OpNode, Operand immtype,
+ SDPatternOperator immoperator,
+ X86FoldableSchedWrite Sched> {
def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
!strconcat(OpcodeStr,
"\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
- [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
+ [(set RC:$dst, (OpNode RC:$src1, immoperator:$cntl))]>,
XOP, XOPA, Sched<[Sched]>;
def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, immtype:$cntl),
!strconcat(OpcodeStr,
"\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
- [(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))]>,
+ [(set RC:$dst, (OpNode (ld_frag addr:$src1), immoperator:$cntl))]>,
XOP, XOPA, Sched<[Sched.Folded]>;
}
-defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr{l}", i32mem, loadi32,
- int_x86_tbm_bextri_u32, i32imm, imm,
- WriteBEXTR>;
+defm BEXTRI32 : tbm_ternary_imm<0x10, GR32, "bextr{l}", i32mem, loadi32,
+ X86bextr, i32imm, imm, WriteBEXTR>;
let ImmT = Imm32S in
-defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr{q}", i64mem, loadi64,
- int_x86_tbm_bextri_u64, i64i32imm,
- i64immSExt32, WriteBEXTR>, VEX_W;
+defm BEXTRI64 : tbm_ternary_imm<0x10, GR64, "bextr{q}", i64mem, loadi64,
+ X86bextr, i64i32imm,
+ i64immSExt32, WriteBEXTR>, VEX_W;
multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
RegisterClass RC, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 219366da299..9e3810b10ca 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1345,6 +1345,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
@@ -1456,6 +1458,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll
index 5a733ca3cf0..970dafdde4b 100644
--- a/llvm/test/CodeGen/X86/bmi-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll
@@ -52,8 +52,8 @@ define i64 @bextr64b_load(i64* %x) {
define i64 @bextr64c(i64 %x, i32 %y) {
; CHECK-LABEL: bextr64c:
; CHECK: # %bb.0:
-; CHECK-NEXT: movslq %esi, %rax
-; CHECK-NEXT: bextrq %rax, %rdi, %rax
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
%tmp0 = sext i32 %y to i64
%tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0)
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index 40e4c66959b..5ec2da420c4 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -404,8 +404,7 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) {
;
; X64-LABEL: bextr32c:
; X64: # %bb.0:
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: bextrl %eax, %edi, %eax
+; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
%tmp0 = sext i16 %y to i32
%tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
index 0f4b7ce6c93..98ee8f07c00 100644
--- a/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
@@ -24,6 +24,16 @@ entry:
ret i64 %0
}
+define i64 @test_x86_tbm_bextri_u64_bigint(i64 %a) nounwind readnone {
+; CHECK-LABEL: test_x86_tbm_bextri_u64_bigint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bextrq $65535, %rdi, %rax # imm = 0xFFFF
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 549755813887)
+ ret i64 %0
+}
+
define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind readnone {
; CHECK-LABEL: test_x86_tbm_bextri_u64_z:
; CHECK: # %bb.0: # %entry
OpenPOWER on IntegriCloud