diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrCompiler.td | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrShiftRotate.td | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/shift-bmi2.ll | 18 |
3 files changed, 68 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 3547c6526f1..d1cab69856b 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1709,6 +1709,35 @@ let Predicates = [HasBMI2] in { (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; } + + let AddedComplexity = -20 in { + def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)), + (SARX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)), + (SARX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)), + (SHRX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)), + (SHRX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)), + (SHLX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)), + (SHLX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } } // (anyext (setcc_carry)) -> (setcc_carry) diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index 0efb383e1c8..44bcef6d98b 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -961,16 +961,40 @@ let Predicates = [HasBMI2] in { (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; } - // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor - // + // Artificially lower the complexity so that we'll favor // mov (%ecx), %esi // shl $imm, $esi // // over // - // movb $imm %al + // movb $imm, %al // shlx %al, (%ecx), %esi - // - // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole - // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible. + let AddedComplexity = -20 in { + def : Pat<(sra (loadi32 addr:$src1), GR8:$src2), + (SARX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra (loadi64 addr:$src1), GR8:$src2), + (SARX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl (loadi32 addr:$src1), GR8:$src2), + (SHRX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl (loadi64 addr:$src1), GR8:$src2), + (SHRX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl (loadi32 addr:$src1), GR8:$src2), + (SHLX32rm addr:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl (loadi64 addr:$src1), GR8:$src2), + (SHLX64rm addr:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } } diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll index f9bca503218..008dce7bb60 100644 --- a/llvm/test/CodeGen/X86/shift-bmi2.ll +++ b/llvm/test/CodeGen/X86/shift-bmi2.ll @@ -36,9 +36,9 @@ define i32 @shl32i(i32 %x) nounwind uwtable readnone { define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: shl32p: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; BMI2-NEXT: shlxl %eax, (%ecx), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: shlxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: shl32p: @@ -126,9 +126,9 @@ define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone { define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: lshr32p: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; BMI2-NEXT: shrxl %eax, (%ecx), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: shrxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: lshr32p: @@ -177,9 +177,9 @@ define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone { define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: ashr32p: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; BMI2-NEXT: sarxl %eax, (%ecx), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: sarxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: ashr32p: |

