summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrCompiler.td 29
-rw-r--r-- llvm/lib/Target/X86/X86InstrShiftRotate.td 36
-rw-r--r-- llvm/test/CodeGen/X86/shift-bmi2.ll 18
3 files changed, 68 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 3547c6526f1..d1cab69856b 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1709,6 +1709,35 @@ let Predicates = [HasBMI2] in {
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+
+ let AddedComplexity = -20 in {
+ def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SARX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SARX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SHRX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SHRX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SHLX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SHLX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
}
// (anyext (setcc_carry)) -> (setcc_carry)
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 0efb383e1c8..44bcef6d98b 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -961,16 +961,40 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
- // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
- //
+ // Artificially lower the complexity so that we'll favor
// mov (%ecx), %esi
// shl $imm, $esi
//
// over
//
- // movb $imm %al
+ // movb $imm, %al
// shlx %al, (%ecx), %esi
- //
- // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
- // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
+ let AddedComplexity = -20 in {
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
}
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index f9bca503218..008dce7bb60 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -36,9 +36,9 @@ define i32 @shl32i(i32 %x) nounwind uwtable readnone {
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: shlxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: shlxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: shl32p:
@@ -126,9 +126,9 @@ define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: shrxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: shrxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: lshr32p:
@@ -177,9 +177,9 @@ define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: sarxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: sarxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: ashr32p:
OpenPOWER on IntegriCloud