summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 18
-rw-r--r-- llvm/lib/Target/X86/X86InstrShiftRotate.td 28
-rw-r--r-- llvm/test/CodeGen/X86/rot32.ll 16
-rw-r--r-- llvm/test/CodeGen/X86/rot64.ll 16
4 files changed, 52 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c03b602dfe0..e157bc13ec2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4083,6 +4083,20 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
return true;
}
+
+static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
+ MIB->setDesc(Desc); // Switch the pseudo over to the real opcode picked by the caller.
+ int64_t ShiftAmt = MIB->getOperand(2).getImm();
+ // Operand 2 is the shift immediate: drop it for now so a second source register can be added first.
+ MIB->RemoveOperand(2);
+ // Duplicate operand 1's register as the extra source; only its undef state is propagated, not kill.
+ MIB.addReg(MIB->getOperand(1).getReg(),
+ getUndefRegState(MIB->getOperand(1).isUndef()));
+ // Re-add the saved shift amount now that the duplicated register is in place.
+ MIB.addImm(ShiftAmt);
+ return true;
+}
+
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -4237,6 +4251,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::XOR64_FP:
case X86::XOR32_FP:
return expandXorFP(MIB, *this);
+ case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+ case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+ case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+ case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break;
case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break;
case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break;
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 633b7099af6..9d974b716dd 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -838,16 +838,24 @@ def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
// Sandy Bridge and newer Intel processors support faster rotates using
// SHLD to avoid a partial flag update on the normal rotate instructions.
-let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
- def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
- def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
-
- def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
- (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
- def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
- (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+// Use a pseudo so that TwoAddressInstructionPass and register allocation will
+// see this as a unary instruction; expandPostRAPseudo rewrites it to SHLD/SHRD.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
+ Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri],
+ Constraints = "$src1 = $dst" in {
+ def SHLDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+ (ins GR32:$src1, u8imm:$shamt), "",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>;
+ def SHLDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+ (ins GR64:$src1, u8imm:$shamt), "",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>;
+
+ def SHRDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+ (ins GR32:$src1, u8imm:$shamt), "",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>;
+ def SHRDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+ (ins GR64:$src1, u8imm:$shamt), "",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
}
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
diff --git a/llvm/test/CodeGen/X86/rot32.ll b/llvm/test/CodeGen/X86/rot32.ll
index 29dd679ae9d..59041a1d9ec 100644
--- a/llvm/test/CodeGen/X86/rot32.ll
+++ b/llvm/test/CodeGen/X86/rot32.ll
@@ -127,7 +127,7 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
; SHLD64-LABEL: xfoo:
; SHLD64: # %bb.0: # %entry
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shldl $7, %edi, %eax
+; SHLD64-NEXT: shldl $7, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: xfoo:
@@ -233,7 +233,7 @@ define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
; SHLD64-LABEL: xun:
; SHLD64: # %bb.0: # %entry
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shldl $25, %edi, %eax
+; SHLD64-NEXT: shldl $25, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: xun:
@@ -341,7 +341,7 @@ define i32 @fshl(i32 %x) nounwind {
; SHLD64-LABEL: fshl:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shldl $7, %edi, %eax
+; SHLD64-NEXT: shldl $7, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshl:
@@ -380,7 +380,7 @@ define i32 @fshl1(i32 %x) nounwind {
; SHLD64-LABEL: fshl1:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shldl $1, %edi, %eax
+; SHLD64-NEXT: shldl $1, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshl1:
@@ -418,7 +418,7 @@ define i32 @fshl31(i32 %x) nounwind {
; SHLD64-LABEL: fshl31:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shldl $31, %edi, %eax
+; SHLD64-NEXT: shldl $31, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshl31:
@@ -498,7 +498,7 @@ define i32 @fshr(i32 %x) nounwind {
; SHLD64-LABEL: fshr:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shrdl $7, %edi, %eax
+; SHLD64-NEXT: shrdl $7, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshr:
@@ -537,7 +537,7 @@ define i32 @fshr1(i32 %x) nounwind {
; SHLD64-LABEL: fshr1:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shrdl $1, %edi, %eax
+; SHLD64-NEXT: shrdl $1, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshr1:
@@ -575,7 +575,7 @@ define i32 @fshr31(i32 %x) nounwind {
; SHLD64-LABEL: fshr31:
; SHLD64: # %bb.0:
; SHLD64-NEXT: movl %edi, %eax
-; SHLD64-NEXT: shrdl $31, %edi, %eax
+; SHLD64-NEXT: shrdl $31, %eax, %eax
; SHLD64-NEXT: retq
;
; BMI264-LABEL: fshr31:
diff --git a/llvm/test/CodeGen/X86/rot64.ll b/llvm/test/CodeGen/X86/rot64.ll
index fdc7e9ab21a..8ecb2773f1c 100644
--- a/llvm/test/CodeGen/X86/rot64.ll
+++ b/llvm/test/CodeGen/X86/rot64.ll
@@ -77,7 +77,7 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
; SHLD-LABEL: xfoo:
; SHLD: # %bb.0: # %entry
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shldq $7, %rdi, %rax
+; SHLD-NEXT: shldq $7, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: xfoo:
@@ -139,7 +139,7 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
; SHLD-LABEL: xun:
; SHLD: # %bb.0: # %entry
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shldq $57, %rdi, %rax
+; SHLD-NEXT: shldq $57, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: xun:
@@ -201,7 +201,7 @@ define i64 @fshl(i64 %x) nounwind {
; SHLD-LABEL: fshl:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shldq $7, %rdi, %rax
+; SHLD-NEXT: shldq $7, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshl:
@@ -223,7 +223,7 @@ define i64 @fshl1(i64 %x) nounwind {
; SHLD-LABEL: fshl1:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shldq $1, %rdi, %rax
+; SHLD-NEXT: shldq $1, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshl1:
@@ -244,7 +244,7 @@ define i64 @fshl63(i64 %x) nounwind {
; SHLD-LABEL: fshl63:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shldq $63, %rdi, %rax
+; SHLD-NEXT: shldq $63, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshl63:
@@ -287,7 +287,7 @@ define i64 @fshr(i64 %x) nounwind {
; SHLD-LABEL: fshr:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shrdq $7, %rdi, %rax
+; SHLD-NEXT: shrdq $7, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshr:
@@ -309,7 +309,7 @@ define i64 @fshr1(i64 %x) nounwind {
; SHLD-LABEL: fshr1:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shrdq $1, %rdi, %rax
+; SHLD-NEXT: shrdq $1, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshr1:
@@ -330,7 +330,7 @@ define i64 @fshr63(i64 %x) nounwind {
; SHLD-LABEL: fshr63:
; SHLD: # %bb.0:
; SHLD-NEXT: movq %rdi, %rax
-; SHLD-NEXT: shrdq $63, %rdi, %rax
+; SHLD-NEXT: shrdq $63, %rax, %rax
; SHLD-NEXT: retq
;
; BMI2-LABEL: fshr63:
OpenPOWER on IntegriCloud