diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrShiftRotate.td | 28 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/rot32.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/rot64.ll | 16 |
4 files changed, 52 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index c03b602dfe0..e157bc13ec2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4083,6 +4083,20 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB, return true; } + +static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { + MIB->setDesc(Desc); + int64_t ShiftAmt = MIB->getOperand(2).getImm(); + // Temporarily remove the immediate so we can add another source register. + MIB->RemoveOperand(2); + // Add the register. Don't copy the kill flag if there is one. + MIB.addReg(MIB->getOperand(1).getReg(), + getUndefRegState(MIB->getOperand(1).isUndef())); + // Add back the immediate. + MIB.addImm(ShiftAmt); + return true; +} + bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { bool HasAVX = Subtarget.hasAVX(); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); @@ -4237,6 +4251,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::XOR64_FP: case X86::XOR32_FP: return expandXorFP(MIB, *this); + case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8)); + case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8)); + case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8)); + case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8)); case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break; case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break; case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break; diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index 633b7099af6..9d974b716dd 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -838,16 +838,24 @@ def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst), // Sandy Bridge and newer Intel processors support faster rotates using // SHLD to avoid 
a partial flag update on the normal rotate instructions. -let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in { - def : Pat<(rotl GR32:$src, (i8 imm:$shamt)), - (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>; - def : Pat<(rotl GR64:$src, (i8 imm:$shamt)), - (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>; - - def : Pat<(rotr GR32:$src, (i8 imm:$shamt)), - (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>; - def : Pat<(rotr GR64:$src, (i8 imm:$shamt)), - (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>; +// Use a pseudo so that TwoAddressInstructionPass and register allocation will +// see this as a unary instruction. +let Predicates = [HasFastSHLDRotate], AddedComplexity = 5, + Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri], + Constraints = "$src1 = $dst" in { + def SHLDROT32ri : I<0, Pseudo, (outs GR32:$dst), + (ins GR32:$src1, u8imm:$shamt), "", + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>; + def SHLDROT64ri : I<0, Pseudo, (outs GR64:$dst), + (ins GR64:$src1, u8imm:$shamt), "", + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>; + + def SHRDROT32ri : I<0, Pseudo, (outs GR32:$dst), + (ins GR32:$src1, u8imm:$shamt), "", + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>; + def SHRDROT64ri : I<0, Pseudo, (outs GR64:$dst), + (ins GR64:$src1, u8imm:$shamt), "", + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>; } def ROT32L2R_imm8 : SDNodeXForm<imm, [{ diff --git a/llvm/test/CodeGen/X86/rot32.ll b/llvm/test/CodeGen/X86/rot32.ll index 29dd679ae9d..59041a1d9ec 100644 --- a/llvm/test/CodeGen/X86/rot32.ll +++ b/llvm/test/CodeGen/X86/rot32.ll @@ -127,7 +127,7 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone { ; SHLD64-LABEL: xfoo: ; SHLD64: # %bb.0: # %entry ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shldl $7, %edi, %eax +; SHLD64-NEXT: shldl $7, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: xfoo: @@ -233,7 +233,7 @@ define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone { ; SHLD64-LABEL: xun: ; SHLD64: # %bb.0: # 
%entry ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shldl $25, %edi, %eax +; SHLD64-NEXT: shldl $25, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: xun: @@ -341,7 +341,7 @@ define i32 @fshl(i32 %x) nounwind { ; SHLD64-LABEL: fshl: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shldl $7, %edi, %eax +; SHLD64-NEXT: shldl $7, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshl: @@ -380,7 +380,7 @@ define i32 @fshl1(i32 %x) nounwind { ; SHLD64-LABEL: fshl1: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shldl $1, %edi, %eax +; SHLD64-NEXT: shldl $1, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshl1: @@ -418,7 +418,7 @@ define i32 @fshl31(i32 %x) nounwind { ; SHLD64-LABEL: fshl31: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shldl $31, %edi, %eax +; SHLD64-NEXT: shldl $31, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshl31: @@ -498,7 +498,7 @@ define i32 @fshr(i32 %x) nounwind { ; SHLD64-LABEL: fshr: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shrdl $7, %edi, %eax +; SHLD64-NEXT: shrdl $7, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshr: @@ -537,7 +537,7 @@ define i32 @fshr1(i32 %x) nounwind { ; SHLD64-LABEL: fshr1: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shrdl $1, %edi, %eax +; SHLD64-NEXT: shrdl $1, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshr1: @@ -575,7 +575,7 @@ define i32 @fshr31(i32 %x) nounwind { ; SHLD64-LABEL: fshr31: ; SHLD64: # %bb.0: ; SHLD64-NEXT: movl %edi, %eax -; SHLD64-NEXT: shrdl $31, %edi, %eax +; SHLD64-NEXT: shrdl $31, %eax, %eax ; SHLD64-NEXT: retq ; ; BMI264-LABEL: fshr31: diff --git a/llvm/test/CodeGen/X86/rot64.ll b/llvm/test/CodeGen/X86/rot64.ll index fdc7e9ab21a..8ecb2773f1c 100644 --- a/llvm/test/CodeGen/X86/rot64.ll +++ b/llvm/test/CodeGen/X86/rot64.ll @@ -77,7 +77,7 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; SHLD-LABEL: xfoo: ; SHLD: # %bb.0: # %entry ; SHLD-NEXT: movq %rdi, %rax 
-; SHLD-NEXT: shldq $7, %rdi, %rax +; SHLD-NEXT: shldq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xfoo: @@ -139,7 +139,7 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { ; SHLD-LABEL: xun: ; SHLD: # %bb.0: # %entry ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shldq $57, %rdi, %rax +; SHLD-NEXT: shldq $57, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xun: @@ -201,7 +201,7 @@ define i64 @fshl(i64 %x) nounwind { ; SHLD-LABEL: fshl: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shldq $7, %rdi, %rax +; SHLD-NEXT: shldq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshl: @@ -223,7 +223,7 @@ define i64 @fshl1(i64 %x) nounwind { ; SHLD-LABEL: fshl1: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shldq $1, %rdi, %rax +; SHLD-NEXT: shldq $1, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshl1: @@ -244,7 +244,7 @@ define i64 @fshl63(i64 %x) nounwind { ; SHLD-LABEL: fshl63: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shldq $63, %rdi, %rax +; SHLD-NEXT: shldq $63, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshl63: @@ -287,7 +287,7 @@ define i64 @fshr(i64 %x) nounwind { ; SHLD-LABEL: fshr: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shrdq $7, %rdi, %rax +; SHLD-NEXT: shrdq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshr: @@ -309,7 +309,7 @@ define i64 @fshr1(i64 %x) nounwind { ; SHLD-LABEL: fshr1: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shrdq $1, %rdi, %rax +; SHLD-NEXT: shrdq $1, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshr1: @@ -330,7 +330,7 @@ define i64 @fshr63(i64 %x) nounwind { ; SHLD-LABEL: fshr63: ; SHLD: # %bb.0: ; SHLD-NEXT: movq %rdi, %rax -; SHLD-NEXT: shrdq $63, %rdi, %rax +; SHLD-NEXT: shrdq $63, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: fshr63: |

