summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp11
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp2
-rw-r--r--llvm/test/CodeGen/X86/pr32241.ll16
3 files changed, 24 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 4a46cb0874e..6ffff041929 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1559,6 +1559,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) {
+ if (!Subtarget->is64Bit()) {
+ // If this isn't a 64-bit target we need to constrain the reg class
+ // to avoid high registers here otherwise we might use a high register
+ // to copy from a mask register.
+ unsigned OldReg = ResultReg;
+ ResultReg = createResultReg(&X86::GR8_ABCD_LRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(OldReg);
+ }
+
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d3838149a0e..60c6f745664 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6325,6 +6325,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
return X86::KMOVWrk;
}
if (X86::GR8RegClass.contains(DestReg)) {
+ assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
DestReg = getX86SubSuperRegister(DestReg, 32);
return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
}
@@ -6348,6 +6349,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
return X86::KMOVWkr;
}
if (X86::GR8RegClass.contains(SrcReg)) {
+ assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
SrcReg = getX86SubSuperRegister(SrcReg, 32);
return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
}
diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll
index 65b36d09aaf..102977a6509 100644
--- a/llvm/test/CodeGen/X86/pr32241.ll
+++ b/llvm/test/CodeGen/X86/pr32241.ll
@@ -4,9 +4,14 @@
define i32 @_Z3foov() {
; CHECK-LABEL: _Z3foov:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: subl $24, %esp
+; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .Lcfi0:
-; CHECK-NEXT: .cfi_def_cfa_offset 28
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: subl $24, %esp
+; CHECK-NEXT: .Lcfi1:
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .Lcfi2:
+; CHECK-NEXT: .cfi_offset %ebx, -8
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
@@ -35,9 +40,9 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movb %ah, %cl
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: kmovw %ecx, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: andb $1, %ah
-; CHECK-NEXT: movzbl %ah, %ecx
+; CHECK-NEXT: kmovb %k0, %ebx
+; CHECK-NEXT: andb $1, %bl
+; CHECK-NEXT: movzbl %bl, %ecx
; CHECK-NEXT: xorl $-1, %ecx
; CHECK-NEXT: cmpl $0, %ecx
; CHECK-NEXT: kmovb %eax, %k0
@@ -58,6 +63,7 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $24, %esp
+; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
entry:
%aa = alloca i16, align 2
OpenPOWER on IntegriCloud