diff options
author | Eric Liu <ioeric@google.com> | 2018-01-30 14:18:33 +0000 |
---|---|---|
committer | Eric Liu <ioeric@google.com> | 2018-01-30 14:18:33 +0000 |
commit | 0b69b5ed85c298ca72150b81849c4f57d30e355e (patch) | |
tree | 8ad21f11cfbc1aaeb15d48a5fc67fa6582c158eb | |
parent | cbc2d1e1110cae108b6350eb4d5dc1cf861124a6 (diff) | |
download | bcm5719-llvm-0b69b5ed85c298ca72150b81849c4f57d30e355e.tar.gz bcm5719-llvm-0b69b5ed85c298ca72150b81849c4f57d30e355e.zip |
Revert "[X86] Avoid using high register trick for test instruction"
This reverts commit r323690, which causes a crash in llc. See the original commit thread for details.
llvm-svn: 323761
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 78 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrArithmetic.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/test-shrink.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/testb-je-fusion.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vastart-defs-eflags.ll | 9 |
7 files changed, 82 insertions, 29 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index cbc8eee28b0..e325d975486 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3073,33 +3073,67 @@ void X86DAGToDAGISel::Select(SDNode *Node) { return; } + // For example, "testl %eax, $2048" to "testb %ah, $8". + if (isShiftedUInt<8, 8>(Mask) && + (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { + // Shift the immediate right by 8 bits. + SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8); + SDValue Reg = N0.getOperand(0); + + // Extract the h-register. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, + MVT::i8, Reg); + + // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only + // target GR8_NOREX registers, so make sure the register class is + // forced. + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + CurDAG->RemoveDeadNode(Node); + return; + } + + // For example, "testl %eax, $32776" to "testw %ax, $32776". + // NOTE: We only want to form TESTW instructions if optimizing for + // min size. Otherwise we only save one byte and possibly get a length + // changing prefix penalty in the decoders. + if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 && + (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16); + SDValue Reg = N0.getOperand(0); + + // Extract the 16-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, + MVT::i16, Reg); + + // Emit a testw. 
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + CurDAG->RemoveDeadNode(Node); + return; + } + // For example, "testq %rax, $268468232" to "testl %eax, $268468232". - if (isUInt<32>(Mask) && + if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 && (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) { - MVT VT = MVT::i32; - int SubRegOp = X86::sub_32bit; - unsigned Op = X86::TEST32ri; - - // For example, "testl %eax, $32776" to "testw %ax, $32776". - // NOTE: We only want to form TESTW instructions if optimizing for - // min size. Otherwise we only save one byte and possibly get a length - // changing prefix penalty in the decoders. - if (OptForMinSize && isUInt<16>(Mask) && - (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { - VT = MVT::i16; - SubRegOp = X86::sub_16bit; - Op = X86::TEST16ri; - } - - SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); + SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32); SDValue Reg = N0.getOperand(0); - // Extract the subregister if necessary. - if (N0.getValueType() != VT) - Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); + // Extract the 32-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, + MVT::i32, Reg); - // Emit a testl or testw. - SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm); + // Emit a testl. + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 
1 : 0)), diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index d35b4338c72..d09deb5b758 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1257,6 +1257,14 @@ let isCompare = 1 in { def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>; let Predicates = [In64BitMode] in def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>; + + // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the + // register class is constrained to GR8_NOREX. This pseudo is explicitly + // marked side-effect free, since it doesn't have an isel pattern like + // other test instructions. + let isPseudo = 1, hasSideEffects = 0 in + def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } // Defs = [EFLAGS] def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 9c6c8600745..ba5f0f2130f 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8018,6 +8018,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::VMOVUPSZ256mr_NOVLX: return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); + case X86::TEST8ri_NOREX: + MI.setDesc(get(X86::TEST8ri)); + return true; case X86::MOV32ri64: MI.setDesc(get(X86::MOV32ri)); return true; diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index 4e11397dec4..67d95c2233d 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -86,6 +86,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::TEST16mr: case X86::TEST32mr: case X86::TEST64mr: + case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: case X86::AND16ri8: diff --git 
a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll index 0cc7849e8e4..a054c1f1edb 100644 --- a/llvm/test/CodeGen/X86/test-shrink.ll +++ b/llvm/test/CodeGen/X86/test-shrink.ll @@ -484,7 +484,8 @@ no: define void @truncand32(i16 inreg %x) nounwind { ; CHECK-LINUX64-LABEL: truncand32: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testl $2049, %edi # imm = 0x801 +; CHECK-LINUX64-NEXT: andl $2049, %edi # imm = 0x801 +; CHECK-LINUX64-NEXT: testw %di, %di ; CHECK-LINUX64-NEXT: je .LBB11_1 ; CHECK-LINUX64-NEXT: # %bb.2: # %no ; CHECK-LINUX64-NEXT: retq @@ -497,7 +498,8 @@ define void @truncand32(i16 inreg %x) nounwind { ; CHECK-WIN32-64-LABEL: truncand32: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testl $2049, %ecx # imm = 0x801 +; CHECK-WIN32-64-NEXT: andl $2049, %ecx # imm = 0x801 +; CHECK-WIN32-64-NEXT: testw %cx, %cx ; CHECK-WIN32-64-NEXT: je .LBB11_1 ; CHECK-WIN32-64-NEXT: # %bb.2: # %no ; CHECK-WIN32-64-NEXT: addq $40, %rsp @@ -509,7 +511,8 @@ define void @truncand32(i16 inreg %x) nounwind { ; ; CHECK-X86-LABEL: truncand32: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testl $2049, %eax # imm = 0x801 +; CHECK-X86-NEXT: andl $2049, %eax # imm = 0x801 +; CHECK-X86-NEXT: testw %ax, %ax ; CHECK-X86-NEXT: je .LBB11_1 ; CHECK-X86-NEXT: # %bb.2: # %no ; CHECK-X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/testb-je-fusion.ll b/llvm/test/CodeGen/X86/testb-je-fusion.ll index 47453ca6791..9822ad3941d 100644 --- a/llvm/test/CodeGen/X86/testb-je-fusion.ll +++ b/llvm/test/CodeGen/X86/testb-je-fusion.ll @@ -6,8 +6,9 @@ define i32 @check_flag(i32 %flags, ...) 
nounwind { ; CHECK-LABEL: check_flag: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl $512, %edi # imm = 0x200 +; CHECK-NEXT: testb $2, %ch ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: movl $1, %eax diff --git a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll index 6ef691552aa..4c527a7c6c0 100644 --- a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll +++ b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll @@ -8,7 +8,9 @@ target triple = "x86_64-apple-macosx10.10.0" define i32 @check_flag(i32 %flags, ...) nounwind { ; CHECK-LABEL: check_flag: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $48, %rsp +; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %entry @@ -27,7 +29,7 @@ define i32 @check_flag(i32 %flags, ...) nounwind { ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl $512, %edi ## imm = 0x200 +; CHECK-NEXT: testb $2, %bh ; CHECK-NEXT: je LBB0_4 ; CHECK-NEXT: ## %bb.3: ## %if.then ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax @@ -38,7 +40,8 @@ define i32 @check_flag(i32 %flags, ...) nounwind { ; CHECK-NEXT: movl $8, 0 ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: LBB0_4: ## %if.end -; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: addq $48, %rsp +; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq entry: %and = and i32 %flags, 512 |