diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2014-10-23 21:55:31 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2014-10-23 21:55:31 +0000 |
| commit | 5175bcf43aecf18daf380ecc2a11d1a067eb693e (patch) | |
| tree | b769142a61cd20bd3be6f44b15f119397340dad4 | |
| parent | ff4181adec1b3a708838b7e64ef6fdcd2e1a8612 (diff) | |
| download | bcm5719-llvm-5175bcf43aecf18daf380ecc2a11d1a067eb693e.tar.gz bcm5719-llvm-5175bcf43aecf18daf380ecc2a11d1a067eb693e.zip | |
[X86] Improve mul w/ overflow codegen, to MUL8+SETO.
Currently, @llvm.smul.with.overflow.i8 expands to 9 instructions, when only
3 are really needed.
This adds X86ISD::UMUL8/SMUL8 SD nodes, and custom lowers them to
MUL8/IMUL8 + SETO.
i8 is a special case because there are no two/three-operand variants of
(I)MUL8, so the first operand and return value need to go in AL/AX.
Also, we can't write patterns for these instructions: TableGen refuses
patterns where output operands don't match SDNode results, which is the case
here because the output operand is an implicitly defined register.
A related special case (and FIXME) exists for MUL8 (X86InstrArith.td):
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]
Ideally, these go away with UMUL8, but we still need to improve TableGen
support of implicit operands in patterns.
Before this change:
movsbl %sil, %eax
movsbl %dil, %ecx
imull %eax, %ecx
movb %cl, %al
sarb $7, %al
movzbl %al, %eax
movzbl %ch, %esi
cmpl %eax, %esi
setne %al
After:
movb %dil, %al
imulb %sil
seto %al
Also, remove a now-redundant testcase for PR19858, and enable more FastISel
ALU-overflow tests for SelectionDAG too.
Differential Revision: http://reviews.llvm.org/D5809
llvm-svn: 220516
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 19 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/i8-umulo.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/xaluo.ll | 25 |
5 files changed, 39 insertions, 42 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c42a05f46ba..4386028e9f5 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2218,6 +2218,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), getI8Imm(ShlVal)); } + case X86ISD::UMUL8: + case X86ISD::SMUL8: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + return nullptr; + } + case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 73cbd563f5a..dbe3c4aee1c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1597,9 +1597,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::UMULO, VT, Custom); } - // There are no 8-bit 3-address imul/mul instructions - setOperationAction(ISD::SMULO, MVT::i8, Expand); - setOperationAction(ISD::UMULO, MVT::i8, Expand); if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -18190,10 +18187,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { Cond = X86::COND_B; break; case ISD::SMULO: - BaseOp = X86ISD::SMUL; + BaseOp = N->getValueType(0) == MVT::i8 ? 
X86ISD::SMUL8 : X86ISD::SMUL; Cond = X86::COND_O; break; case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs + if (N->getValueType(0) == MVT::i8) { + BaseOp = X86ISD::UMUL8; + Cond = X86::COND_O; + break; + } SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), MVT::i32); SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f63b821ce6a..e8e611d0719 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -301,6 +301,9 @@ namespace llvm { UMUL, // LOW, HI, FLAGS = umul LHS, RHS + // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS + SMUL8, UMUL8, + // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, diff --git a/llvm/test/CodeGen/X86/i8-umulo.ll b/llvm/test/CodeGen/X86/i8-umulo.ll deleted file mode 100644 index 1d70f4a8754..00000000000 --- a/llvm/test/CodeGen/X86/i8-umulo.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s -; PR19858 - -declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b) -define i8 @testumulo(i32 %argc) { -; CHECK: imull -; CHECK: testb %{{.+}}, %{{.+}} -; CHECK: je [[NOOVERFLOWLABEL:.+]] -; CHECK: {{.*}}[[NOOVERFLOWLABEL]]: -; CHECK-NEXT: movb -; CHECK-NEXT: retl -top: - %RHS = trunc i32 %argc to i8 - %umul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 25, i8 %RHS) - %ex = extractvalue { i8, i1 } %umul, 1 - br i1 %ex, label %overflow, label %nooverlow - -overflow: - ret i8 %RHS - -nooverlow: - %umul.value = extractvalue { i8, i1 } %umul, 0 - ret i8 %umul.value -} diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll index 6a98037f29d..54a4d6aa35b 100644 --- a/llvm/test/CodeGen/X86/xaluo.ll +++ b/llvm/test/CodeGen/X86/xaluo.ll @@ -123,12 +123,9 @@ entry: ; Check boundary conditions for large immediates. 
define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) { entry: -; SDAG-LABEL: saddo.i64imm2 -; SDAG: addq $-2147483648, %rdi -; SDAG-NEXT: seto %al -; FAST-LABEL: saddo.i64imm2 -; FAST: addq $-2147483648, %rdi -; FAST-NEXT: seto %al +; CHECK-LABEL: saddo.i64imm2 +; CHECK: addq $-2147483648, %rdi +; CHECK-NEXT: seto %al %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -297,10 +294,10 @@ entry: ; SMULO define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) { entry: -; FAST-LABEL: smulo.i8 -; FAST: movb %dil, %al -; FAST-NEXT: imulb %sil -; FAST-NEXT: seto %cl +; CHECK-LABEL: smulo.i8 +; CHECK: movb %dil, %al +; CHECK-NEXT: imulb %sil +; CHECK-NEXT: seto %cl %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) %val = extractvalue {i8, i1} %t, 0 %obit = extractvalue {i8, i1} %t, 1 @@ -347,10 +344,10 @@ entry: ; UMULO define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) { entry: -; FAST-LABEL: umulo.i8 -; FAST: movb %dil, %al -; FAST-NEXT: mulb %sil -; FAST-NEXT: seto %cl +; CHECK-LABEL: umulo.i8 +; CHECK: movb %dil, %al +; CHECK-NEXT: mulb %sil +; CHECK-NEXT: seto %cl %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) %val = extractvalue {i8, i1} %t, 0 %obit = extractvalue {i8, i1} %t, 1 |

