summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:44:58 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:44:58 +0000
commit2fdea4c8f1e89f36548a92bb68f003a40c07372c (patch)
tree8370a3b6024f8875ae1bf58af5087c581576db81
parent68b7bb95d48d1b2dfabcb33447e6a1f1b3928f8e (diff)
downloadbcm5719-llvm-2fdea4c8f1e89f36548a92bb68f003a40c07372c.tar.gz
bcm5719-llvm-2fdea4c8f1e89f36548a92bb68f003a40c07372c.zip
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations on targets that doesn't support the multiply or it is slow (p4) if someone cares enough. Example code: void test(char *s, int a) { __builtin_memset(s, a, 4); } before: _test: ## @test movzbl 8(%esp), %eax movl %eax, %ecx shll $8, %ecx orl %eax, %ecx movl %ecx, %eax shll $16, %eax orl %ecx, %eax movl 4(%esp), %ecx movl %eax, 4(%ecx) movl %eax, (%ecx) ret after: _test: ## @test movzbl 8(%esp), %eax imull $16843009, %eax, %eax ## imm = 0x1010101 movl 4(%esp), %ecx movl %eax, 4(%ecx) movl %eax, (%ecx) ret llvm-svn: 122707
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp32
-rw-r--r--llvm/test/CodeGen/X86/memset-2.ll11
2 files changed, 28 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9b2a601d65e..b639b7c9cf7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3132,6 +3132,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3140,27 +3151,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = APInt(NumBits, C->getZExtValue() & 255);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Value = DAG.getNode(ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Value,
- DAG.getConstant(Shift,
- TLI.getShiftAmountTy())),
- Value);
- Shift <<= 1;
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;
diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll
index 692965f6f7e..11f619cd6da 100644
--- a/llvm/test/CodeGen/X86/memset-2.ll
+++ b/llvm/test/CodeGen/X86/memset-2.ll
@@ -17,3 +17,14 @@ entry:
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
unreachable
}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t3(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
+ ret void
+; CHECK: t3:
+; CHECK: imull $16843009
+}
+
OpenPOWER on IntegriCloud