diff options
| author | Sam Parker <sam.parker@arm.com> | 2018-11-26 10:22:55 +0000 |
|---|---|---|
| committer | Sam Parker <sam.parker@arm.com> | 2018-11-26 10:22:55 +0000 |
| commit | 5338f7aae42f224b7e0d4b18ff61ed3b32e27074 (patch) | |
| tree | e3792ec764c9c857ada82ab7f92d0c3b754e40e8 | |
| parent | 8cd9d1b5cebe8a694089ac983038966a2fe6a516 (diff) | |
| download | bcm5719-llvm-5338f7aae42f224b7e0d4b18ff61ed3b32e27074.tar.gz bcm5719-llvm-5338f7aae42f224b7e0d4b18ff61ed3b32e27074.zip | |
[ARM] Prevent parallel macs for unsigned values
Both zext and sext are currently allowed during the search for narrow
sequences, and sext operands are later added to the mac candidates.
But operands of muls are also added, without checking whether they're
sext or zext, which means we can generate a signed smlad when we
shouldn't.
Differential Revision: https://reviews.llvm.org/D54790
llvm-svn: 347542
| -rw-r--r-- | llvm/lib/Target/ARM/ARMParallelDSP.cpp | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/smlad0.ll | 80 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/smlald0.ll | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/smlald2.ll | 86 |
4 files changed, 214 insertions, 13 deletions
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp index c43ab7b7238..fc3258914f9 100644 --- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp +++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp @@ -525,26 +525,20 @@ static void MatchParallelMACSequences(Reduction &R, if (!I) return false; - Value *MulOp0, *MulOp1; - switch (I->getOpcode()) { case Instruction::Add: if (Match(I->getOperand(0)) || (Match(I->getOperand(1)))) return true; break; - case Instruction::Mul: - if (match (I, (m_Mul(m_Value(MulOp0), m_Value(MulOp1))))) { + case Instruction::Mul: { + Value *MulOp0 = I->getOperand(0); + Value *MulOp1 = I->getOperand(1); + if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1)) AddMACCandidate(Candidates, I, MulOp0, MulOp1); - return false; - } - break; + return false; + } case Instruction::SExt: - if (match (I, (m_SExt(m_Mul(m_Value(MulOp0), m_Value(MulOp1)))))) { - Instruction *Mul = cast<Instruction>(I->getOperand(0)); - AddMACCandidate(Candidates, Mul, MulOp0, MulOp1); - return false; - } - break; + return Match(I->getOperand(0)); } return false; }; diff --git a/llvm/test/CodeGen/ARM/smlad0.ll b/llvm/test/CodeGen/ARM/smlad0.ll index b9278b4c22b..477f5659c16 100644 --- a/llvm/test/CodeGen/ARM/smlad0.ll +++ b/llvm/test/CodeGen/ARM/smlad0.ll @@ -130,3 +130,83 @@ for.body: %cmp = icmp slt i32 %add29, %arg br i1 %cmp, label %for.body, label %for.cond.cleanup } + +define i32 @one_zext(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +; CHECK-LABEL: @one_zext +; CHECK-NOT: call i32 @llvm.arm.smlad +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], 
[ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = zext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = zext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +define i32 @two_zext(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +; CHECK-LABEL: @two_zext +; CHECK-NOT: call i32 @llvm.arm.smlad +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = zext i16 %2 to i32 + %conv4 = 
zext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = zext i16 %3 to i32 + %conv8 = zext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} diff --git a/llvm/test/CodeGen/ARM/smlald0.ll b/llvm/test/CodeGen/ARM/smlald0.ll index 6d98c227cfe..97177366d56 100644 --- a/llvm/test/CodeGen/ARM/smlald0.ll +++ b/llvm/test/CodeGen/ARM/smlald0.ll @@ -130,3 +130,44 @@ for.body: %cmp = icmp slt i32 %add29, %arg br i1 %cmp, label %for.body, label %for.cond.cleanup } + +define i64 @reduction_zext(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +; CHECK-LABEL: @reduction_zext +; CHECK-NOT: call i64 @llvm.arm.smlald +; CHECK-NOT: call i32 @llvm.arm.smlad +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i64 [ 0, %entry ], [ %add11, %for.body ] + ret i64 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i64 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i64 + %conv4 = zext i16 %0 to i64 + %mul = mul nsw i64 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = 
load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i64 + %conv8 = zext i16 %1 to i64 + %mul9 = mul nsw i64 %conv7, %conv8 + %add10 = add i64 %mul, %mac1.026 + %add11 = add i64 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} diff --git a/llvm/test/CodeGen/ARM/smlald2.ll b/llvm/test/CodeGen/ARM/smlald2.ll index bf70489f979..517a9456c0e 100644 --- a/llvm/test/CodeGen/ARM/smlald2.ll +++ b/llvm/test/CodeGen/ARM/smlald2.ll @@ -136,3 +136,89 @@ for.body: %cmp = icmp slt i32 %add29, %arg br i1 %cmp, label %for.body, label %for.cond.cleanup } + +define i64 @zext_mul_reduction(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +; CHECK-LABEL: @zext_mul_reduction +; CHECK-NOT: call i64 @llvm.arm.smlald +; CHECK-NOT: call i32 @llvm.arm.smlad +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i64 [ 0, %entry ], [ %add11, %for.body ] + ret i64 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i64 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = zext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %sext0 = sext i32 %mul to i64 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = zext i16 %3 to i32 + %conv8 = sext i16 
%1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %sext1 = sext i32 %mul9 to i64 + %add10 = add i64 %sext0, %mac1.026 + %add11 = add i64 %sext1, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +define i64 @zext_add_reduction(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +; CHECK-LABEL: @zext_add_reduction +; CHECK-NOT: call i64 @llvm.arm.smlald +; CHECK-NOT: call i32 @llvm.arm.smlad +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i64 [ 0, %entry ], [ %add11, %for.body ] + ret i64 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i64 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %sext0 = zext i32 %mul to i64 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %sext1 = zext i32 %mul9 to i64 + %add10 = add i64 %sext0, %mac1.026 + %add11 = add i64 %sext1, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} |

