diff options
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll | 506 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlad0.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlad1.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlad6.ll | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlad7.ll | 53 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll | 23 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlald0.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlald1.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlald2.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll | 21 |
12 files changed, 559 insertions, 150 deletions
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll b/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll new file mode 100644 index 00000000000..47047c7f44b --- /dev/null +++ b/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll @@ -0,0 +1,506 @@ +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -verify -S | FileCheck %s +; +; Alias check: check that the rewrite isn't triggered when there's a store +; instruction possibly aliasing any mul load operands; arguments are passed +; without 'restrict' enabled. +; +; CHECK-NOT: call i32 @llvm.arm.smlad +; +define dso_local i32 @no_restrict(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + +; Store inserted here, aliasing with arrayidx, arrayidx1, arrayidx3 + store i16 42, i16* %arrayidx, align 2 + + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; Alias check: check that the rewrite isn't triggered when there's a store +; aliasing one of the mul load operands. Arguments are now annotated with +; 'noalias'. +; +; CHECK-NOT: call i32 @llvm.arm.smlad +; +define dso_local i32 @restrict(i32 %arg, i32* noalias %arg1, i16* noalias readonly %arg2, i16* noalias readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + +; Store inserted here, aliasing only with loads from 'arrayidx'. + store i16 42, i16* %arrayidx, align 2 + + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + +; Here the Mul is the LHS, and the Add the RHS. + %add11 = add i32 %mul9, %add10 + + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_dominates_all +; CHECK: store +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: smlad +define dso_local i32 @store_dominates_all(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + store i16 42, i16* %arrayidx, align 2 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: loads_dominate +; CHECK-NOT: store +; CHECK: load i32 +; CHECK-NOT: store +; CHECK: load i32 +; CHECK: store +define dso_local i32 @loads_dominate(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + store i16 42, i16* %arrayidx, align 2 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg3_legal_1 +; CHECK-NOT: store +; CHECK: phi i32 +; CHECK: [[IV:%[^ ]+]] = phi i32 [ %add +; CHECK: [[ARG3_GEP:%[^ ]+]] = getelementptr inbounds i16, i16* %arg3, i32 [[IV]] +; CHECK: [[ARG3:%[^ ]+]] = bitcast i16* [[ARG3_GEP]] to i32* +; CHECK: load i32, i32* [[ARG3]] +; CHECK: [[ARG2_GEP:%[^ ]+]] = getelementptr inbounds i16, i16* %arg2, i32 [[IV]] +; CHECK: [[ARG2:%[^ ]+]] = bitcast i16* [[ARG2_GEP]] to i32* +; CHECK: load i32, i32* [[ARG2]] +; CHECK: store +define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + store i16 42, i16* %arrayidx, align 2 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg3_legal_2 +; CHECK-NOT: store +; CHECK: [[BITCAST:[^ ]+]] = bitcast i16* %arrayidx to i32* +; CHECK: load i32, i32* [[BITCAST]] +; CHECK: store i16 42, i16* %arrayidx +; CHECK: [[BITCAST3:[^ ]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: load i32, i32* [[BITCAST3]] +; CHECK: smlad +define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + store i16 42, i16* %arrayidx, align 2 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg3_illegal_1 +; CHECK-NOT: load i32 +define dso_local i32 @store_alias_arg3_illegal_1(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* noalias nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + store i16 42, i16* %arrayidx1, align 2 + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg3_illegal_2 +; CHECK-NOT: load i32 +define dso_local i32 @store_alias_arg3_illegal_2(i32 %arg, i32* nocapture %arg1, i16* noalias nocapture readonly %arg2, i16* noalias nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + store i16 42, i16* %arrayidx, align 2 + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg2_illegal_1 +; CHECK-NOT: load i32 +define dso_local i32 @store_alias_arg2_illegal_1(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + store i16 42, i16* %arrayidx6, align 2 + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: store_alias_arg2_illegal_2 +; CHECK-NOT: load i32 +define dso_local i32 @store_alias_arg2_illegal_2(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { +entry: + %cmp24 = icmp sgt i32 %arg, 0 + br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %.pre = load i16, i16* %arg3, align 2 + %.pre27 = load i16, i16* %arg2, align 2 + br label %for.body + +for.cond.cleanup: + %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] + ret i32 %mac1.0.lcssa + +for.body: + %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] + %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 + %0 = load i16, i16* %arrayidx, align 2 + %add = add nuw nsw i32 %i.025, 1 + %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add + %1 = load i16, i16* %arrayidx1, align 2 + %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 + %2 = load i16, i16* %arrayidx3, align 2 + %conv = sext i16 %2 to i32 + %conv4 = sext i16 %0 to i32 + %mul = mul nsw i32 %conv, %conv4 + %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add + store i16 42, i16* %arrayidx3, align 2 + %3 = load i16, i16* %arrayidx6, align 2 + %conv7 = sext i16 %3 to i32 + %conv8 = sext i16 %1 to i32 + %mul9 = mul nsw i32 %conv7, %conv8 + %add10 = add i32 %mul, %mac1.026 + %add11 = add i32 %mul9, %add10 + %exitcond = icmp ne i32 %add, %arg + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: one_pair_alias +; FIXME: This tests shows we have a bug with smlad insertion +define i32 @one_pair_alias(i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %add26 + +for.body: ; preds = %for.body, %entry + %i.050 = phi i32 [ 0, %entry ], [ %add27, %for.body ] + %a.049 = phi i32 [ 0, %entry ], [ %add26, %for.body ] + %add3 = or i32 %i.050, 1 + %add11 = or i32 %i.050, 2 + %add19 = or i32 %i.050, 3 + %arrayidx = getelementptr inbounds i16, i16* %b, i32 %i.050 + %arrayidx4 = getelementptr inbounds i16, i16* %b, i32 %add3 + %arrayidx12 = getelementptr inbounds i16, i16* %b, i32 %add11 + %arrayidx20 = getelementptr inbounds i16, i16* %b, i32 %add19 + %arrayidx1 = getelementptr inbounds i16, i16* %c, i32 %i.050 + %arrayidx7 = getelementptr inbounds i16, i16* %c, i32 %add3 + %arrayidx15 = getelementptr inbounds i16, i16* %c, i32 %add11 + %arrayidx23 = getelementptr inbounds i16, i16* %c, i32 %add19 + %tmp = load i16, i16* %arrayidx, align 2 + %tmp2 = load i16, i16* %arrayidx4, align 2 + %tmp4 = load i16, i16* %arrayidx12, align 2 + %tmp6 = load i16, i16* %arrayidx20, align 2 + %tmp1 = load i16, i16* %arrayidx1, align 2 + store i16 43, i16 *%arrayidx7 + %tmp3 = load i16, i16* %arrayidx7, align 2 + %tmp5 = load i16, i16* %arrayidx15, align 2 + %tmp7 = load i16, i16* %arrayidx23, align 2 + %conv = sext i16 %tmp to i32 + %conv2 = sext i16 %tmp1 to i32 + %mul = mul nsw i32 %conv2, %conv + %add = add nsw i32 %mul, %a.049 + %conv5 = sext i16 %tmp2 to i32 + %conv8 = sext i16 %tmp3 to i32 + %mul9 = mul nsw i32 %conv8, %conv5 + %add10 = add nsw i32 %add, %mul9 + %conv13 = sext i16 %tmp4 to i32 + %conv16 = sext i16 %tmp5 to i32 + %mul17 = mul nsw i32 %conv16, %conv13 + %add18 = add nsw i32 %add10, %mul17 + %conv21 = sext i16 %tmp6 to i32 + %conv24 = sext i16 %tmp7 to i32 + %mul25 = mul nsw i32 %conv24, %conv21 + %add26 = add nsw i32 %add18, %mul25 + %add27 = add nuw nsw i32 %i.050, 4 + %cmp = icmp ult i32 %add27, 100 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlad0.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlad0.ll index acd694fb7a8..5b3207d8532 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlad0.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlad0.ll @@ -11,11 +11,11 @@ define dso_local i32 @OneReduction(i32 %arg, i32* nocapture readnone %arg1, i16* ; ; CHECK-LABEL: @OneReduction ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 -; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx to i32* +; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 -; CHECK: [[V8]] = call i32 @llvm.arm.smlad(i32 [[V5]], i32 [[V7]], i32 %mac1{{\.}}026) +; CHECK: [[V8]] = call i32 @llvm.arm.smlad(i32 [[V7]], i32 [[V5]], i32 %mac1{{\.}}026) ; CHECK-NOT: call i32 @llvm.arm.smlad ; ; CHECK-UNSUPPORTED-NOT: call i32 @llvm.arm.smlad diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlad1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlad1.ll index 60179f22374..6bce049eafb 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlad1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlad1.ll @@ -2,11 +2,11 @@ ; CHECK-LABEL: @test1 ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 -; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx to i32* +; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 -; CHECK: [[V8]] = call i32 @llvm.arm.smlad(i32 [[V5]], i32 [[V7]], i32 %mac1{{\.}}026) +; CHECK: [[V8]] = call i32 @llvm.arm.smlad(i32 [[V7]], i32 [[V5]], i32 %mac1{{\.}}026) define dso_local i32 @test1(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll index 04586e66930..b17106e70ed 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlad11.ll @@ -4,15 +4,15 @@ ; A more complicated chain: 4 mul operations, so we expect 2 smlad calls. ; ; CHECK: %mac1{{\.}}054 = phi i32 [ [[V17:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V8:%[0-9]+]] = bitcast i16* %arrayidx8 to i32* -; CHECK: [[V9:%[0-9]+]] = load i32, i32* [[V8]], align 2 ; CHECK: [[V10:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V11:%[0-9]+]] = load i32, i32* [[V10]], align 2 -; CHECK: [[V12:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V9]], i32 [[V11]], i32 %mac1{{\.}}054) -; CHECK: [[V13:%[0-9]+]] = bitcast i16* %arrayidx17 to i32* -; CHECK: [[V14:%[0-9]+]] = load i32, i32* [[V13]], align 2 ; CHECK: [[V15:%[0-9]+]] = bitcast i16* %arrayidx4 to i32* ; CHECK: [[V16:%[0-9]+]] = load i32, i32* [[V15]], align 2 +; CHECK: [[V8:%[0-9]+]] = bitcast i16* %arrayidx8 to i32* +; CHECK: [[V9:%[0-9]+]] = load i32, i32* [[V8]], align 2 +; CHECK: [[V13:%[0-9]+]] = bitcast i16* %arrayidx17 to i32* +; CHECK: [[V14:%[0-9]+]] = load i32, i32* [[V13]], align 2 +; CHECK: [[V12:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V9]], i32 [[V11]], i32 %mac1{{\.}}054) ; CHECK: [[V17:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V14]], i32 [[V16]], i32 [[V12]]) ; ; And we don't want to see a 3rd smlad: diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlad6.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlad6.ll deleted file mode 100644 index 421036ecfc0..00000000000 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlad6.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s -; -; Alias check: check that the rewrite isn't triggered when there's a store -; instruction possibly aliasing any mul load operands; arguments are passed -; without 'restrict' enabled. -; -; CHECK-NOT: call i32 @llvm.arm.smlad -; -define dso_local i32 @test(i32 %arg, i32* nocapture %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { -entry: - %cmp24 = icmp sgt i32 %arg, 0 - br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 - br label %for.body - -for.cond.cleanup: - %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] - ret i32 %mac1.0.lcssa - -for.body: - %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] - %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 - -; Store inserted here, aliasing with arrayidx, arrayidx1, arrayidx3 - store i16 42, i16* %arrayidx, align 2 - - %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 - %conv = sext i16 %2 to i32 - %conv4 = sext i16 %0 to i32 - %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 - %conv7 = sext i16 %3 to i32 - %conv8 = sext i16 %1 to i32 - %mul9 = mul nsw i32 %conv7, %conv8 - %add10 = add i32 %mul, %mac1.026 - %add11 = add i32 %mul9, %add10 - %exitcond = icmp ne i32 %add, %arg - br i1 %exitcond, label %for.body, label %for.cond.cleanup -} - diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlad7.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlad7.ll deleted file mode 100644 index 76c7d676f69..00000000000 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlad7.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s -; -; Alias check: check that the rewrite isn't triggered when there's a store -; aliasing one of the mul load operands. Arguments are now annotated with -; 'noalias'. -; -; CHECK-NOT: call i32 @llvm.arm.smlad -; -define dso_local i32 @test(i32 %arg, i32* noalias %arg1, i16* noalias readonly %arg2, i16* noalias readonly %arg3) { -entry: - %cmp24 = icmp sgt i32 %arg, 0 - br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: - %.pre = load i16, i16* %arg3, align 2 - %.pre27 = load i16, i16* %arg2, align 2 - br label %for.body - -for.cond.cleanup: - %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ] - ret i32 %mac1.0.lcssa - -for.body: - %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ] - %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %arg3, i32 %i.025 - %0 = load i16, i16* %arrayidx, align 2 - -; Store inserted here, aliasing only with loads from 'arrayidx'. - store i16 42, i16* %arrayidx, align 2 - - %add = add nuw nsw i32 %i.025, 1 - %arrayidx1 = getelementptr inbounds i16, i16* %arg3, i32 %add - %1 = load i16, i16* %arrayidx1, align 2 - %arrayidx3 = getelementptr inbounds i16, i16* %arg2, i32 %i.025 - %2 = load i16, i16* %arrayidx3, align 2 - %conv = sext i16 %2 to i32 - %conv4 = sext i16 %0 to i32 - %mul = mul nsw i32 %conv, %conv4 - %arrayidx6 = getelementptr inbounds i16, i16* %arg2, i32 %add - %3 = load i16, i16* %arrayidx6, align 2 - %conv7 = sext i16 %3 to i32 - %conv8 = sext i16 %1 to i32 - %mul9 = mul nsw i32 %conv7, %conv8 - %add10 = add i32 %mul, %mac1.026 - -; Here the Mul is the LHS, and the Add the RHS. - %add11 = add i32 %mul9, %add10 - - %exitcond = icmp ne i32 %add, %arg - br i1 %exitcond, label %for.body, label %for.cond.cleanup -} - diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll index a194c667d4e..aa012573606 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smladx-1.ll @@ -8,15 +8,15 @@ define i32 @smladx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, ; CHECK-LABEL: smladx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i32 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] +; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* +; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 +; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* +; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* ; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 ; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* ; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[IN23]], i32 [[IN12]], i32 [[ACC0]]) -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[ACC2]] = call i32 @llvm.arm.smladx(i32 [[IN21]], i32 [[IN10]], i32 [[ACC1]]) ; CHECK-NOT: call i32 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i32 @llvm.arm.smlad @@ -124,20 +124,21 @@ define i32 @smladx_swap(i16* nocapture readonly %pIn1, i16* nocapture readonly % ; CHECK: [[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i32 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 +; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* +; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 +; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 +; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* +; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 ; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* ; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 + ; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* ; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 -; CHECK: [[ACC1:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[IN2_2]], i32 [[IN1]], i32 [[ACC0]]) -; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* -; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 -; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* -; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[ACC1:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[IN2_2]], i32 [[IN1]], i32 [[ACC0]]) ; CHECK: [[ACC2]] = call i32 @llvm.arm.smladx(i32 [[IN2]], i32 [[IN1_2]], i32 [[ACC1]]) ; CHECK: [[PIN1_NEXT]] = getelementptr i16, i16* [[PIN1]], i32 4 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlald0.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlald0.ll index 4db6b91006b..45f3dbdf833 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlald0.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlald0.ll @@ -11,10 +11,10 @@ define dso_local i64 @OneReduction(i32 %arg, i32* nocapture readnone %arg1, i16* ; ; CHECK-LABEL: @OneReduction ; CHECK: %mac1{{\.}}026 = phi i64 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* -; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 +; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V8]] = call i64 @llvm.arm.smlald(i32 [[V5]], i32 [[V7]], i64 %mac1{{\.}}026) ; CHECK-NOT: call i64 @llvm.arm.smlald ; diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlald1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlald1.ll index 61435e97674..f591cba9843 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlald1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlald1.ll @@ -2,10 +2,10 @@ ; CHECK-LABEL: @test1 ; CHECK: %mac1{{\.}}026 = phi i64 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* -; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 +; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V8]] = call i64 @llvm.arm.smlald(i32 [[V5]], i32 [[V7]], i64 %mac1{{\.}}026) define dso_local i64 @test1(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlald2.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlald2.ll index 517a9456c0e..5fe37ce56c0 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlald2.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlald2.ll @@ -10,10 +10,10 @@ define dso_local i64 @OneReduction(i32 %arg, i32* nocapture readnone %arg1, i16* ; ; CHECK-LABEL: @OneReduction ; CHECK: %mac1{{\.}}026 = phi i64 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] -; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* -; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V6:%[0-9]+]] = bitcast i16* %arrayidx to i32* ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 +; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* +; CHECK: [[V5:%[0-9]+]] = load i32, i32* [[V4]], align 2 ; CHECK: [[V8]] = call i64 @llvm.arm.smlald(i32 [[V5]], i32 [[V7]], i64 %mac1{{\.}}026) ; CHECK-NOT: call i64 @llvm.arm.smlald ; diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll index e615f209f57..e325fe2f2a8 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-1.ll @@ -7,15 +7,15 @@ define i64 @smlaldx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2 ; CHECK-LABEL: smlaldx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] +; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* +; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 +; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* +; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* ; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 ; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* ; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN23]], i32 [[IN12]], i64 [[ACC0]]) -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN21]], i32 [[IN10]], i64 [[ACC1]]) ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad @@ -181,19 +181,21 @@ for.cond.cleanup: ; CHECK: [[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] +; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* +; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 + ; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 +; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* +; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 ; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* ; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 + ; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* ; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 -; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2_2]], i32 [[IN1]], i64 [[ACC0]]) -; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* -; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 -; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* -; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 +; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2_2]], i32 [[IN1]], i64 [[ACC0]]) ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN2]], i32 [[IN1_2]], i64 [[ACC1]]) ; CHECK: [[PIN1_NEXT]] = getelementptr i16, i16* [[PIN1]], i32 4 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll index a4b5a272dc6..971c85f1b66 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/smlaldx-2.ll @@ -7,15 +7,15 @@ define i64 @smlaldx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2 ; CHECK-LABEL: smlaldx ; CHECK: = phi i32 [ 0, %for.body.preheader.new ], ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] +; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* +; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 +; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* +; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[PIN23:%[^ ]+]] = bitcast i16* %pIn2.3 to i32* ; CHECK: [[IN23:%[^ ]+]] = load i32, i32* [[PIN23]], align 2 ; CHECK: [[PIN12:%[^ ]+]] = bitcast i16* %pIn1.2 to i32* ; CHECK: [[IN12:%[^ ]+]] = load i32, i32* [[PIN12]], align 2 ; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN23]], i32 [[IN12]], i64 [[ACC0]]) -; CHECK: [[PIN21:%[^ ]+]] = bitcast i16* %pIn2.1 to i32* -; CHECK: [[IN21:%[^ ]+]] = load i32, i32* [[PIN21]], align 2 -; CHECK: [[PIN10:%[^ ]+]] = bitcast i16* %pIn1.0 to i32* -; CHECK: [[IN10:%[^ ]+]] = load i32, i32* [[PIN10]], align 2 ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN21]], i32 [[IN10]], i64 [[ACC1]]) ; CHECK-NOT: call i64 @llvm.arm.smlad ; CHECK-UNSUPPORTED-NOT: call i64 @llvm.arm.smlad @@ -180,19 +180,22 @@ for.cond.cleanup: ; CHECK: [[PIN1:%[^ ]+]] = phi i16* [ [[PIN1_NEXT:%[^ ]+]], %for.body ], [ [[PIN1Base]], %for.body.preheader.new ] ; CHECK: [[IV:%[^ ]+]] = phi i32 ; CHECK: [[ACC0:%[^ ]+]] = phi i64 [ 0, %for.body.preheader.new ], [ [[ACC2:%[^ ]+]], %for.body ] -; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 -; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 ; CHECK: [[PIN2_CAST:%[^ ]+]] = bitcast i16* [[PIN2]] to i32* ; CHECK: [[IN2:%[^ ]+]] = load i32, i32* [[PIN2_CAST]], align 2 + +; CHECK: [[PIN1_2:%[^ ]+]] = getelementptr i16, i16* [[PIN1]], i32 -2 ; CHECK: [[PIN1_2_CAST:%[^ ]+]] = bitcast i16* [[PIN1_2]] to i32* ; CHECK: [[IN1_2:%[^ ]+]] = load i32, i32* [[PIN1_2_CAST]], align 2 -; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2]], i32 [[IN1_2]], i64 [[ACC0]]) -; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* -; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 +; CHECK: [[PIN2_2:%[^ ]+]] = getelementptr i16, i16* [[PIN2]], i32 -2 ; CHECK: [[PIN2_2_CAST:%[^ ]+]] = bitcast i16* [[PIN2_2]] to i32* ; CHECK: [[IN2_2:%[^ ]+]] = load i32, i32* [[PIN2_2_CAST]], align 2 + +; CHECK: [[PIN1_CAST:%[^ ]+]] = bitcast i16* [[PIN1]] to i32* +; CHECK: [[IN1:%[^ ]+]] = load i32, i32* [[PIN1_CAST]], align 2 + +; CHECK: [[ACC1:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[IN2]], i32 [[IN1_2]], i64 [[ACC0]]) ; CHECK: [[ACC2]] = call i64 @llvm.arm.smlaldx(i32 [[IN1]], i32 [[IN2_2]], i64 [[ACC1]]) ; CHECK: [[PIN1_NEXT]] = getelementptr i16, i16* [[PIN1]], i32 4 |