diff options
author | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-13 09:37:38 +0000 |
---|---|---|
committer | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-13 09:37:38 +0000 |
commit | 51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf (patch) | |
tree | 49360c0aef3239b611cbd42593dc54c2f6470098 /llvm/test/Analysis | |
parent | 8d59f5370d46a2b9cb2714a3342eb58bacd52580 (diff) | |
download | bcm5719-llvm-51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf.tar.gz bcm5719-llvm-51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf.zip |
Improve reduction intrinsics by overloading result value.
This patch uses the mechanism from D62995 to strengthen the
definitions of the reduction intrinsics by letting the scalar
result/accumulator type be overloaded from the vector element type.
For example:
; The LLVM LangRef specifies that the scalar result must equal the
; vector element type, but this is not checked/enforced by LLVM.
declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a)
This patch changes that into:
declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a)
Which has the type-constraint more explicit and causes LLVM to check
the result type with the vector element type.
Reviewers: RKSimon, arsenm, rnk, greened, aemerson
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D62996
llvm-svn: 363240
Diffstat (limited to 'llvm/test/Analysis')
19 files changed, 4144 insertions, 4144 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll index 8cc6eea4ec4..d42d740dabf 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll @@ -2,278 +2,278 @@ ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE ; COST-LABEL: add.i8.v8i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v) ; CODE-LABEL: add.i8.v8i8 ; CODE: addv b0, v0.8b define i8 @add.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: add.i8.v16i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v) ; CODE-LABEL: add.i8.v16i8 ; CODE: addv b0, v0.16b define i8 @add.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: add.i16.v4i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v) ; CODE-LABEL: add.i16.v4i16 ; CODE: addv h0, v0.4h define i16 @add.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: add.i16.v8i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v) ; CODE-LABEL: add.i16.v8i16 ; CODE: addv h0, v0.8h define i16 @add.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: add.i32.v4i32 -; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v) ; CODE-LABEL: add.i32.v4i32 ; CODE: addv s0, v0.4s define i32 @add.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v) + %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: umin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v) ; CODE-LABEL: umin.i8.v8i8 ; CODE: uminv b0, v0.8b define i8 @umin.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: umin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v) ; CODE-LABEL: umin.i8.v16i8 ; CODE: uminv b0, v0.16b define i8 @umin.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: umin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v) ; CODE-LABEL: umin.i16.v4i16 ; CODE: uminv h0, v0.4h define i16 @umin.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: umin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v) ; CODE-LABEL: umin.i16.v8i16 ; CODE: uminv h0, v0.8h define i16 @umin.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: umin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v) ; CODE-LABEL: umin.i32.v4i32 ; CODE: uminv s0, v0.4s define i32 @umin.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v) + %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: umax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v) ; CODE-LABEL: umax.i8.v8i8 ; CODE: umaxv b0, v0.8b define i8 @umax.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: umax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v) ; CODE-LABEL: umax.i8.v16i8 ; CODE: umaxv b0, v0.16b define i8 @umax.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: umax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v) ; CODE-LABEL: umax.i16.v4i16 ; CODE: umaxv h0, v0.4h define i16 @umax.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: umax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v) ; CODE-LABEL: umax.i16.v8i16 ; CODE: umaxv h0, v0.8h define i16 @umax.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: umax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v) ; CODE-LABEL: umax.i32.v4i32 ; CODE: umaxv s0, v0.4s define i32 @umax.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v) + %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: smin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v) ; CODE-LABEL: smin.i8.v8i8 ; CODE: sminv b0, v0.8b define i8 @smin.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: smin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v) ; CODE-LABEL: smin.i8.v16i8 ; CODE: sminv b0, v0.16b define i8 @smin.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: smin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v) ; CODE-LABEL: smin.i16.v4i16 ; CODE: sminv h0, v0.4h define i16 @smin.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: smin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v) ; CODE-LABEL: smin.i16.v8i16 ; CODE: sminv h0, v0.8h define i16 @smin.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: smin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v) ; CODE-LABEL: smin.i32.v4i32 ; CODE: sminv s0, v0.4s define i32 @smin.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v) + %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: smax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v) ; CODE-LABEL: smax.i8.v8i8 ; CODE: smaxv b0, v0.8b define i8 @smax.i8.v8i8(<8 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v) ret i8 %r } ; COST-LABEL: smax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v) ; CODE-LABEL: smax.i8.v16i8 ; CODE: smaxv b0, v0.16b define i8 @smax.i8.v16i8(<16 x i8> %v) { - %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v) + %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v) ret i8 %r } ; COST-LABEL: smax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v) ; CODE-LABEL: smax.i16.v4i16 ; CODE: smaxv h0, v0.4h define i16 @smax.i16.v4i16(<4 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v) ret i16 %r } ; COST-LABEL: smax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v) ; CODE-LABEL: smax.i16.v8i16 ; CODE: smaxv h0, v0.8h define i16 @smax.i16.v8i16(<8 x i16> %v) { - %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v) + %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v) ret i16 %r } ; COST-LABEL: smax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v) ; CODE-LABEL: smax.i32.v4i32 ; CODE: smaxv s0, v0.4s define i32 @smax.i32.v4i32(<4 x i32> %v) { - %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v) + %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v) ret i32 %r } ; COST-LABEL: fmin.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v) ; CODE-LABEL: fmin.f32.v4f32 ; CODE: fminnmv s0, v0.4s define float @fmin.f32.v4f32(<4 x float> %v) { - %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v) + %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %r } ; COST-LABEL: fmax.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v) ; CODE-LABEL: fmax.f32.v4f32 ; CODE: fmaxnmv s0, v0.4s define float @fmax.f32.v4f32(<4 x float> %v) { - %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v) + %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %r } -declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>) - -declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>) - -declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>) - -declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>) +declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) + +declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) + +declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) + +declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) + +declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) + +declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) + +declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll index 5230511b11e..0450cc0843b 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll @@ -10,298 +10,298 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add.ll b/llvm/test/Analysis/CostModel/X86/reduce-add.ll index bb762233385..066ddb77a20 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-add.ll @@ -10,298 +10,298 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'reduce_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll index 712d9dc9b64..a69b12aa24f 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll @@ -10,369 +10,369 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE-LABEL: 'reduce_i1' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and.ll b/llvm/test/Analysis/CostModel/X86/reduce-and.ll index e3d90d271e9..77e8ee57e0c 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-and.ll @@ -10,369 +10,369 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE-LABEL: 'reduce_i1' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll index 1331f78f0b4..9ea2dac8f55 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i64' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i64' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i64' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll index 68f94cdfec9..ce94216f4b4 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll @@ -10,330 +10,330 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i64' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i64' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i64' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i32' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i32' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll index 45f140220ee..d0fb99b12b3 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll @@ -10,369 +10,369 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE-LABEL: 'reduce_i1' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or.ll b/llvm/test/Analysis/CostModel/X86/reduce-or.ll index 8858768276a..16978e5185b 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-or.ll @@ -10,369 +10,369 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE-LABEL: 'reduce_i1' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll index 6afa23c9d39..0aea5eee165 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll index 804e62723be..54613791ef6 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll index b1d020b9d28..a12768a0e2a 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll @@ -10,305 +10,305 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll index 9448208f290..777ac088e70 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll index 3b658e7e77e..380c57b7a79 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll index d2fb14277fa..1259bdf8cb6 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll index 7ffbac46ea6..347cae73699 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll index df35f2401c7..b946310e85e 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll @@ -10,314 +10,314 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll index ff0d276c221..69af1f370f2 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll @@ -10,391 +10,391 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll index 8f7dfdd02fb..a97fc21f36c 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll @@ -10,391 +10,391 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) - %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) - %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) - %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) - %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) + %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef) + %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef) + %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef) + %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef) + %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef) ret i32 undef } define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) - %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) - %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) - %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) - %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) + %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef) + %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef) + %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef) + %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef) + %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef) ret i32 undef } define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef) - %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) - %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) - %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) - %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) - %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) + %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef) + %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef) + %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef) + %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef) + %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef) + %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef) ret i32 undef } define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef) - %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef) - %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) - %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) - %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) - %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) - %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) + %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef) + %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef) + %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef) + %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef) + %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef) + %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef) + %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef) ret i32 undef } define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; - %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) - %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) - %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) - %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) - %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) - %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) - %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) - %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) + %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef) + %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef) + %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef) + %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef) + %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef) + %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef) + %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef) + %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef) ret i32 undef } -declare i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>) -declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>) +declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>) -declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>) +declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>) -declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>) +declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>) -declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>) +declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>) -declare i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>) +declare i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1>) |