summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Thomas Lively <tlively@google.com> 2018-10-19 19:08:06 +0000
committer: Thomas Lively <tlively@google.com> 2018-10-19 19:08:06 +0000
commit: 11a332d08d9c1ccfd17a14cb47e209444fd48df5 (patch)
tree: 04e15be2220de0c1f20be2cbb70fbb48867c991e
parent: 2bfe759a8d7f4e544ac0f079d447f33c0b4dcf9c (diff)
download: bcm5719-llvm-11a332d08d9c1ccfd17a14cb47e209444fd48df5.tar.gz
download: bcm5719-llvm-11a332d08d9c1ccfd17a14cb47e209444fd48df5.zip
[WebAssembly] Handle undefined lane indices in SIMD patterns
Summary:
Undefined indices in shuffles can be used when not all lanes of the output vector will be used. This happens, for example, in the expansion of vector reduce operations. Regardless, undefs are legal as lane indices in IR and should be supported.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D53057

llvm-svn: 344803
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 6
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td 36
-rw-r--r-- llvm/test/CodeGen/WebAssembly/simd.ll 266
3 files changed, 306 insertions, 2 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 30c2e843408..6ca619c910a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -990,8 +990,10 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Expand mask indices to byte indices and materialize them as operands
for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
for (size_t J = 0; J < LaneBytes; ++J) {
- Ops[OpIdx++] =
- DAG.getConstant((uint64_t)Mask[I] * LaneBytes + J, DL, MVT::i32);
+ // Lower undefs (represented by -1 in mask) to zero
+ uint64_t ByteIndex =
+ Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+ Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index b0fd6cab229..95c87266273 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -181,6 +181,28 @@ def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
(EXTRACT_LANE_v8i16_u V128:$vec, (i32 LaneIdx8:$idx))>;
+// Lower undef lane indices to zero
+def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
+ (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
+ (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), undef),
+ (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), undef),
+ (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), undef),
+ (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), undef),
+ (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+
// Replace lane value: replace_lane
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
WebAssemblyRegClass reg_t, ValueType lane_t,
@@ -201,6 +223,20 @@ defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 20>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 21>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 22>;
+// Lower undef lane indices to zero
+def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
+ (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
+def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
+ (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
+def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
+ (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
+
// Arbitrary other BUILD_VECTOR patterns
def : Pat<(v16i8 (build_vector
(i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index 193e3120b9e..1e1feeb35df 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -54,6 +54,18 @@ define i32 @extract_v16i8_s(<16 x i8> %v) {
ret i32 %a
}
+; CHECK-LABEL: extract_undef_v16i8_s:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @extract_undef_v16i8_s(<16 x i8> %v) {
+ %elem = extractelement <16 x i8> %v, i8 undef
+ %a = sext i8 %elem to i32
+ ret i32 %a
+}
+
; CHECK-LABEL: extract_v16i8_u:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128{{$}}
@@ -66,6 +78,18 @@ define i32 @extract_v16i8_u(<16 x i8> %v) {
ret i32 %a
}
+; CHECK-LABEL: extract_undef_v16i8_u:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @extract_undef_v16i8_u(<16 x i8> %v) {
+ %elem = extractelement <16 x i8> %v, i8 undef
+ %a = zext i8 %elem to i32
+ ret i32 %a
+}
+
; CHECK-LABEL: extract_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128{{$}}
@@ -77,6 +101,17 @@ define i8 @extract_v16i8(<16 x i8> %v) {
ret i8 %elem
}
+; CHECK-LABEL: extract_undef_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i8 @extract_undef_v16i8(<16 x i8> %v) {
+ %elem = extractelement <16 x i8> %v, i8 undef
+ ret i8 %elem
+}
+
; CHECK-LABEL: replace_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param v128, i32{{$}}
@@ -88,6 +123,17 @@ define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) {
ret <16 x i8> %res
}
+; CHECK-LABEL: replace_undef_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) {
+ %res = insertelement <16 x i8> %v, i8 %x, i32 undef
+ ret <16 x i8> %res
+}
+
; CHECK-LABEL: shuffle_v16i8:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -102,6 +148,22 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) {
ret <16 x i8> %res
}
+; CHECK-LABEL: shuffle_undef_v16i8:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
+ %res = shufflevector <16 x i8> %x, <16 x i8> %y,
+ <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %res
+}
+
; CHECK-LABEL: build_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
@@ -190,6 +252,18 @@ define i32 @extract_v8i16_s(<8 x i16> %v) {
ret i32 %a
}
+; CHECK-LABEL: extract_undef_v8i16_s:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @extract_undef_v8i16_s(<8 x i16> %v) {
+ %elem = extractelement <8 x i16> %v, i16 undef
+ %a = sext i16 %elem to i32
+ ret i32 %a
+}
+
; CHECK-LABEL: extract_v8i16_u:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128{{$}}
@@ -202,6 +276,18 @@ define i32 @extract_v8i16_u(<8 x i16> %v) {
ret i32 %a
}
+; CHECK-LABEL: extract_undef_v8i16_u:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @extract_undef_v8i16_u(<8 x i16> %v) {
+ %elem = extractelement <8 x i16> %v, i16 undef
+ %a = zext i16 %elem to i32
+ ret i32 %a
+}
+
; CHECK-LABEL: extract_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128{{$}}
@@ -213,6 +299,17 @@ define i16 @extract_v8i16(<8 x i16> %v) {
ret i16 %elem
}
+; CHECK-LABEL: extract_undef_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i16 @extract_undef_v8i16(<8 x i16> %v) {
+ %elem = extractelement <8 x i16> %v, i16 undef
+ ret i16 %elem
+}
+
; CHECK-LABEL: replace_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param v128, i32{{$}}
@@ -224,6 +321,17 @@ define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) {
ret <8 x i16> %res
}
+; CHECK-LABEL: replace_undef_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) {
+ %res = insertelement <8 x i16> %v, i16 %x, i32 undef
+ ret <8 x i16> %res
+}
+
; CHECK-LABEL: shuffle_v8i16:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -237,6 +345,20 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) {
ret <8 x i16> %res
}
+; CHECK-LABEL: shuffle_undef_v8i16:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
+ %res = shufflevector <8 x i16> %x, <8 x i16> %y,
+ <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i16> %res
+}
+
; CHECK-LABEL: build_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .param i32, i32, i32, i32, i32, i32, i32, i32{{$}}
@@ -305,6 +427,17 @@ define i32 @extract_v4i32(<4 x i32> %v) {
ret i32 %elem
}
+; CHECK-LABEL: extract_undef_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i32{{$}}
+; SIMD128-NEXT: i32x4.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @extract_undef_v4i32(<4 x i32> %v) {
+ %elem = extractelement <4 x i32> %v, i32 undef
+ ret i32 %elem
+}
+
; CHECK-LABEL: replace_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param v128, i32{{$}}
@@ -316,6 +449,17 @@ define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) {
ret <4 x i32> %res
}
+; CHECK-LABEL: replace_undef_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) {
+ %res = insertelement <4 x i32> %v, i32 %x, i32 undef
+ ret <4 x i32> %res
+}
+
; CHECK-LABEL: shuffle_v4i32:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -329,6 +473,19 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) {
ret <4 x i32> %res
}
+; CHECK-LABEL: shuffle_undef_v4i32:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
+ %res = shufflevector <4 x i32> %x, <4 x i32> %y,
+ <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x i32> %res
+}
+
; CHECK-LABEL: build_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .param i32, i32, i32, i32{{$}}
@@ -390,6 +547,18 @@ define i64 @extract_v2i64(<2 x i64> %v) {
ret i64 %elem
}
+; CHECK-LABEL: extract_undef_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-VM-NOT: i64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result i64{{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @extract_undef_v2i64(<2 x i64> %v) {
+ %elem = extractelement <2 x i64> %v, i64 undef
+ ret i64 %elem
+}
+
; CHECK-LABEL: replace_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
@@ -402,6 +571,18 @@ define <2 x i64> @replace_v2i64(<2 x i64> %v, i64 %x) {
ret <2 x i64> %res
}
+; CHECK-LABEL: replace_undef_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-VM-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) {
+ %res = insertelement <2 x i64> %v, i64 %x, i32 undef
+ ret <2 x i64> %res
+}
+
; CHECK-LABEL: shuffle_v2i64:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -414,6 +595,19 @@ define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) {
ret <2 x i64> %res
}
+; CHECK-LABEL: shuffle_undef_v2i64:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
+ %res = shufflevector <2 x i64> %x, <2 x i64> %y,
+ <2 x i32> <i32 1, i32 undef>
+ ret <2 x i64> %res
+}
+
; CHECK-LABEL: build_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-VM-NOT: i64x2
@@ -472,6 +666,17 @@ define float @extract_v4f32(<4 x float> %v) {
ret float %elem
}
+; CHECK-LABEL: extract_undef_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result f32{{$}}
+; SIMD128-NEXT: f32x4.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define float @extract_undef_v4f32(<4 x float> %v) {
+ %elem = extractelement <4 x float> %v, i32 undef
+ ret float %elem
+}
+
; CHECK-LABEL: replace_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .param v128, f32{{$}}
@@ -483,6 +688,17 @@ define <4 x float> @replace_v4f32(<4 x float> %v, float %x) {
ret <4 x float> %res
}
+; CHECK-LABEL: replace_undef_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128, f32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) {
+ %res = insertelement <4 x float> %v, float %x, i32 undef
+ ret <4 x float> %res
+}
+
; CHECK-LABEL: shuffle_v4f32:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -496,6 +712,19 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %res
}
+; CHECK-LABEL: shuffle_undef_v4f32:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
+ %res = shufflevector <4 x float> %x, <4 x float> %y,
+ <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x float> %res
+}
+
; CHECK-LABEL: build_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .param f32, f32, f32, f32{{$}}
@@ -556,6 +785,18 @@ define double @extract_v2f64(<2 x double> %v) {
ret double %elem
}
+; CHECK-LABEL: extract_undef_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-VM-NOT: f64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result f64{{$}}
+; SIMD128-NEXT: f64x2.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define double @extract_undef_v2f64(<2 x double> %v) {
+ %elem = extractelement <2 x double> %v, i32 undef
+ ret double %elem
+}
+
; CHECK-LABEL: replace_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
@@ -568,6 +809,18 @@ define <2 x double> @replace_v2f64(<2 x double> %v, double %x) {
ret <2 x double> %res
}
+; CHECK-LABEL: replace_undef_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-VM-NOT: f64x2
+; SIMD128-NEXT: .param v128, f64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) {
+ %res = insertelement <2 x double> %v, double %x, i32 undef
+ ret <2 x double> %res
+}
+
; CHECK-LABEL: shuffle_v2f64:
; NO-SIMD128-NOT: v8x16
; SIMD128-NEXT: .param v128, v128{{$}}
@@ -581,6 +834,19 @@ define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %res
}
+; CHECK-LABEL: shuffle_undef_v2f64:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
+ %res = shufflevector <2 x double> %x, <2 x double> %y,
+ <2 x i32> <i32 1, i32 undef>
+ ret <2 x double> %res
+}
+
; CHECK-LABEL: build_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-VM-NOT: f64x2
OpenPOWER on IntegriCloud