diff options
| field | value | date |
|---|---|---|
| author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2018-03-16 15:03:37 +0000 |
| committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2018-03-16 15:03:37 +0000 |
| commit | 9915291ab8141ae5c05d30c9bf0962673ad73280 (patch) | |
| tree | c83b65a902719dc9bdce9ca87e27c8e209e6facf | |
| parent | 9569fd51ac1b2886ffa8dac6cfcd06c098dd0787 (diff) | |
| download | bcm5719-llvm-9915291ab8141ae5c05d30c9bf0962673ad73280.tar.gz bcm5719-llvm-9915291ab8141ae5c05d30c9bf0962673ad73280.zip | |
[Hexagon] Fix zero-extending non-HVX bool vectors
llvm-svn: 327712
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 9 |
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatterns.td | 30 |
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll | 69 |
3 files changed, 96 insertions, 12 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 5e0b50a1a17..b020d1a07e4 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1506,6 +1506,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LOAD, VecVT, Custom); } + for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) { + setCondCodeAction(ISD::SETLT, VT, Expand); + setCondCodeAction(ISD::SETLE, VT, Expand); + setCondCodeAction(ISD::SETULT, VT, Expand); + setCondCodeAction(ISD::SETULE, VT, Expand); + } + // Custom-lower bitcasts from i8 to v8i1. setOperationAction(ISD::BITCAST, MVT::i8, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); @@ -2231,7 +2238,7 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, // position 0. assert(ty(IdxV) == MVT::i32); SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, - DAG.getConstant(8, dl, MVT::i32)); + DAG.getConstant(8*Scale, dl, MVT::i32)); SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV); SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0); while (Scale > 1) { diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index e0cff2ac238..6806981ab58 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -430,11 +430,19 @@ let AddedComplexity = 20 in { def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; -def: Pat<(v8i8 (zext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; -def: Pat<(v4i16 (zext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; -def: Pat<(v2i32 (zext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; -def: Pat<(v4i8 (zext V4I1:$Pu)), (LoReg (C2_mask V4I1:$Pu))>; -def: Pat<(v2i16 (zext V2I1:$Pu)), (LoReg (C2_mask V2I1:$Pu))>; +def Vsplatpi: OutPatFrag<(ops node:$V), + (Combinew (A2_tfrsi $V), 
(A2_tfrsi $V))>; +def: Pat<(v8i8 (zext V8I1:$Pu)), + (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>; +def: Pat<(v4i16 (zext V4I1:$Pu)), + (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; +def: Pat<(v2i32 (zext V2I1:$Pu)), + (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; + +def: Pat<(v4i8 (zext V4I1:$Pu)), + (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>; +def: Pat<(v2i16 (zext V2I1:$Pu)), + (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>; def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; @@ -695,18 +703,18 @@ def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), +def: Pat<(v4i1 (seteq V4I8:$Rs, V4I8:$Rt)), (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), +def: Pat<(v4i1 (setgt V4I8:$Rs, V4I8:$Rt)), (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), +def: Pat<(v4i1 (setugt V4I8:$Rs, V4I8:$Rt)), (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), +def: Pat<(v2i1 (seteq V2I16:$Rs, V2I16:$Rt)), (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), +def: Pat<(v2i1 (setgt V2I16:$Rs, V2I16:$Rt)), (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), +def: Pat<(v2i1 (setugt V2I16:$Rs, V2I16:$Rt)), (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), diff --git a/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll new file mode 100644 index 00000000000..b9cbb2e0fa4 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll @@ -0,0 +1,69 @@ +; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s 
+ +; Check that zero-extends of short boolean vectors are done correctly. +; These are not the only possible instruction sequences, so if something +; changes, the tests should be changed as well. + +; CHECK-LABEL: f0: +; CHECK-DAG: r[[D00:([0-9]+:[0-9]+)]] = combine(#0,r0) +; CHECK-DAG: r[[D01:([0-9]+:[0-9]+)]] = combine(#0,r1) +; CHECK: p[[P00:[0-3]]] = vcmpb.gt(r[[D01]],r[[D00]]) +; CHECK: r{{[0-9]+}}:[[R00:[0-9]+]] = mask(p[[P00]]) +; CHECK: r0 = and(r[[R00]],##16843009) +define <4 x i8> @f0(<4 x i8> %a0, <4 x i8> %a1) #0 { +b0: + %v0 = icmp slt <4 x i8> %a0, %a1 + %v1 = zext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: f1: +; CHECK-DAG: r[[D10:([0-9]+:[0-9]+)]] = vsxthw(r0) +; CHECK-DAG: r[[D11:([0-9]+:[0-9]+)]] = vsxthw(r1) +; CHECK: p[[P10:[0-3]]] = vcmpw.gt(r[[D11]],r[[D10]]) +; CHECK: r{{[0-9]+}}:[[R10:[0-9]+]] = mask(p[[P10]]) +; CHECK: r0 = and(r[[R10]],##65537) +define <2 x i16> @f1(<2 x i16> %a0, <2 x i16> %a1) #0 { +b0: + %v0 = icmp slt <2 x i16> %a0, %a1 + %v1 = zext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: f2: +; CHECK-DAG: r[[D20:([0-9]+:[0-9]+)]] = CONST64(#72340172838076673) +; CHECK-DAG: p[[P20:[0-3]]] = vcmpb.gt(r3:2,r1:0) +; CHECK: r[[D21:([0-9]+:[0-9]+)]] = mask(p[[P20]]) +; CHECK: r1:0 = and(r[[D21]],r[[D20]]) +define <8 x i8> @f2(<8 x i8> %a0, <8 x i8> %a1) #0 { +b0: + %v0 = icmp slt <8 x i8> %a0, %a1 + %v1 = zext <8 x i1> %v0 to <8 x i8> + ret <8 x i8> %v1 +} + +; CHECK-LABEL: f3: +; CHECK-DAG: r[[D30:([0-9]+:[0-9]+)]] = CONST64(#281479271743489) +; CHECK-DAG: p[[P30:[0-3]]] = vcmph.gt(r3:2,r1:0) +; CHECK: r[[D31:([0-9]+:[0-9]+)]] = mask(p[[P30]]) +; CHECK: r1:0 = and(r[[D31]],r[[D30]]) +define <4 x i16> @f3(<4 x i16> %a0, <4 x i16> %a1) #0 { +b0: + %v0 = icmp slt <4 x i16> %a0, %a1 + %v1 = zext <4 x i1> %v0 to <4 x i16> + ret <4 x i16> %v1 +} + +; CHECK-LABEL: f4: +; CHECK-DAG: r[[D40:([0-9]+:[0-9]+)]] = combine(#1,#1) +; CHECK-DAG: p[[P40:[0-3]]] = vcmpw.gt(r3:2,r1:0) +; CHECK: 
r[[D41:([0-9]+:[0-9]+)]] = mask(p[[P40]]) +; CHECK: r1:0 = and(r[[D41]],r[[D40]]) +define <2 x i32> @f4(<2 x i32> %a0, <2 x i32> %a1) #0 { +b0: + %v0 = icmp slt <2 x i32> %a0, %a1 + %v1 = zext <2 x i1> %v0 to <2 x i32> + ret <2 x i32> %v1 +} + +attributes #0 = { nounwind readnone } |

