-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatterns.td     | 37 |
-rw-r--r-- | llvm/test/CodeGen/Hexagon/constant_compound.ll | 52 |
2 files changed, 79 insertions, 10 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 2f5033a20af..f671238ec12 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -257,6 +257,23 @@ class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
 class Not2<PatFrag P>
   : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
 
+// If there is a constant operand feeding the and/or instruction,
+// do not generate the compound instruction:
+// it is not always profitable, as sometimes we end up with an extra transfer.
+// Consider the example below:
+// ra = #65280; rb = lsr(rb, #8); rc ^= and (rb, ra)
+// Instead, this is preferable:
+// ra = and (#65280, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_ni1<PatFrag Op>
+  : PatFrag<Op.Operands, !head(Op.Fragments), [{
+            if (hasOneUse(N)) {
+              // Reject the pattern if Op1 is an immediate operand.
+              SDValue Op1 = N->getOperand(1);
+              return !dyn_cast<ConstantSDNode>(Op1);
+            }
+            return false;}],
+            Op.OperandTransform>;
+
 class Su<PatFrag Op>
   : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
             Op.OperandTransform>;
@@ -1336,16 +1353,16 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
 def: Pat<(add Sext64:$Rs, I64:$Rt),
          (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
 
-def: AccRRR_pat<M4_and_and,  And, Su<And>,     I32, I32, I32>;
-def: AccRRR_pat<M4_and_or,   And, Su<Or>,      I32, I32, I32>;
-def: AccRRR_pat<M4_and_xor,  And, Su<Xor>,     I32, I32, I32>;
-def: AccRRR_pat<M4_or_and,   Or,  Su<And>,     I32, I32, I32>;
-def: AccRRR_pat<M4_or_or,    Or,  Su<Or>,      I32, I32, I32>;
-def: AccRRR_pat<M4_or_xor,   Or,  Su<Xor>,     I32, I32, I32>;
-def: AccRRR_pat<M4_xor_and,  Xor, Su<And>,     I32, I32, I32>;
-def: AccRRR_pat<M4_xor_or,   Xor, Su<Or>,      I32, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>,     I32, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>,     I64, I64, I64>;
+def: AccRRR_pat<M4_and_and,  And, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or,   And, Su_ni1<Or>,  I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor,  And, Su<Xor>,     I32, I32, I32>;
+def: AccRRR_pat<M4_or_and,   Or,  Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or,    Or,  Su_ni1<Or>,  I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor,   Or,  Su<Xor>,     I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and,  Xor, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or,   Xor, Su_ni1<Or>,  I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>,     I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>,     I64, I64, I64>;
 
 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with
 // one argument matches the patterns below, and with the other argument
diff --git a/llvm/test/CodeGen/Hexagon/constant_compound.ll b/llvm/test/CodeGen/Hexagon/constant_compound.ll
new file mode 100644
index 00000000000..4ca2dc5d4ed
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/constant_compound.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=hexagon < %s 2>&1 | FileCheck %s
+
+; Generating a compound instruction with a constant is not profitable:
+; the constant needs to be kept in a register before it is fed to the
+; compound instruction.
+; Before, we generated:
+;   ra = #65280;
+;   rb = lsr(rb, #8);
+;   rc ^= and (rb, ra)
+; Now, we generate:
+;   ra = and (#65280, lsr(ra, #8));
+;   rb = xor(rb, ra)
+
+; CHECK: and(##65280,lsr(r
+; CHECK-NOT: ^= and
+
+define dso_local zeroext i16 @test_compound(i16 zeroext %varA, i16 zeroext %varB) local_unnamed_addr #0 {
+entry:
+  %tmp = zext i16 %varB to i32
+  %tmp1 = and i16 %varA, 255
+  %tmp2 = zext i16 %tmp1 to i32
+  %.masked.i = and i32 %tmp, 255
+  %tmp3 = xor i32 %.masked.i, %tmp2
+  %tmp4 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp3, i32 255) #2
+  %tmp5 = trunc i64 %tmp4 to i32
+  %tmp6 = and i32 %tmp5, 255
+  %tmp7 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp6, i32 81922) #2
+  %tmp8 = trunc i64 %tmp7 to i32
+  %tmp9 = xor i32 %tmp8, %tmp
+  %tmp10 = lshr i32 %tmp9, 8
+  %tmp11 = lshr i16 %varA, 8
+  %conv2 = zext i16 %tmp11 to i32
+  %tmp12 = and i32 %tmp10, 65280
+  %.masked.i7 = and i32 %tmp10, 255
+  %tmp13 = xor i32 %.masked.i7, %conv2
+  %tmp14 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp13, i32 255) #2
+  %tmp15 = trunc i64 %tmp14 to i32
+  %tmp16 = and i32 %tmp15, 255
+  %tmp17 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp16, i32 81922) #2
+  %tmp18 = trunc i64 %tmp17 to i32
+  %tmp19 = xor i32 %tmp12, %tmp18
+  %tmp20 = lshr i32 %tmp19, 8
+  %tmp21 = trunc i32 %tmp20 to i16
+  ret i16 %tmp21
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hexagon.M4.pmpyw(i32, i32) #1
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv65" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
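For reference, below is a minimal standalone C++ sketch of the check that the Su_ni1 predicate performs; it is not part of the patch. The helper name isCompoundCandidate and the direct use of SDNode::hasOneUse() are illustrative assumptions; in the patch the real check is the [{ ... }] TableGen fragment, which is compiled into the generated Hexagon instruction selector.

// Illustrative sketch only (not part of the patch): the same condition the
// Su_ni1 predicate encodes, written as a standalone helper.  The function
// name is hypothetical; the real code comes from the [{ ... }] fragment in
// HexagonPatterns.td.
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

static bool isCompoundCandidate(const SDNode *N) {
  // The inner and/or may only be folded into an accumulating compound
  // instruction if it has a single use...
  if (!N->hasOneUse())
    return false;
  // ...and its second operand is not an immediate.  With a constant operand
  // the compound form would first need a transfer of the constant into a
  // register, which is exactly what this change avoids.
  SDValue Op1 = N->getOperand(1);
  return !isa<ConstantSDNode>(Op1);
}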