diff options
| author | Tim Northover <tnorthover@apple.com> | 2014-04-18 09:31:20 +0000 |
|---|---|---|
| committer | Tim Northover <tnorthover@apple.com> | 2014-04-18 09:31:20 +0000 |
| commit | 848bb3ced537dc5466b28318d880b1c78ec93ab5 (patch) | |
| tree | 35f5fc9bafc94e3914e60052657aa9022a9881b7 /llvm/lib/Target/ARM64 | |
| parent | 5ec51a89811b50abec1049f0591d97af6caf5b0a (diff) | |
| download | bcm5719-llvm-848bb3ced537dc5466b28318d880b1c78ec93ab5.tar.gz bcm5719-llvm-848bb3ced537dc5466b28318d880b1c78ec93ab5.zip | |
ARM64: implement cunning optimisation from AArch64
A vector extract followed by a dup can become a single instruction even if the
types don't match. AArch64 handled this in ISelLowering, but a few reasonably
simple patterns can take care of it in TableGen, so that's where I've put it.
llvm-svn: 206573
Diffstat (limited to 'llvm/lib/Target/ARM64')
| -rw-r--r-- | llvm/lib/Target/ARM64/ARM64InstrInfo.td | 53 |
1 file changed, 53 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/ARM64/ARM64InstrInfo.td b/llvm/lib/Target/ARM64/ARM64InstrInfo.td
index 509e215f823..53d1dbe2dfd 100644
--- a/llvm/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/llvm/lib/Target/ARM64/ARM64InstrInfo.td
@@ -3026,6 +3026,59 @@ def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
 def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
           (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
 
+// If there's an (ARM64dup (vector_extract ...) ...), we can use a duplane
+// instruction even if the types don't match: we just have to remap the lane
+// carefully. N.b. this trick only applies to truncations.
+def VecIndex_x2 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+}]>;
+def VecIndex_x4 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+}]>;
+def VecIndex_x8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+}]>;
+
+multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
+                            ValueType Src128VT, ValueType ScalVT,
+                            Instruction DUP, SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
+                                                     imm:$idx)))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
+                                                     imm:$idx)))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+
+defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+
+multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
+                               SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+                                                         imm:$idx))))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
+                                                         imm:$idx))))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
+
+defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+
+// SMOV and UMOV definitions, with some extra patterns for convenience
 defm SMOV : SMov;
 defm UMOV : UMov;
 
```

