diff options
| author | Jiangning Liu <jiangning.liu@arm.com> | 2013-11-06 03:35:27 +0000 |
|---|---|---|
| committer | Jiangning Liu <jiangning.liu@arm.com> | 2013-11-06 03:35:27 +0000 |
| commit | f4226f1d7bc4c38a80d2730c0b5c2a718332ac9c (patch) | |
| tree | a9ec2151809e0c6f1f2e3bfd8a3427dc71179a91 /llvm/lib | |
| parent | 95d005c7897870b34ef2d6cdddd0dde700f1b0c3 (diff) | |
| download | bcm5719-llvm-f4226f1d7bc4c38a80d2730c0b5c2a718332ac9c.tar.gz bcm5719-llvm-f4226f1d7bc4c38a80d2730c0b5c2a718332ac9c.zip | |
Implement AArch64 Neon instruction set Perm.
llvm-svn: 194123
Diffstat (limited to 'llvm/lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 18 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrNEON.td | 329 |
2 files changed, 347 insertions, 0 deletions
# Patch 1 of 2 (AArch64InstrFormats.td): adds the NeonI_Perm encoding format class.
# NeonI_Perm takes q, size and opcode as parameters and assigns them to
# Inst{30}, Inst{23-22} and Inst{14-12}. Bits 31, 29-24, 21, 15 and 11-10 are
# fixed constants; per the inline comments, Rm, Rn and Rd are inherited in
# bits 20-16, 9-5 and 4-0.
# NOTE(review): line breaks and indentation below were re-flowed from a
# whitespace-collapsed page capture; confirm against the upstream commit
# before applying this patch.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 8a2142646e5..b0aadfb031a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1001,6 +1001,24 @@ class NeonI_BitExtract<bit q, bits<2> op2,
   // Inherit Rd in 4-0
 }
 
+// Format AdvSIMD perm
+class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
+                 dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+  let Inst{31} = 0b0;
+  let Inst{30} = q;
+  let Inst{29-24} = 0b001110;
+  let Inst{23-22} = size;
+  let Inst{21} = 0b0;
+  // Inherit Rm in 20-16
+  let Inst{15} = 0b0;
+  let Inst{14-12} = opcode;
+  let Inst{11-10} = 0b10;
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
 // Format AdvSIMD 3 vector registers with same vector type
 class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
                    dag outs, dag ins, string asmstr,
# Patch 2 of 2 (AArch64InstrNEON.td): adds the Perm instruction definitions
# and their selection patterns.
#  * NeonI_Permute / NeonI_Perm_pat define one instruction per arrangement
#    (8b, 16b, 4h, 8h, 2s, 4s, 2d); q is 0b0 for the VPR64 forms and 0b1 for
#    the VPR128 forms.
#  * Six defm lines instantiate UZP1/TRN1/ZIP1/UZP2/TRN2/ZIP2 with opcodes
#    0b001, 0b010, 0b011, 0b101, 0b110 and 0b111.
#  * NI_ei_i32 / NI_ei_f32 wrap
#      vector_insert($Rn, vector_extract($Rm, $Ext), $Ins)
#    i.e. read lane $Ext of $Rm and write it into lane $Ins of $Rn, with the
#    extracted scalar typed i32 or f32 respectively.
#  * Each Pat is a chain of those fragments. The innermost operand is the base
#    vector and supplies every lane that the chain never overwrites:
#      - uzp1 / zip1: base is $Rn, lane 0 is left untouched;
#      - uzp2 / zip2: base is $Rm, the last lane is left untouched;
#      - trn1: base is $Rn, only odd destination lanes are written (from even
#        lanes of $Rm);
#      - trn2: base is $Rm, only even destination lanes are written (from odd
#        lanes of $Rn).
#  * Patterns in this hunk cover v16i8, v8i8, v8i16, v4i16, v4i32 and v4f32;
#    no pattern is given here for the 2s or 2d arrangements.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
index 8a78d14b8c9..5cdac1ef884 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
@@ -2360,6 +2360,335 @@ defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
                               int_aarch64_neon_vminv>;
 
+// The followings are for instruction class (Perm)
+
+class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
+                    string asmop, RegisterOperand OpVPR, string OpS>
+  : NeonI_Perm<q, size, opcode,
+               (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+               asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
+               [], NoItinerary>;
+
+multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
+  def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
+  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
+  def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
+  def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
+  def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
+  def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
+  def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
+}
+
+defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
+defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
+defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
+defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
+defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
+defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
+
+// Extract and Insert
+def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+                        (vector_insert node:$Rn,
+                          (i32 (vector_extract node:$Rm, node:$Ext)),
+                          node:$Ins)>;
+
+def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+                        (vector_insert node:$Rn,
+                          (f32 (vector_extract node:$Rm, node:$Ext)),
+                          node:$Ins)>;
+
+// uzp1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rn),
+          (v16i8 VPR128:$Rn), 2, 1)),
+          (v16i8 VPR128:$Rn), 4, 2)),
+          (v16i8 VPR128:$Rn), 6, 3)),
+          (v16i8 VPR128:$Rn), 8, 4)),
+          (v16i8 VPR128:$Rn), 10, 5)),
+          (v16i8 VPR128:$Rn), 12, 6)),
+          (v16i8 VPR128:$Rn), 14, 7)),
+          (v16i8 VPR128:$Rm), 0, 8)),
+          (v16i8 VPR128:$Rm), 2, 9)),
+          (v16i8 VPR128:$Rm), 4, 10)),
+          (v16i8 VPR128:$Rm), 6, 11)),
+          (v16i8 VPR128:$Rm), 8, 12)),
+          (v16i8 VPR128:$Rm), 10, 13)),
+          (v16i8 VPR128:$Rm), 12, 14)),
+          (v16i8 VPR128:$Rm), 14, 15)),
+          (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rn), 2, 1)),
+        (Ty VPR:$Rn), 4, 2)),
+        (Ty VPR:$Rn), 6, 3)),
+        (Ty VPR:$Rm), 0, 4)),
+        (Ty VPR:$Rm), 2, 5)),
+        (Ty VPR:$Rm), 4, 6)),
+        (Ty VPR:$Rm), 6, 7)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
+def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
+
+class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei (Ty (ei
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rn), 2, 1)),
+        (Ty VPR:$Rm), 0, 2)),
+        (Ty VPR:$Rm), 2, 3)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
+def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
+def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
+
+// uzp2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rm),
+          (v16i8 VPR128:$Rn), 1, 0)),
+          (v16i8 VPR128:$Rn), 3, 1)),
+          (v16i8 VPR128:$Rn), 5, 2)),
+          (v16i8 VPR128:$Rn), 7, 3)),
+          (v16i8 VPR128:$Rn), 9, 4)),
+          (v16i8 VPR128:$Rn), 11, 5)),
+          (v16i8 VPR128:$Rn), 13, 6)),
+          (v16i8 VPR128:$Rn), 15, 7)),
+          (v16i8 VPR128:$Rm), 1, 8)),
+          (v16i8 VPR128:$Rm), 3, 9)),
+          (v16i8 VPR128:$Rm), 5, 10)),
+          (v16i8 VPR128:$Rm), 7, 11)),
+          (v16i8 VPR128:$Rm), 9, 12)),
+          (v16i8 VPR128:$Rm), 11, 13)),
+          (v16i8 VPR128:$Rm), 13, 14)),
+          (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 1, 0)),
+        (Ty VPR:$Rn), 3, 1)),
+        (Ty VPR:$Rn), 5, 2)),
+        (Ty VPR:$Rn), 7, 3)),
+        (Ty VPR:$Rm), 1, 4)),
+        (Ty VPR:$Rm), 3, 5)),
+        (Ty VPR:$Rm), 5, 6)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
+def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
+
+class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei (Ty (ei
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 1, 0)),
+        (Ty VPR:$Rn), 3, 1)),
+        (Ty VPR:$Rm), 1, 2)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
+def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
+def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
+
+// zip1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rn),
+          (v16i8 VPR128:$Rm), 0, 1)),
+          (v16i8 VPR128:$Rn), 1, 2)),
+          (v16i8 VPR128:$Rm), 1, 3)),
+          (v16i8 VPR128:$Rn), 2, 4)),
+          (v16i8 VPR128:$Rm), 2, 5)),
+          (v16i8 VPR128:$Rn), 3, 6)),
+          (v16i8 VPR128:$Rm), 3, 7)),
+          (v16i8 VPR128:$Rn), 4, 8)),
+          (v16i8 VPR128:$Rm), 4, 9)),
+          (v16i8 VPR128:$Rn), 5, 10)),
+          (v16i8 VPR128:$Rm), 5, 11)),
+          (v16i8 VPR128:$Rn), 6, 12)),
+          (v16i8 VPR128:$Rm), 6, 13)),
+          (v16i8 VPR128:$Rn), 7, 14)),
+          (v16i8 VPR128:$Rm), 7, 15)),
+          (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rm), 0, 1)),
+        (Ty VPR:$Rn), 1, 2)),
+        (Ty VPR:$Rm), 1, 3)),
+        (Ty VPR:$Rn), 2, 4)),
+        (Ty VPR:$Rm), 2, 5)),
+        (Ty VPR:$Rn), 3, 6)),
+        (Ty VPR:$Rm), 3, 7)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
+def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
+
+class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei (Ty (ei
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rm), 0, 1)),
+        (Ty VPR:$Rn), 1, 2)),
+        (Ty VPR:$Rm), 1, 3)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
+def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
+def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
+
+// zip2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rm),
+          (v16i8 VPR128:$Rn), 8, 0)),
+          (v16i8 VPR128:$Rm), 8, 1)),
+          (v16i8 VPR128:$Rn), 9, 2)),
+          (v16i8 VPR128:$Rm), 9, 3)),
+          (v16i8 VPR128:$Rn), 10, 4)),
+          (v16i8 VPR128:$Rm), 10, 5)),
+          (v16i8 VPR128:$Rn), 11, 6)),
+          (v16i8 VPR128:$Rm), 11, 7)),
+          (v16i8 VPR128:$Rn), 12, 8)),
+          (v16i8 VPR128:$Rm), 12, 9)),
+          (v16i8 VPR128:$Rn), 13, 10)),
+          (v16i8 VPR128:$Rm), 13, 11)),
+          (v16i8 VPR128:$Rn), 14, 12)),
+          (v16i8 VPR128:$Rm), 14, 13)),
+          (v16i8 VPR128:$Rn), 15, 14)),
+          (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 4, 0)),
+        (Ty VPR:$Rm), 4, 1)),
+        (Ty VPR:$Rn), 5, 2)),
+        (Ty VPR:$Rm), 5, 3)),
+        (Ty VPR:$Rn), 6, 4)),
+        (Ty VPR:$Rm), 6, 5)),
+        (Ty VPR:$Rn), 7, 6)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
+def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
+
+class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei (Ty (ei
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 2, 0)),
+        (Ty VPR:$Rm), 2, 1)),
+        (Ty VPR:$Rn), 3, 2)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
+def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
+def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
+
+// trn1
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rn),
+          (v16i8 VPR128:$Rm), 0, 1)),
+          (v16i8 VPR128:$Rm), 2, 3)),
+          (v16i8 VPR128:$Rm), 4, 5)),
+          (v16i8 VPR128:$Rm), 6, 7)),
+          (v16i8 VPR128:$Rm), 8, 9)),
+          (v16i8 VPR128:$Rm), 10, 11)),
+          (v16i8 VPR128:$Rm), 12, 13)),
+          (v16i8 VPR128:$Rm), 14, 15)),
+          (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rm), 0, 1)),
+        (Ty VPR:$Rm), 2, 3)),
+        (Ty VPR:$Rm), 4, 5)),
+        (Ty VPR:$Rm), 6, 7)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
+def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
+
+class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei
+        (Ty VPR:$Rn),
+        (Ty VPR:$Rm), 0, 1)),
+        (Ty VPR:$Rm), 2, 3)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
+def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
+def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
+
+// trn2
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+          (v16i8 VPR128:$Rm),
+          (v16i8 VPR128:$Rn), 1, 0)),
+          (v16i8 VPR128:$Rn), 3, 2)),
+          (v16i8 VPR128:$Rn), 5, 4)),
+          (v16i8 VPR128:$Rn), 7, 6)),
+          (v16i8 VPR128:$Rn), 9, 8)),
+          (v16i8 VPR128:$Rn), 11, 10)),
+          (v16i8 VPR128:$Rn), 13, 12)),
+          (v16i8 VPR128:$Rn), 15, 14)),
+          (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+  : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 1, 0)),
+        (Ty VPR:$Rn), 3, 2)),
+        (Ty VPR:$Rn), 5, 4)),
+        (Ty VPR:$Rn), 7, 6)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
+def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
+
+class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+                 PatFrag ei>
+  : Pat<(Ty (ei (Ty (ei
+        (Ty VPR:$Rm),
+        (Ty VPR:$Rn), 1, 0)),
+        (Ty VPR:$Rn), 3, 2)),
+        (INST VPR:$Rn, VPR:$Rm)>;
+
+def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
+def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
+def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
+
+// End of implementation for instruction class (Perm)
+
 // The followings are for instruction class (3V Diff)
 
 // normal long/long2 pattern
|

