diff options
| -rw-r--r-- | llvm/lib/Target/ARM64/ARM64InstrInfo.td | 47 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM64/indexed-vector-ldst.ll | 211 |
2 files changed, 258 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM64/ARM64InstrInfo.td b/llvm/lib/Target/ARM64/ARM64InstrInfo.td index d2f8452a9fb..51fe207ced8 100644 --- a/llvm/lib/Target/ARM64/ARM64InstrInfo.td +++ b/llvm/lib/Target/ARM64/ARM64InstrInfo.td @@ -4416,6 +4416,53 @@ def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; +multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1, + int offset> { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr, offset), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr, GPR64:$Rm), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>; +} + +defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; +defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, + 2>; +defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; +defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; +defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; +defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; + +multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1, + int offset> { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr, offset), + (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr, GPR64:$Rm), + (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>; +} + +defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, + 1>; +defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, + 2>; +defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; +defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; +defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; +defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; + let mayStore = 1, neverHasSideEffects = 1 in { defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; diff --git a/llvm/test/CodeGen/ARM64/indexed-vector-ldst.ll b/llvm/test/CodeGen/ARM64/indexed-vector-ldst.ll index 1f510b07a16..c909a447e1e 100644 --- a/llvm/test/CodeGen/ARM64/indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/ARM64/indexed-vector-ldst.ll @@ -400,3 +400,214 @@ define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) { store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) ret void } + +define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) { +; CHECK-LABEL: test_v16i8_post_imm_st1_lane: +; CHECK: st1.b { v0 }[3], [x0], #1 + %elt = extractelement <16 x i8> %in, i32 3 + store i8 %elt, i8* %addr + + %newaddr = getelementptr i8* %addr, i32 1 + ret i8* %newaddr +} + +define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) { +; CHECK-LABEL: test_v16i8_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2 +; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <16 x i8> %in, i32 3 + store i8 %elt, i8* %addr + + %newaddr = getelementptr i8* %addr, i32 2 + ret i8* %newaddr +} + + +define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) { +; CHECK-LABEL: test_v8i16_post_imm_st1_lane: +; CHECK: st1.h { v0 }[3], [x0], #2 + %elt = extractelement <8 x i16> %in, i32 3 + store i16 %elt, i16* %addr + + %newaddr = getelementptr i16* %addr, i32 1 + ret i16* %newaddr +} + +define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) { +; CHECK-LABEL: test_v8i16_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4 +; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <8 x i16> %in, i32 3 + store i16 %elt, i16* %addr + + %newaddr = getelementptr i16* %addr, i32 2 + ret i16* %newaddr +} + +define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) { +; CHECK-LABEL: test_v4i32_post_imm_st1_lane: +; CHECK: st1.s { v0 }[3], [x0], #4 + %elt = extractelement <4 x i32> %in, i32 3 + store i32 %elt, i32* %addr + + %newaddr = getelementptr i32* %addr, i32 1 + ret i32* %newaddr +} + +define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) { +; CHECK-LABEL: test_v4i32_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8 +; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <4 x i32> %in, i32 3 + store i32 %elt, i32* %addr + + %newaddr = getelementptr i32* %addr, i32 2 + ret i32* %newaddr +} + +define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) { +; CHECK-LABEL: test_v4f32_post_imm_st1_lane: +; CHECK: st1.s { v0 }[3], [x0], #4 + %elt = extractelement <4 x float> %in, i32 3 + store float %elt, float* %addr + + %newaddr = getelementptr float* %addr, i32 1 + ret float* %newaddr +} + +define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) { +; CHECK-LABEL: test_v4f32_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8 +; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <4 x float> %in, i32 3 + store float %elt, float* %addr + + %newaddr = getelementptr float* %addr, i32 2 + ret float* %newaddr +} + +define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) { +; CHECK-LABEL: test_v2i64_post_imm_st1_lane: +; CHECK: st1.d { v0 }[1], [x0], #8 + %elt = extractelement <2 x i64> %in, i64 1 + store i64 %elt, i64* %addr + + %newaddr = getelementptr i64* %addr, i64 1 + ret i64* %newaddr +} + +define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) { +; CHECK-LABEL: test_v2i64_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10 +; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]] + %elt = extractelement <2 x i64> %in, i64 1 + store i64 %elt, i64* %addr + + %newaddr = getelementptr i64* %addr, i64 2 + ret i64* %newaddr +} + +define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) { +; CHECK-LABEL: test_v2f64_post_imm_st1_lane: +; CHECK: st1.d { v0 }[1], [x0], #8 + %elt = extractelement <2 x double> %in, i32 1 + store double %elt, double* %addr + + %newaddr = getelementptr double* %addr, i32 1 + ret double* %newaddr +} + +define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) { +; CHECK-LABEL: test_v2f64_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10 +; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]] + %elt = extractelement <2 x double> %in, i32 1 + store double %elt, double* %addr + + %newaddr = getelementptr double* %addr, i32 2 + ret double* %newaddr +} + +define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) { +; CHECK-LABEL: test_v8i8_post_imm_st1_lane: +; CHECK: st1.b { v0 }[3], [x0], #1 + %elt = extractelement <8 x i8> %in, i32 3 + store i8 %elt, i8* %addr + + %newaddr = getelementptr i8* %addr, i32 1 + ret i8* %newaddr +} + +define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) { +; CHECK-LABEL: test_v8i8_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2 +; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <8 x i8> %in, i32 3 + store i8 %elt, i8* %addr + + %newaddr = getelementptr i8* %addr, i32 2 + ret i8* %newaddr +} + +define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) { +; CHECK-LABEL: test_v4i16_post_imm_st1_lane: +; CHECK: st1.h { v0 }[3], [x0], #2 + %elt = extractelement <4 x i16> %in, i32 3 + store i16 %elt, i16* %addr + + %newaddr = getelementptr i16* %addr, i32 1 + ret i16* %newaddr +} + +define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) { +; CHECK-LABEL: test_v4i16_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4 +; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]] + %elt = extractelement <4 x i16> %in, i32 3 + store i16 %elt, i16* %addr + + %newaddr = getelementptr i16* %addr, i32 2 + ret i16* %newaddr +} + +define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) { +; CHECK-LABEL: test_v2i32_post_imm_st1_lane: +; CHECK: st1.s { v0 }[1], [x0], #4 + %elt = extractelement <2 x i32> %in, i32 1 + store i32 %elt, i32* %addr + + %newaddr = getelementptr i32* %addr, i32 1 + ret i32* %newaddr +} + +define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) { +; CHECK-LABEL: test_v2i32_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8 +; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]] + %elt = extractelement <2 x i32> %in, i32 1 + store i32 %elt, i32* %addr + + %newaddr = getelementptr i32* %addr, i32 2 + ret i32* %newaddr +} + +define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) { +; CHECK-LABEL: test_v2f32_post_imm_st1_lane: +; CHECK: st1.s { v0 }[1], [x0], #4 + %elt = extractelement <2 x float> %in, i32 1 + store float %elt, float* %addr + + %newaddr = getelementptr float* %addr, i32 1 + ret float* %newaddr +} + +define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { +; CHECK-LABEL: test_v2f32_post_reg_st1_lane: +; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8 +; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]] + %elt = extractelement <2 x float> %in, i32 1 + store float %elt, float* %addr + + %newaddr = getelementptr float* %addr, i32 2 + ret float* %newaddr +} |

