diff options
| author | Jessica Paquette <jpaquette@apple.com> | 2019-01-24 22:00:41 +0000 |
|---|---|---|
| committer | Jessica Paquette <jpaquette@apple.com> | 2019-01-24 22:00:41 +0000 |
| commit | 245047dfe8b4abcdce02b7f16f464ed1d86ccbee (patch) | |
| tree | 3ac054f59a0d0eee97a5d3ddc481b550116b1bae /llvm/test/CodeGen/AArch64 | |
| parent | 38ebaf7d5d25ef3113a6ab4bf19ed4e1869373ab (diff) | |
| download | bcm5719-llvm-245047dfe8b4abcdce02b7f16f464ed1d86ccbee.tar.gz bcm5719-llvm-245047dfe8b4abcdce02b7f16f464ed1d86ccbee.zip | |
[GlobalISel][AArch64] Add isel support for FP16 vector @llvm.ceil
This patch adds support for vector @llvm.ceil intrinsics when full 16-bit
floating-point support isn't available.
To do this, this patch...
- Implements basic isel for G_UNMERGE_VALUES
- Teaches the legalizer about 16-bit floats
- Teaches AArch64RegisterBankInfo to respect floating point registers on
G_BUILD_VECTOR and G_UNMERGE_VALUES
- Teaches selectCopy about 16-bit floating point vectors
It also adds
- A legalizer test for the 16-bit vector ceil which verifies that we create a
G_UNMERGE_VALUES and G_BUILD_VECTOR when full fp16 isn't supported
- An instruction selection test which makes sure we select G_FCEIL on 16-bit
float vectors to FRINTPv4f16/FRINTPv8f16 when full fp16 is supported
- A test for selecting G_UNMERGE_VALUES
And also updates arm64-vfloatintrinsics.ll to show that the new ceiling types
work as expected.
https://reviews.llvm.org/D56682
llvm-svn: 352113
Diffstat (limited to 'llvm/test/CodeGen/AArch64')
4 files changed, 306 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir new file mode 100644 index 00000000000..bb0063b167f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir @@ -0,0 +1,86 @@ +# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -O0 -mattr=-fullfp16 -run-pass=legalizer %s -o - | FileCheck %s + +--- | + define <8 x half> @test_v8f16.ceil(<8 x half> %a) { + ret <8 x half> %a + } + + define <4 x half> @test_v4f16.ceil(<4 x half> %a) { + ret <4 x half> %a + } + +... +--- +name: test_v8f16.ceil +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v8f16.ceil + %0:_(<8 x s16>) = COPY $q0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<8 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = 
G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<8 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + %1:_(<8 x s16>) = G_FCEIL %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4f16.ceil +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $d0 + ; CHECK-LABEL: name: test_v4f16.ceil + %0:_(<4 x s16>) = COPY $d0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + %1:_(<4 x s16>) = G_FCEIL %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir index 5d42a2cfd4d..9bacb7c75b9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir @@ -1,5 +1,6 @@ # RUN: llc -verify-machineinstrs -mtriple aarch64--- \ -# RUN: -run-pass=instruction-select -global-isel %s -o - | FileCheck %s +# RUN: -run-pass=instruction-select -mattr=+fullfp16 -global-isel %s -o - \ +# RUN: | FileCheck %s ... --- name: ceil_float @@ -91,3 +92,39 @@ body: | $q0 = COPY %1(<2 x s64>) ... +--- +name: ceil_v4f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: ceil_v4f16 + ; CHECK: %{{[0-9]+}}:fpr64 = FRINTPv4f16 %{{[0-9]+}} + liveins: $d0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = G_FCEIL %0 + $d0 = COPY %1(<4 x s16>) + +... +--- +name: ceil_v8f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: ceil_v8f16 + ; CHECK: %{{[0-9]+}}:fpr128 = FRINTPv8f16 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = G_FCEIL %0 + $q0 = COPY %1(<8 x s16>) + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir new file mode 100644 index 00000000000..6814b993394 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir @@ -0,0 +1,154 @@ + +# RUN: llc -O0 -mattr=-fullfp16 -mtriple=aarch64-- \ +# RUN: -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define <2 x double> @test_v2s64_unmerge(<2 x double> %a) { + ret <2 x double> %a + } + + define <4 x float> @test_v4s32_unmerge(<4 x float> %a) { + ret <4 x float> %a + } + + define <4 x half> @test_v4s16_unmerge(<4 x half> %a) { + ret <4 x half> %a + } + + define <8 x half> @test_v8s16_unmerge(<8 x half> %a) { + ret <8 x half> %a + } + +... +--- +name: test_v2s64_unmerge +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v2s64_unmerge + %0:fpr(<2 x s64>) = COPY $q0 + + ; Since 2 * 64 = 128, we can just directly copy. + ; CHECK: %2:fpr64 = COPY %0.dsub + ; CHECK: %3:fpr64 = CPYi64 %0, 1 + %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %0(<2 x s64>) + + %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64) + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 +... +--- +name: test_v4s32_unmerge +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v4s32_unmerge + %0:fpr(<4 x s32>) = COPY $q0 + + ; Since 4 * 32 = 128, we can just directly copy. 
+ ; CHECK: %2:fpr32 = COPY %0.ssub + ; CHECK: %3:fpr32 = CPYi32 %0, 1 + ; CHECK: %4:fpr32 = CPYi32 %0, 2 + ; CHECK: %5:fpr32 = CPYi32 %0, 3 + %2:fpr(s32), %3:fpr(s32), %4:fpr(s32), %5:fpr(s32) = G_UNMERGE_VALUES %0(<4 x s32>) + + %1:fpr(<4 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32) + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 +... +--- +name: test_v4s16_unmerge +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: $d0 + ; CHECK-LABEL: name: test_v4s16_unmerge + %0:fpr(<4 x s16>) = COPY $d0 + + ; Since 4 * 16 != 128, we need to widen using implicit defs. + ; Note that we expect to reuse one of the INSERT_SUBREG results, as CPYi16 + ; expects a lane > 0. + ; CHECK-DAG: [[IMPDEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK-NEXT: [[INS_SHARED:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF1]], %0, %subreg.dsub + ; CHECK: [[IMPDEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK-NEXT: [[INS2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF2]], %0, %subreg.dsub + ; CHECK: [[IMPDEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK-NEXT: [[INS3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF3]], %0, %subreg.dsub + ; CHECK: %2:fpr16 = COPY [[INS_SHARED]].hsub + ; CHECK: %3:fpr16 = CPYi16 [[INS_SHARED]], 1 + ; CHECK: %4:fpr16 = CPYi16 [[INS2]], 2 + ; CHECK: %5:fpr16 = CPYi16 [[INS3]], 3 + %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16) = G_UNMERGE_VALUES %0(<4 x s16>) + + %1:fpr(<4 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16) + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 +... 
+--- +name: test_v8s16_unmerge +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } + - { id: 6, class: fpr } + - { id: 7, class: fpr } + - { id: 8, class: fpr } + - { id: 9, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v8s16_unmerge + %0:fpr(<8 x s16>) = COPY $q0 + + ; Since 8 * 16 = 128, we can just directly copy. + ; CHECK: %2:fpr16 = COPY %0.hsub + ; CHECK: %3:fpr16 = CPYi16 %0, 1 + ; CHECK: %4:fpr16 = CPYi16 %0, 2 + ; CHECK: %5:fpr16 = CPYi16 %0, 3 + ; CHECK: %6:fpr16 = CPYi16 %0, 4 + ; CHECK: %7:fpr16 = CPYi16 %0, 5 + ; CHECK: %8:fpr16 = CPYi16 %0, 6 + ; CHECK: %9:fpr16 = CPYi16 %0, 7 + %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16), %6:fpr(s16), %7:fpr(s16), %8:fpr(s16), %9:fpr(s16) = G_UNMERGE_VALUES %0(<8 x s16>) + + %1:fpr(<8 x s16>) = G_BUILD_VECTOR %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16), %6:fpr(s16), %7:fpr(s16), %8:fpr(s16), %9:fpr(s16) + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 +... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll index 2d7976dd487..8680a10a929 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -3,6 +3,13 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \ ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \ +; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-NOFP16,FALLBACK +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \ +; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-FP16,FALLBACK + ;;; Half vectors %v4f16 = type <4 x half> @@ -111,6 +118,12 @@ define %v4f16 @test_v4f16.ceil(%v4f16 %a) { ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frintp.4h ; CHECK-FP16-NEXT: ret + ; FALLBACK-NOT: remark{{.*}}test_v4f16.ceil: + ; GISEL-LABEL: test_v4f16.ceil: + ; GISEL-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: frintp.4h + ; GISEL-FP16-NEXT: ret %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a) ret %v4f16 %1 } @@ -268,6 +281,12 @@ define %v8f16 @test_v8f16.ceil(%v8f16 %a) { ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frintp.8h ; CHECK-FP16-NEXT: ret + ; FALLBACK-NOT: remark{{.*}}test_v8f16.ceil: + ; GISEL-LABEL: test_v8f16.ceil: + ; GISEL-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: frintp.8h + ; GISEL-FP16-NEXT: ret %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a) ret %v8f16 %1 } @@ -400,8 +419,11 @@ define %v2f32 @test_v2f32.floor(%v2f32 %a) { ret %v2f32 %1 } ; CHECK-LABEL: test_v2f32.ceil: +; FALLBACK-NOT: remark{{.*}}test_v2f32.ceil +; GISEL-LABEL: test_v2f32.ceil: define %v2f32 @test_v2f32.ceil(%v2f32 %a) { ; 
CHECK: frintp.2s + ; GISEL: frintp.2s %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a) ret %v2f32 %1 } @@ -525,8 +547,11 @@ define %v4f32 @test_v4f32.floor(%v4f32 %a) { ret %v4f32 %1 } ; CHECK: test_v4f32.ceil: +; FALLBACK-NOT: remark{{.*}}test_v4f32.ceil +; GISEL-LABEL: test_v4f32.ceil: define %v4f32 @test_v4f32.ceil(%v4f32 %a) { ; CHECK: frintp.4s + ; GISEL: frintp.4s %1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a) ret %v4f32 %1 } @@ -649,8 +674,11 @@ define %v2f64 @test_v2f64.floor(%v2f64 %a) { ret %v2f64 %1 } ; CHECK: test_v2f64.ceil: +; FALLBACK-NOT: remark{{.*}}test_v2f64.ceil +; GISEL-LABEL: test_v2f64.ceil: define %v2f64 @test_v2f64.ceil(%v2f64 %a) { ; CHECK: frintp.2d + ; GISEL: frintp.2d %1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a) ret %v2f64 %1 } |

