summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AArch64
diff options
context:
space:
mode:
authorJessica Paquette <jpaquette@apple.com>2019-01-24 22:00:41 +0000
committerJessica Paquette <jpaquette@apple.com>2019-01-24 22:00:41 +0000
commit245047dfe8b4abcdce02b7f16f464ed1d86ccbee (patch)
tree3ac054f59a0d0eee97a5d3ddc481b550116b1bae /llvm/test/CodeGen/AArch64
parent38ebaf7d5d25ef3113a6ab4bf19ed4e1869373ab (diff)
downloadbcm5719-llvm-245047dfe8b4abcdce02b7f16f464ed1d86ccbee.tar.gz
bcm5719-llvm-245047dfe8b4abcdce02b7f16f464ed1d86ccbee.zip
[GlobalISel][AArch64] Add isel support for FP16 vector @llvm.ceil
This patch adds support for vector @llvm.ceil intrinsics when full 16 bit floating point support isn't available. To do this, this patch... - Implements basic isel for G_UNMERGE_VALUES - Teaches the legalizer about 16 bit floats - Teaches AArch64RegisterBankInfo to respect floating point registers on G_BUILD_VECTOR and G_UNMERGE_VALUES - Teaches selectCopy about 16-bit floating point vectors It also adds - A legalizer test for the 16-bit vector ceil which verifies that we create a G_UNMERGE_VALUES and G_BUILD_VECTOR when full fp16 isn't supported - An instruction selection test which makes sure we lower to G_FCEIL when full fp16 is supported - A test for selecting G_UNMERGE_VALUES And also updates arm64-vfloatintrinsics.ll to show that the new ceiling types work as expected. https://reviews.llvm.org/D56682 llvm-svn: 352113
Diffstat (limited to 'llvm/test/CodeGen/AArch64')
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir86
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir39
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir154
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll28
4 files changed, 306 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
new file mode 100644
index 00000000000..bb0063b167f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
@@ -0,0 +1,86 @@
+# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -O0 -mattr=-fullfp16 -run-pass=legalizer %s -o - | FileCheck %s
+
+--- |
+ define <8 x half> @test_v8f16.ceil(<8 x half> %a) {
+ ret <8 x half> %a
+ }
+
+ define <4 x half> @test_v4f16.ceil(<4 x half> %a) {
+ ret <4 x half> %a
+ }
+
+...
+---
+name: test_v8f16.ceil
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $q0
+ ; CHECK-LABEL: name: test_v8f16.ceil
+ %0:_(<8 x s16>) = COPY $q0
+ ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<8 x s16>)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(<8 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16)
+ %1:_(<8 x s16>) = G_FCEIL %0
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v4f16.ceil
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $d0
+ ; CHECK-LABEL: name: test_v4f16.ceil
+ %0:_(<4 x s16>) = COPY $d0
+ ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16)
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32)
+ ; CHECK: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16)
+ %1:_(<4 x s16>) = G_FCEIL %0
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir
index 5d42a2cfd4d..9bacb7c75b9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ceil.mir
@@ -1,5 +1,6 @@
# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
-# RUN: -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
+# RUN: -run-pass=instruction-select -mattr=+fullfp16 -global-isel %s -o - \
+# RUN: | FileCheck %s
...
---
name: ceil_float
@@ -91,3 +92,39 @@ body: |
$q0 = COPY %1(<2 x s64>)
...
+---
+name: ceil_v4f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_v4f16
+ ; CHECK: %{{[0-9]+}}:fpr64 = FRINTPv4f16 %{{[0-9]+}}
+ liveins: $d0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %1:fpr(<4 x s16>) = G_FCEIL %0
+ $d0 = COPY %1(<4 x s16>)
+
+...
+---
+name: ceil_v8f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_v8f16
+ ; CHECK: %{{[0-9]+}}:fpr128 = FRINTPv8f16 %{{[0-9]+}}
+ liveins: $q0
+ %0:fpr(<8 x s16>) = COPY $q0
+ %1:fpr(<8 x s16>) = G_FCEIL %0
+ $q0 = COPY %1(<8 x s16>)
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir
new file mode 100644
index 00000000000..6814b993394
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir
@@ -0,0 +1,154 @@
+
+# RUN: llc -O0 -mattr=-fullfp16 -mtriple=aarch64-- \
+# RUN: -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ define <2 x double> @test_v2s64_unmerge(<2 x double> %a) {
+ ret <2 x double> %a
+ }
+
+ define <4 x float> @test_v4s32_unmerge(<4 x float> %a) {
+ ret <4 x float> %a
+ }
+
+ define <4 x half> @test_v4s16_unmerge(<4 x half> %a) {
+ ret <4 x half> %a
+ }
+
+ define <8 x half> @test_v8s16_unmerge(<8 x half> %a) {
+ ret <8 x half> %a
+ }
+
+...
+---
+name: test_v2s64_unmerge
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+ - { id: 3, class: fpr }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $q0
+ ; CHECK-LABEL: name: test_v2s64_unmerge
+ %0:fpr(<2 x s64>) = COPY $q0
+
+ ; Since 2 * 64 = 128, we can just directly copy.
+ ; CHECK: %2:fpr64 = COPY %0.dsub
+ ; CHECK: %3:fpr64 = CPYi64 %0, 1
+ %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %0(<2 x s64>)
+
+ %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64)
+ $q0 = COPY %1(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: test_v4s32_unmerge
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+ - { id: 3, class: fpr }
+ - { id: 4, class: fpr }
+ - { id: 5, class: fpr }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $q0
+ ; CHECK-LABEL: name: test_v4s32_unmerge
+ %0:fpr(<4 x s32>) = COPY $q0
+
+ ; Since 4 * 32 = 128, we can just directly copy.
+ ; CHECK: %2:fpr32 = COPY %0.ssub
+ ; CHECK: %3:fpr32 = CPYi32 %0, 1
+ ; CHECK: %4:fpr32 = CPYi32 %0, 2
+ ; CHECK: %5:fpr32 = CPYi32 %0, 3
+ %2:fpr(s32), %3:fpr(s32), %4:fpr(s32), %5:fpr(s32) = G_UNMERGE_VALUES %0(<4 x s32>)
+
+ %1:fpr(<4 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32)
+ $q0 = COPY %1(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: test_v4s16_unmerge
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+ - { id: 3, class: fpr }
+ - { id: 4, class: fpr }
+ - { id: 5, class: fpr }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $d0
+ ; CHECK-LABEL: name: test_v4s16_unmerge
+ %0:fpr(<4 x s16>) = COPY $d0
+
+ ; Since 4 * 16 != 128, we need to widen using implicit defs.
+ ; Note that we expect to reuse one of the INSERT_SUBREG results, as CPYi16
+ ; expects a lane > 0.
+ ; CHECK-DAG: [[IMPDEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INS_SHARED:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF1]], %0, %subreg.dsub
+ ; CHECK: [[IMPDEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INS2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF2]], %0, %subreg.dsub
+ ; CHECK: [[IMPDEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INS3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[IMPDEF3]], %0, %subreg.dsub
+ ; CHECK: %2:fpr16 = COPY [[INS_SHARED]].hsub
+ ; CHECK: %3:fpr16 = CPYi16 [[INS_SHARED]], 1
+ ; CHECK: %4:fpr16 = CPYi16 [[INS2]], 2
+ ; CHECK: %5:fpr16 = CPYi16 [[INS3]], 3
+ %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16) = G_UNMERGE_VALUES %0(<4 x s16>)
+
+ %1:fpr(<4 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16)
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: test_v8s16_unmerge
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+ - { id: 3, class: fpr }
+ - { id: 4, class: fpr }
+ - { id: 5, class: fpr }
+ - { id: 6, class: fpr }
+ - { id: 7, class: fpr }
+ - { id: 8, class: fpr }
+ - { id: 9, class: fpr }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $q0
+ ; CHECK-LABEL: name: test_v8s16_unmerge
+ %0:fpr(<8 x s16>) = COPY $q0
+
+ ; Since 8 * 16 = 128, we can just directly copy.
+ ; CHECK: %2:fpr16 = COPY %0.hsub
+ ; CHECK: %3:fpr16 = CPYi16 %0, 1
+ ; CHECK: %4:fpr16 = CPYi16 %0, 2
+ ; CHECK: %5:fpr16 = CPYi16 %0, 3
+ ; CHECK: %6:fpr16 = CPYi16 %0, 4
+ ; CHECK: %7:fpr16 = CPYi16 %0, 5
+ ; CHECK: %8:fpr16 = CPYi16 %0, 6
+ ; CHECK: %9:fpr16 = CPYi16 %0, 7
+ %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16), %6:fpr(s16), %7:fpr(s16), %8:fpr(s16), %9:fpr(s16) = G_UNMERGE_VALUES %0(<8 x s16>)
+
+ %1:fpr(<8 x s16>) = G_BUILD_VECTOR %2:fpr(s16), %3:fpr(s16), %4:fpr(s16), %5:fpr(s16), %6:fpr(s16), %7:fpr(s16), %8:fpr(s16), %9:fpr(s16)
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index 2d7976dd487..8680a10a929 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -3,6 +3,13 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \
+; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
+; RUN: 2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-NOFP16,FALLBACK
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
+; RUN: -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
+; RUN: 2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-FP16,FALLBACK
+
;;; Half vectors
%v4f16 = type <4 x half>
@@ -111,6 +118,12 @@ define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintp.4h
; CHECK-FP16-NEXT: ret
+ ; FALLBACK-NOT: remark{{.*}}test_v4f16.ceil:
+ ; GISEL-LABEL: test_v4f16.ceil:
+ ; GISEL-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
+ ; GISEL-FP16-NOT: fcvt
+ ; GISEL-FP16: frintp.4h
+ ; GISEL-FP16-NEXT: ret
%1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
ret %v4f16 %1
}
@@ -268,6 +281,12 @@ define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintp.8h
; CHECK-FP16-NEXT: ret
+ ; FALLBACK-NOT: remark{{.*}}test_v8f16.ceil:
+ ; GISEL-LABEL: test_v8f16.ceil:
+ ; GISEL-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
+ ; GISEL-FP16-NOT: fcvt
+ ; GISEL-FP16: frintp.8h
+ ; GISEL-FP16-NEXT: ret
%1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
ret %v8f16 %1
}
@@ -400,8 +419,11 @@ define %v2f32 @test_v2f32.floor(%v2f32 %a) {
ret %v2f32 %1
}
; CHECK-LABEL: test_v2f32.ceil:
+; FALLBACK-NOT: remark{{.*}}test_v2f32.ceil
+; GISEL-LABEL: test_v2f32.ceil:
define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
; CHECK: frintp.2s
+ ; GISEL: frintp.2s
%1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
ret %v2f32 %1
}
@@ -525,8 +547,11 @@ define %v4f32 @test_v4f32.floor(%v4f32 %a) {
ret %v4f32 %1
}
; CHECK: test_v4f32.ceil:
+; FALLBACK-NOT: remark{{.*}}test_v4f32.ceil
+; GISEL-LABEL: test_v4f32.ceil:
define %v4f32 @test_v4f32.ceil(%v4f32 %a) {
; CHECK: frintp.4s
+ ; GISEL: frintp.4s
%1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a)
ret %v4f32 %1
}
@@ -649,8 +674,11 @@ define %v2f64 @test_v2f64.floor(%v2f64 %a) {
ret %v2f64 %1
}
; CHECK: test_v2f64.ceil:
+; FALLBACK-NOT: remark{{.*}}test_v2f64.ceil
+; GISEL-LABEL: test_v2f64.ceil:
define %v2f64 @test_v2f64.ceil(%v2f64 %a) {
; CHECK: frintp.2d
+ ; GISEL: frintp.2d
%1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a)
ret %v2f64 %1
}
OpenPOWER on IntegriCloud