summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2016-12-05 05:50:51 +0000
committer Craig Topper <craig.topper@gmail.com> 2016-12-05 05:50:51 +0000
commit db8467ae26835b57cf1f9a494c8d9e91d570e92b (patch)
tree 76dd1d7dbee050f7c4baa608a49f651f5c6f6b31
parent fde4a454efb5b6766b479fa5607945846ac714ef (diff)
download bcm5719-llvm-db8467ae26835b57cf1f9a494c8d9e91d570e92b.tar.gz
bcm5719-llvm-db8467ae26835b57cf1f9a494c8d9e91d570e92b.zip
[AVX-512] Teach fast isel to handle 512-bit vector bitcasts.
llvm-svn: 288641
-rw-r--r-- llvm/lib/Target/X86/X86FastISel.cpp 10
-rw-r--r-- llvm/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll 244
2 files changed, 252 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 44d918f3786..53e6ab6e359 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3563,8 +3563,14 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
if (!SrcVT.isSimple() || !DstVT.isSimple())
return false;
- if (!SrcVT.is128BitVector() &&
- !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ MVT SVT = SrcVT.getSimpleVT();
+ MVT DVT = DstVT.getSimpleVT();
+
+ if (!SVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
+ !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
+ (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
+ DVT.getScalarSizeInBits() >= 32))))
return false;
unsigned Reg = getRegForValue(I->getOperand(0));
diff --git a/llvm/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll b/llvm/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll
new file mode 100644
index 00000000000..7b81be3fc43
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll
@@ -0,0 +1,244 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -fast-isel -fast-isel-abort=1 -asm-verbose=0 | FileCheck %s
+;
+; Bitcasts between 512-bit vector types are no-ops since no instruction is
+; needed for the conversion.
+
+define <8 x i64> @v16i32_to_v8i64(<16 x i32> %a) {
+;CHECK-LABEL: v16i32_to_v8i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i32> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @v32i16_to_v8i64(<32 x i16> %a) {
+;CHECK-LABEL: v32i16_to_v8i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i16> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @v64i8_to_v8i64(<64 x i8> %a) {
+;CHECK-LABEL: v64i8_to_v8i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <64 x i8> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @v8f64_to_v8i64(<8 x double> %a) {
+;CHECK-LABEL: v8f64_to_v8i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x double> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @v16f32_to_v8i64(<16 x float> %a) {
+;CHECK-LABEL: v16f32_to_v8i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x float> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <16 x i32> @v8i64_to_v16i32(<8 x i64> %a) {
+;CHECK-LABEL: v8i64_to_v16i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i64> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @v32i16_to_v16i32(<32 x i16> %a) {
+;CHECK-LABEL: v32i16_to_v16i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i16> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @v64i8_to_v16i32(<64 x i8> %a) {
+;CHECK-LABEL: v64i8_to_v16i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <64 x i8> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @v8f64_to_v16i32(<8 x double> %a) {
+;CHECK-LABEL: v8f64_to_v16i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x double> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @v16f32_to_v16i32(<16 x float> %a) {
+;CHECK-LABEL: v16f32_to_v16i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x float> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <32 x i16> @v8i64_to_v32i16(<8 x i64> %a) {
+;CHECK-LABEL: v8i64_to_v32i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i64> %a to <32 x i16>
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @v16i32_to_v32i16(<16 x i32> %a) {
+;CHECK-LABEL: v16i32_to_v32i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i32> %a to <32 x i16>
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @v64i8_to_v32i16(<64 x i8> %a) {
+;CHECK-LABEL: v64i8_to_v32i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <64 x i8> %a to <32 x i16>
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @v8f64_to_v32i16(<8 x double> %a) {
+;CHECK-LABEL: v8f64_to_v32i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x double> %a to <32 x i16>
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @v16f32_to_v32i16(<16 x float> %a) {
+;CHECK-LABEL: v16f32_to_v32i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x float> %a to <32 x i16>
+ ret <32 x i16> %1
+}
+
+define <64 x i8> @v32i16_to_v64i8(<32 x i16> %a) {
+;CHECK-LABEL: v32i16_to_v64i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i16> %a to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @v8i64_to_v64i8(<8 x i64> %a) {
+;CHECK-LABEL: v8i64_to_v64i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i64> %a to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @v16i32_to_v64i8(<16 x i32> %a) {
+;CHECK-LABEL: v16i32_to_v64i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i32> %a to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @v8f64_to_v64i8(<8 x double> %a) {
+;CHECK-LABEL: v8f64_to_v64i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x double> %a to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @v16f32_to_v64i8(<16 x float> %a) {
+;CHECK-LABEL: v16f32_to_v64i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x float> %a to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <16 x float> @v64i8_to_v16f32(<64 x i8> %a) {
+;CHECK-LABEL: v64i8_to_v16f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <64 x i8> %a to <16 x float>
+ ret <16 x float> %1
+}
+
+define <16 x float> @v32i16_to_v16f32(<32 x i16> %a) {
+;CHECK-LABEL: v32i16_to_v16f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i16> %a to <16 x float>
+ ret <16 x float> %1
+}
+
+define <16 x float> @v8i64_to_v16f32(<8 x i64> %a) {
+;CHECK-LABEL: v8i64_to_v16f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i64> %a to <16 x float>
+ ret <16 x float> %1
+}
+
+define <16 x float> @v16i32_to_v16f32(<16 x i32> %a) {
+;CHECK-LABEL: v16i32_to_v16f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i32> %a to <16 x float>
+ ret <16 x float> %1
+}
+
+define <16 x float> @v8f64_to_v16f32(<8 x double> %a) {
+;CHECK-LABEL: v8f64_to_v16f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x double> %a to <16 x float>
+ ret <16 x float> %1
+}
+
+define <8 x double> @v16f32_to_v8f64(<16 x float> %a) {
+;CHECK-LABEL: v16f32_to_v8f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x float> %a to <8 x double>
+ ret <8 x double> %1
+}
+
+define <8 x double> @v64i8_to_v8f64(<64 x i8> %a) {
+;CHECK-LABEL: v64i8_to_v8f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <64 x i8> %a to <8 x double>
+ ret <8 x double> %1
+}
+
+define <8 x double> @v32i16_to_v8f64(<32 x i16> %a) {
+;CHECK-LABEL: v32i16_to_v8f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i16> %a to <8 x double>
+ ret <8 x double> %1
+}
+
+define <8 x double> @v8i64_to_v8f64(<8 x i64> %a) {
+;CHECK-LABEL: v8i64_to_v8f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i64> %a to <8 x double>
+ ret <8 x double> %1
+}
+
+define <8 x double> @v16i32_to_v8f64(<16 x i32> %a) {
+;CHECK-LABEL: v16i32_to_v8f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i32> %a to <8 x double>
+ ret <8 x double> %1
+}
OpenPOWER on IntegriCloud