diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-23 14:01:18 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-23 14:01:18 +0000 |
commit | 4e9b9cbee98e4df128cdcd5bb1a41d77dacbb480 (patch) | |
tree | cfefc28f1b756258e5f9712ee4fced82c09a0e34 /llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll | |
parent | 09375d98b8f44569e857a5953e897d35d381a2a9 (diff) | |
download | bcm5719-llvm-4e9b9cbee98e4df128cdcd5bb1a41d77dacbb480.tar.gz bcm5719-llvm-4e9b9cbee98e4df128cdcd5bb1a41d77dacbb480.zip |
[X86][AVX512] Add support for v4i64 fptosi/fptoui/sitofp/uitofp on AVX512DQ-only targets
Use 512-bit instructions with subvector insertion/extraction, as we do in a number of similar circumstances.
llvm-svn: 287762
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll | 98 |
1 file changed, 70 insertions, 28 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index e1efe63abb7..3d472bb2c20 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -39,20 +39,41 @@ define void @sitofp_2i64_2f64() #0 { } define void @sitofp_4i64_4f64() #0 { -; CHECK-LABEL: @sitofp_4i64_4f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double -; CHECK-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double -; CHECK-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 -; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_4i64_4f64( +; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x 
i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double +; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double +; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: ret void +; +; AVX256-LABEL: @sitofp_4i64_4f64( +; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 +; AVX256-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double +; AVX256-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double +; AVX256-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double +; AVX256-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double +; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; AVX256-NEXT: store double [[CVT2]], 
double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; AVX256-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 +; AVX256-NEXT: ret void +; +; AVX512-LABEL: @sitofp_4i64_4f64( +; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double> +; AVX512-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 +; AVX512-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 @@ -559,20 +580,41 @@ define void @sitofp_2i64_2f32() #0 { } define void @sitofp_4i64_4f32() #0 { -; CHECK-LABEL: @sitofp_4i64_4f32( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; CHECK-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float -; CHECK-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], 
[16 x float]* @dst32, i32 0, i64 2), align 8 -; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_4i64_4f32( +; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float +; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float +; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float +; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float +; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 +; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 +; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 +; SSE-NEXT: ret void +; +; AVX256-LABEL: @sitofp_4i64_4f32( +; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 +; AVX256-NEXT: 
[[CVT0:%.*]] = sitofp i64 [[LD0]] to float +; AVX256-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float +; AVX256-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float +; AVX256-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float +; AVX256-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; AVX256-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 +; AVX256-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 +; AVX256-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 +; AVX256-NEXT: ret void +; +; AVX512-LABEL: @sitofp_4i64_4f32( +; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> +; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; AVX512-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 |