-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/extract.ll            |  49
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll | 139
2 files changed, 175 insertions, 13 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
index 9a6ee2afc8e..9f08f30587d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
@@ -1,13 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"

-;CHECK: fextr
-;CHECK-NOT: insertelement
-;CHECK-NOT: extractelement
-;CHECK: fadd <2 x double>
-;CHECK: ret void
 define void @fextr(double* %ptr) {
+; CHECK-LABEL: @fextr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.100000e+00>, [[LD]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT: ret void
+;
 entry:
   %LD = load <2 x double>, <2 x double>* undef
   %V0 = extractelement <2 x double> %LD, i32 0
@@ -21,11 +26,20 @@ entry:
   ret void
 }

-;CHECK: fextr1
-;CHECK: insertelement
-;CHECK: insertelement
-;CHECK: ret void
 define void @fextr1(double* %ptr) {
+; CHECK-LABEL: @fextr1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef
+; CHECK-NEXT: [[V0:%.*]] = extractelement <2 x double> [[LD]], i32 0
+; CHECK-NEXT: [[V1:%.*]] = extractelement <2 x double> [[LD]], i32 1
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V1]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> <double 3.400000e+00, double 1.200000e+00>, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P1]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT: ret void
+;
 entry:
   %LD = load <2 x double>, <2 x double>* undef
   %V0 = extractelement <2 x double> %LD, i32 0
@@ -39,11 +53,20 @@ entry:
   ret void
 }

-;CHECK: fextr2
-;CHECK: insertelement
-;CHECK: insertelement
-;CHECK: ret void
 define void @fextr2(double* %ptr) {
+; CHECK-LABEL: @fextr2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, <4 x double>* undef
+; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0
+; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> <double 5.500000e+00, double 6.600000e+00>, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT: ret void
+;
 entry:
   %LD = load <4 x double>, <4 x double>* undef
   %V0 = extractelement <4 x double> %LD, i32 0 ; <--- invalid size.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
new file mode 100644
index 00000000000..0af5e870ad1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -reassociate -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
+
+define signext i8 @Foo(<32 x i8>* %__v) {
+; CHECK-LABEL: @Foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
+; CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 0
+; CHECK-NEXT: [[VECEXT_I_I_1_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 1
+; CHECK-NEXT: [[ADD_I_1_I:%.*]] = add i8 [[VECEXT_I_I_1_I]], [[VECEXT_I_I_I]]
+; CHECK-NEXT: [[VECEXT_I_I_2_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 2
+; CHECK-NEXT: [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], [[VECEXT_I_I_2_I]]
+; CHECK-NEXT: [[VECEXT_I_I_3_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 3
+; CHECK-NEXT: [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], [[VECEXT_I_I_3_I]]
+; CHECK-NEXT: [[VECEXT_I_I_4_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 4
+; CHECK-NEXT: [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], [[VECEXT_I_I_4_I]]
+; CHECK-NEXT: [[VECEXT_I_I_5_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 5
+; CHECK-NEXT: [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], [[VECEXT_I_I_5_I]]
+; CHECK-NEXT: [[VECEXT_I_I_6_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 6
+; CHECK-NEXT: [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], [[VECEXT_I_I_6_I]]
+; CHECK-NEXT: [[VECEXT_I_I_7_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 7
+; CHECK-NEXT: [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], [[VECEXT_I_I_7_I]]
+; CHECK-NEXT: [[VECEXT_I_I_8_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 8
+; CHECK-NEXT: [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], [[VECEXT_I_I_8_I]]
+; CHECK-NEXT: [[VECEXT_I_I_9_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 9
+; CHECK-NEXT: [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], [[VECEXT_I_I_9_I]]
+; CHECK-NEXT: [[VECEXT_I_I_10_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 10
+; CHECK-NEXT: [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], [[VECEXT_I_I_10_I]]
+; CHECK-NEXT: [[VECEXT_I_I_11_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 11
+; CHECK-NEXT: [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], [[VECEXT_I_I_11_I]]
+; CHECK-NEXT: [[VECEXT_I_I_12_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 12
+; CHECK-NEXT: [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], [[VECEXT_I_I_12_I]]
+; CHECK-NEXT: [[VECEXT_I_I_13_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 13
+; CHECK-NEXT: [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], [[VECEXT_I_I_13_I]]
+; CHECK-NEXT: [[VECEXT_I_I_14_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 14
+; CHECK-NEXT: [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], [[VECEXT_I_I_14_I]]
+; CHECK-NEXT: [[VECEXT_I_I_15_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 15
+; CHECK-NEXT: [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], [[VECEXT_I_I_15_I]]
+; CHECK-NEXT: [[VECEXT_I_I_16_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 16
+; CHECK-NEXT: [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], [[VECEXT_I_I_16_I]]
+; CHECK-NEXT: [[VECEXT_I_I_17_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 17
+; CHECK-NEXT: [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], [[VECEXT_I_I_17_I]]
+; CHECK-NEXT: [[VECEXT_I_I_18_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 18
+; CHECK-NEXT: [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], [[VECEXT_I_I_18_I]]
+; CHECK-NEXT: [[VECEXT_I_I_19_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 19
+; CHECK-NEXT: [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], [[VECEXT_I_I_19_I]]
+; CHECK-NEXT: [[VECEXT_I_I_20_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 20
+; CHECK-NEXT: [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], [[VECEXT_I_I_20_I]]
+; CHECK-NEXT: [[VECEXT_I_I_21_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 21
+; CHECK-NEXT: [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], [[VECEXT_I_I_21_I]]
+; CHECK-NEXT: [[VECEXT_I_I_22_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 22
+; CHECK-NEXT: [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], [[VECEXT_I_I_22_I]]
+; CHECK-NEXT: [[VECEXT_I_I_23_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 23
+; CHECK-NEXT: [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], [[VECEXT_I_I_23_I]]
+; CHECK-NEXT: [[VECEXT_I_I_24_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 24
+; CHECK-NEXT: [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], [[VECEXT_I_I_24_I]]
+; CHECK-NEXT: [[VECEXT_I_I_25_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 25
+; CHECK-NEXT: [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], [[VECEXT_I_I_25_I]]
+; CHECK-NEXT: [[VECEXT_I_I_26_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 26
+; CHECK-NEXT: [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], [[VECEXT_I_I_26_I]]
+; CHECK-NEXT: [[VECEXT_I_I_27_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 27
+; CHECK-NEXT: [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], [[VECEXT_I_I_27_I]]
+; CHECK-NEXT: [[VECEXT_I_I_28_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 28
+; CHECK-NEXT: [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], [[VECEXT_I_I_28_I]]
+; CHECK-NEXT: [[VECEXT_I_I_29_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 29
+; CHECK-NEXT: [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], [[VECEXT_I_I_29_I]]
+; CHECK-NEXT: [[VECEXT_I_I_30_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 30
+; CHECK-NEXT: [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], [[VECEXT_I_I_30_I]]
+; CHECK-NEXT: [[VECEXT_I_I_31_I:%.*]] = extractelement <32 x i8> [[TMP0]], i64 31
+; CHECK-NEXT: [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], [[VECEXT_I_I_31_I]]
+; CHECK-NEXT: ret i8 [[ADD_I_31_I]]
+;
+entry:
+  %0 = load <32 x i8>, <32 x i8>* %__v, align 32
+  %vecext.i.i.i = extractelement <32 x i8> %0, i64 0
+  %vecext.i.i.1.i = extractelement <32 x i8> %0, i64 1
+  %add.i.1.i = add i8 %vecext.i.i.1.i, %vecext.i.i.i
+  %vecext.i.i.2.i = extractelement <32 x i8> %0, i64 2
+  %add.i.2.i = add i8 %vecext.i.i.2.i, %add.i.1.i
+  %vecext.i.i.3.i = extractelement <32 x i8> %0, i64 3
+  %add.i.3.i = add i8 %vecext.i.i.3.i, %add.i.2.i
+  %vecext.i.i.4.i = extractelement <32 x i8> %0, i64 4
+  %add.i.4.i = add i8 %vecext.i.i.4.i, %add.i.3.i
+  %vecext.i.i.5.i = extractelement <32 x i8> %0, i64 5
+  %add.i.5.i = add i8 %vecext.i.i.5.i, %add.i.4.i
+  %vecext.i.i.6.i = extractelement <32 x i8> %0, i64 6
+  %add.i.6.i = add i8 %vecext.i.i.6.i, %add.i.5.i
+  %vecext.i.i.7.i = extractelement <32 x i8> %0, i64 7
+  %add.i.7.i = add i8 %vecext.i.i.7.i, %add.i.6.i
+  %vecext.i.i.8.i = extractelement <32 x i8> %0, i64 8
+  %add.i.8.i = add i8 %vecext.i.i.8.i, %add.i.7.i
+  %vecext.i.i.9.i = extractelement <32 x i8> %0, i64 9
+  %add.i.9.i = add i8 %vecext.i.i.9.i, %add.i.8.i
+  %vecext.i.i.10.i = extractelement <32 x i8> %0, i64 10
+  %add.i.10.i = add i8 %vecext.i.i.10.i, %add.i.9.i
+  %vecext.i.i.11.i = extractelement <32 x i8> %0, i64 11
+  %add.i.11.i = add i8 %vecext.i.i.11.i, %add.i.10.i
+  %vecext.i.i.12.i = extractelement <32 x i8> %0, i64 12
+  %add.i.12.i = add i8 %vecext.i.i.12.i, %add.i.11.i
+  %vecext.i.i.13.i = extractelement <32 x i8> %0, i64 13
+  %add.i.13.i = add i8 %vecext.i.i.13.i, %add.i.12.i
+  %vecext.i.i.14.i = extractelement <32 x i8> %0, i64 14
+  %add.i.14.i = add i8 %vecext.i.i.14.i, %add.i.13.i
+  %vecext.i.i.15.i = extractelement <32 x i8> %0, i64 15
+  %add.i.15.i = add i8 %vecext.i.i.15.i, %add.i.14.i
+  %vecext.i.i.16.i = extractelement <32 x i8> %0, i64 16
+  %add.i.16.i = add i8 %vecext.i.i.16.i, %add.i.15.i
+  %vecext.i.i.17.i = extractelement <32 x i8> %0, i64 17
+  %add.i.17.i = add i8 %vecext.i.i.17.i, %add.i.16.i
+  %vecext.i.i.18.i = extractelement <32 x i8> %0, i64 18
+  %add.i.18.i = add i8 %vecext.i.i.18.i, %add.i.17.i
+  %vecext.i.i.19.i = extractelement <32 x i8> %0, i64 19
+  %add.i.19.i = add i8 %vecext.i.i.19.i, %add.i.18.i
+  %vecext.i.i.20.i = extractelement <32 x i8> %0, i64 20
+  %add.i.20.i = add i8 %vecext.i.i.20.i, %add.i.19.i
+  %vecext.i.i.21.i = extractelement <32 x i8> %0, i64 21
+  %add.i.21.i = add i8 %vecext.i.i.21.i, %add.i.20.i
+  %vecext.i.i.22.i = extractelement <32 x i8> %0, i64 22
+  %add.i.22.i = add i8 %vecext.i.i.22.i, %add.i.21.i
+  %vecext.i.i.23.i = extractelement <32 x i8> %0, i64 23
+  %add.i.23.i = add i8 %vecext.i.i.23.i, %add.i.22.i
+  %vecext.i.i.24.i = extractelement <32 x i8> %0, i64 24
+  %add.i.24.i = add i8 %vecext.i.i.24.i, %add.i.23.i
+  %vecext.i.i.25.i = extractelement <32 x i8> %0, i64 25
+  %add.i.25.i = add i8 %vecext.i.i.25.i, %add.i.24.i
+  %vecext.i.i.26.i = extractelement <32 x i8> %0, i64 26
+  %add.i.26.i = add i8 %vecext.i.i.26.i, %add.i.25.i
+  %vecext.i.i.27.i = extractelement <32 x i8> %0, i64 27
+  %add.i.27.i = add i8 %vecext.i.i.27.i, %add.i.26.i
+  %vecext.i.i.28.i = extractelement <32 x i8> %0, i64 28
+  %add.i.28.i = add i8 %vecext.i.i.28.i, %add.i.27.i
+  %vecext.i.i.29.i = extractelement <32 x i8> %0, i64 29
+  %add.i.29.i = add i8 %vecext.i.i.29.i, %add.i.28.i
+  %vecext.i.i.30.i = extractelement <32 x i8> %0, i64 30
+  %add.i.30.i = add i8 %vecext.i.i.30.i, %add.i.29.i
+  %vecext.i.i.31.i = extractelement <32 x i8> %0, i64 31
+  %add.i.31.i = add i8 %vecext.i.i.31.i, %add.i.30.i
+  ret i8 %add.i.31.i
+}