diff options
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer')
5 files changed, 137 insertions, 133 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index dda8a0c3071..76f41647c62 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -31,50 +31,54 @@ define void @PR28330(i32 %n) { ; ; GATHER-LABEL: @PR28330( ; GATHER-NEXT: entry: -; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer -; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0 -; GATHER-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 -; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 -; GATHER-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 -; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 -; GATHER-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 -; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0 -; GATHER-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 -; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 +; GATHER-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1 +; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80> -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]] -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] -; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] -; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]] -; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] -; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] -; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] -; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7 -; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]] -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] +; GATHER-NEXT: [[TMPP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 +; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> +; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 +; GATHER-NEXT: [[TMPP20:%.*]] = add i32 [[TMPP17]], [[TMP19]] +; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 +; GATHER-NEXT: [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]] +; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 +; GATHER-NEXT: [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]] +; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 +; GATHER-NEXT: [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]] +; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 +; GATHER-NEXT: [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]] +; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 +; GATHER-NEXT: [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]] +; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 +; GATHER-NEXT: [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]] +; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 +; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 +; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 +; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3 +; GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4 +; GATHER-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5 +; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6 +; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7 +; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 +; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]]) +; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[TMPP17]] +; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( @@ -179,50 +183,54 @@ define void @PR32038(i32 %n) { ; ; GATHER-LABEL: @PR32038( ; GATHER-NEXT: entry: -; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer -; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; GATHER-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0 -; GATHER-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 -; GATHER-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 -; GATHER-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 -; GATHER-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 -; GATHER-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 -; GATHER-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0 -; GATHER-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 -; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 +; GATHER-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1 +; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80> -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]] -; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] -; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] -; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]] -; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] -; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] -; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] -; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3 -; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5 -; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7 -; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5 -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] +; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 +; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> +; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 +; GATHER-NEXT: [[TMPP20:%.*]] = add i32 -5, [[TMP19]] +; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 +; GATHER-NEXT: [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]] +; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 +; GATHER-NEXT: [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]] +; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 +; GATHER-NEXT: [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]] +; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 +; GATHER-NEXT: [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]] +; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 +; GATHER-NEXT: [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]] +; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 +; GATHER-NEXT: [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]] +; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 +; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 +; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 +; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3 +; GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4 +; GATHER-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5 +; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6 +; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7 +; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 +; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]]) +; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5 +; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 750a44736c9..e26eeec6308 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -387,14 +387,14 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; to do this backwards this backwards define <4 x i32> @reconstruct(<4 x i32> %c) #0 { ; CHECK-LABEL: @reconstruct( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0 -; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1 -; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2 -; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0 +; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1 +; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2 +; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3 ; CHECK-NEXT: ret <4 x i32> [[RD]] ; ; ZEROTHRESH-LABEL: @reconstruct( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index 557a83a7562..ee2875035c9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -12,19 +12,20 @@ ; CHECK-LABEL: @fn1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1), align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 2), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 8, i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> -; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1) to <2 x i32>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 8, i32 3 +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP12]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> +; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sign-extend.ll b/llvm/test/Transforms/SLPVectorizer/X86/sign-extend.ll index 21f17f310ab..360ce6ab076 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sign-extend.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sign-extend.ll @@ -34,18 +34,15 @@ entry: define <4 x i16> @truncate_v_v(<4 x i32> %lhs) { ; CHECK-LABEL: @truncate_v_v( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[LHS:%.*]], i32 0 -; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[VECEXT]] to i16 -; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i16> undef, i16 [[CONV]], i32 0 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[LHS]], i32 1 -; CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[VECEXT1]] to i16 -; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[CONV2]], i32 1 -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[LHS]], i32 2 -; CHECK-NEXT: [[CONV5:%.*]] = trunc i32 [[VECEXT4]] to i16 -; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[CONV5]], i32 2 -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[LHS]], i32 3 -; CHECK-NEXT: [[CONV8:%.*]] = trunc i32 [[VECEXT7]] to i16 -; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[CONV8]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[LHS:%.*]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i16> undef, i16 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3 +; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[TMP4]], i32 3 ; CHECK-NEXT: ret <4 x i16> [[VECINIT9]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll index 7558c724a15..c2f4b981af9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -11,34 +11,32 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb279: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1 ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: -; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: -; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]] -; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> -; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float -; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0 -; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double> +; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> undef, [[TMP11]] +; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; bb279: |