; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD

@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4

; Horizontal fadd reduction over arr[i]*arr1[i] where each product is reused
; by a second reduction chain; SLP vectorizes the multiplies as <2 x float>.
define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
; CHECK-NEXT:    store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT:    ret float [[ADD19_3]]
;
; THRESHOLD-LABEL: @baz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; THRESHOLD-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; THRESHOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
; THRESHOLD-NEXT:    store float [[ADD19_3]], float* @res, align 4
; THRESHOLD-NEXT:    ret float [[ADD19_3]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %add7 = fadd fast float %add.3, %conv
  %add19 = fadd fast float %mul4, %add7
  %add19.1 = fadd fast float %mul4.1, %add19
  %add19.2 = fadd fast float %mul4.2, %add19.1
  %add19.3 = fadd fast float %mul4.3, %add19.2
  store float %add19.3, float* @res, align 4
  ret float %add19.3
}

; Eight-element product reduction with two scalar extra arguments (conv, conv6);
; expected to become a single <8 x float> fmul plus a shuffle reduction.
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
; CHECK-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
; CHECK-NEXT:    ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @bazz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
; THRESHOLD-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %mul5 = shl nsw i32 %0, 2
  %conv6 = sitofp i32 %mul5 to float
  %add7 = fadd fast float %add.3, %conv6
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
  %mul18 = fmul fast float %10, %9
  %add19 = fadd fast float %mul18, %add7
  %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
  %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
  %mul18.1 = fmul fast float %12, %11
  %add19.1 = fadd fast float %mul18.1, %add19
  %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
  %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
  %mul18.2 = fmul fast float %14, %13
  %add19.2 = fadd fast float %mul18.2, %add19.1
  %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
  %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
  %mul18.3 = fmul fast float %16, %15
  %add19.3 = fadd fast float %mul18.3, %add19.2
  store float %add19.3, float* @res, align 4
  ret float %add19.3
}

; Four-element product reduction whose result is scaled by a scalar; expected
; to become a <4 x float> fmul plus a shuffle reduction.
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT:    store float [[TMP5]], float* @res, align 4
; CHECK-NEXT:    ret float [[TMP5]]
;
; THRESHOLD-LABEL: @bazzz(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT:    store float [[TMP5]], float* @res, align 4
; THRESHOLD-NEXT:    ret float [[TMP5]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %2, %1
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  store float %12, float* @res, align 4
  ret float %12
}

; Same reduction as @bazzz but the scaled result is converted back to i32 and
; stored to @n.
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
; CHECK-NEXT:    ret i32 [[CONV4]]
;
; THRESHOLD-LABEL: @foo(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
; THRESHOLD-NEXT:    ret i32 [[CONV4]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %2, %1
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  %conv4 = fptosi float %12 to i32
  store i32 %conv4, i32* @n, align 4
  ret i32 %conv4
}

; fmax reduction over four products, expressed with fcmp ogt + select; expected
; to become a vector min/max shuffle reduction.
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32>
; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
; CHECK-NEXT:    store float [[TMP3]], float* @res, align 4
; CHECK-NEXT:    ret float [[TMP3]]
;
; THRESHOLD-LABEL: @bar(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32>
; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
; THRESHOLD-NEXT:    store float [[TMP3]], float* @res, align 4
; THRESHOLD-NEXT:    ret float [[TMP3]]
;
entry:
  %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %1, %0
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul3 = fmul fast float %3, %2
  %cmp4 = fcmp fast ogt float %mul, %mul3
  %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul3.1 = fmul fast float %5, %4
  %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
  %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul3.2 = fmul fast float %7, %6
  %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
  %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
  store float %max.0.mul3.2, float* @res, align 4
  ret float %max.0.mul3.2
}

; 48-element fadd reduction over x[0..47]; expected to be split into a
; <16 x float> and a <32 x float> load with two shuffle reductions combined
; by a final scalar fadd.
define float @f(float* nocapture readonly %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
; CHECK-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
; CHECK-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
; CHECK-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
; CHECK-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
; CHECK-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
; CHECK-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
; CHECK-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
; CHECK-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
; CHECK-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
; CHECK-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
; CHECK-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
; CHECK-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
; CHECK-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
; CHECK-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
; CHECK-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32>
; CHECK-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32>
; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32>
; CHECK-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]]
; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32>
; CHECK-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
; CHECK-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32>
; CHECK-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]]
; CHECK-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32>
; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
; CHECK-NEXT:    ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
; THRESHOLD-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
; THRESHOLD-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
; THRESHOLD-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
; THRESHOLD-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
; THRESHOLD-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
; THRESHOLD-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
; THRESHOLD-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
; THRESHOLD-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
; THRESHOLD-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
; THRESHOLD-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
; THRESHOLD-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
; THRESHOLD-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
; THRESHOLD-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
; THRESHOLD-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
; THRESHOLD-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
; THRESHOLD-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
; THRESHOLD-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
; THRESHOLD-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
; THRESHOLD-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
; THRESHOLD-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
; THRESHOLD-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
; THRESHOLD-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
; THRESHOLD-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
; THRESHOLD-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
; THRESHOLD-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
; THRESHOLD-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
; THRESHOLD-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
; THRESHOLD-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
; THRESHOLD-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
; THRESHOLD-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]]
; THRESHOLD-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
; THRESHOLD-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]]
; THRESHOLD-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32>
; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
; THRESHOLD-NEXT:    ret float [[OP_RDX]]
;
entry:
  %0 = load float, float* %x, align 4
  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx.1, align 4
  %add.1 = fadd fast float %1, %0
  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
  %2 =
load float, float* %arrayidx.2, align 4 %add.2 = fadd fast float %2, %add.1 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 %3 = load float, float* %arrayidx.3, align 4 %add.3 = fadd fast float %3, %add.2 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 %4 = load float, float* %arrayidx.4, align 4 %add.4 = fadd fast float %4, %add.3 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5 %5 = load float, float* %arrayidx.5, align 4 %add.5 = fadd fast float %5, %add.4 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 %6 = load float, float* %arrayidx.6, align 4 %add.6 = fadd fast float %6, %add.5 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 %7 = load float, float* %arrayidx.7, align 4 %add.7 = fadd fast float %7, %add.6 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 %8 = load float, float* %arrayidx.8, align 4 %add.8 = fadd fast float %8, %add.7 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 %9 = load float, float* %arrayidx.9, align 4 %add.9 = fadd fast float %9, %add.8 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 %10 = load float, float* %arrayidx.10, align 4 %add.10 = fadd fast float %10, %add.9 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 %11 = load float, float* %arrayidx.11, align 4 %add.11 = fadd fast float %11, %add.10 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 %12 = load float, float* %arrayidx.12, align 4 %add.12 = fadd fast float %12, %add.11 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 %13 = load float, float* %arrayidx.13, align 4 %add.13 = fadd fast float %13, %add.12 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 %14 = load float, float* %arrayidx.14, align 4 %add.14 = fadd fast float %14, %add.13 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 %15 = load float, float* %arrayidx.15, align 4 %add.15 = fadd fast float %15, %add.14 %arrayidx.16 = getelementptr 
inbounds float, float* %x, i64 16 %16 = load float, float* %arrayidx.16, align 4 %add.16 = fadd fast float %16, %add.15 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 %17 = load float, float* %arrayidx.17, align 4 %add.17 = fadd fast float %17, %add.16 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 %18 = load float, float* %arrayidx.18, align 4 %add.18 = fadd fast float %18, %add.17 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 %19 = load float, float* %arrayidx.19, align 4 %add.19 = fadd fast float %19, %add.18 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 %20 = load float, float* %arrayidx.20, align 4 %add.20 = fadd fast float %20, %add.19 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 %21 = load float, float* %arrayidx.21, align 4 %add.21 = fadd fast float %21, %add.20 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 %22 = load float, float* %arrayidx.22, align 4 %add.22 = fadd fast float %22, %add.21 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 %23 = load float, float* %arrayidx.23, align 4 %add.23 = fadd fast float %23, %add.22 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 %24 = load float, float* %arrayidx.24, align 4 %add.24 = fadd fast float %24, %add.23 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 %25 = load float, float* %arrayidx.25, align 4 %add.25 = fadd fast float %25, %add.24 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 %26 = load float, float* %arrayidx.26, align 4 %add.26 = fadd fast float %26, %add.25 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 %27 = load float, float* %arrayidx.27, align 4 %add.27 = fadd fast float %27, %add.26 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 %28 = load float, float* %arrayidx.28, align 4 %add.28 = fadd fast float %28, %add.27 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 %29 = load float, float* 
%arrayidx.29, align 4 %add.29 = fadd fast float %29, %add.28 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 %30 = load float, float* %arrayidx.30, align 4 %add.30 = fadd fast float %30, %add.29 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 %31 = load float, float* %arrayidx.31, align 4 %add.31 = fadd fast float %31, %add.30 %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32 %32 = load float, float* %arrayidx.32, align 4 %add.32 = fadd fast float %32, %add.31 %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33 %33 = load float, float* %arrayidx.33, align 4 %add.33 = fadd fast float %33, %add.32 %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34 %34 = load float, float* %arrayidx.34, align 4 %add.34 = fadd fast float %34, %add.33 %arrayidx.35 = getelementptr inbounds float, float* %x, i64 35 %35 = load float, float* %arrayidx.35, align 4 %add.35 = fadd fast float %35, %add.34 %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36 %36 = load float, float* %arrayidx.36, align 4 %add.36 = fadd fast float %36, %add.35 %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37 %37 = load float, float* %arrayidx.37, align 4 %add.37 = fadd fast float %37, %add.36 %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38 %38 = load float, float* %arrayidx.38, align 4 %add.38 = fadd fast float %38, %add.37 %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39 %39 = load float, float* %arrayidx.39, align 4 %add.39 = fadd fast float %39, %add.38 %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40 %40 = load float, float* %arrayidx.40, align 4 %add.40 = fadd fast float %40, %add.39 %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41 %41 = load float, float* %arrayidx.41, align 4 %add.41 = fadd fast float %41, %add.40 %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42 %42 = load float, float* %arrayidx.42, align 4 %add.42 = fadd fast float %42, %add.41 
%arrayidx.43 = getelementptr inbounds float, float* %x, i64 43 %43 = load float, float* %arrayidx.43, align 4 %add.43 = fadd fast float %43, %add.42 %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44 %44 = load float, float* %arrayidx.44, align 4 %add.44 = fadd fast float %44, %add.43 %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45 %45 = load float, float* %arrayidx.45, align 4 %add.45 = fadd fast float %45, %add.44 %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46 %46 = load float, float* %arrayidx.46, align 4 %add.46 = fadd fast float %46, %add.45 %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47 %47 = load float, float* %arrayidx.47, align 4 %add.47 = fadd fast float %47, %add.46 ret float %add.47 } define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, 
float* [[X]], i64 12 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 ; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] 
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] ; CHECK-NEXT: ret float [[OP_EXTRA]] ; ; THRESHOLD-LABEL: @f1( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* 
[[X]], i64 10 ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 ; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; 
THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; THRESHOLD-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] ; THRESHOLD-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA]] ; entry: %rem = srem i32 %a, %b %conv = sitofp i32 %rem to float %0 = load float, float* %x, align 4 %add = fadd fast float %0, %conv %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1 %1 = load float, float* %arrayidx.1, align 4 %add.1 = fadd fast float %1, %add %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2 %2 = load float, float* %arrayidx.2, align 4 %add.2 = fadd fast float %2, %add.1 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 %3 = load float, float* %arrayidx.3, align 4 %add.3 = fadd fast float %3, %add.2 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 %4 = load float, float* %arrayidx.4, align 4 %add.4 = fadd fast float %4, %add.3 %arrayidx.5 = getelementptr inbounds float, 
float* %x, i64 5 %5 = load float, float* %arrayidx.5, align 4 %add.5 = fadd fast float %5, %add.4 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 %6 = load float, float* %arrayidx.6, align 4 %add.6 = fadd fast float %6, %add.5 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 %7 = load float, float* %arrayidx.7, align 4 %add.7 = fadd fast float %7, %add.6 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 %8 = load float, float* %arrayidx.8, align 4 %add.8 = fadd fast float %8, %add.7 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 %9 = load float, float* %arrayidx.9, align 4 %add.9 = fadd fast float %9, %add.8 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 %10 = load float, float* %arrayidx.10, align 4 %add.10 = fadd fast float %10, %add.9 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 %11 = load float, float* %arrayidx.11, align 4 %add.11 = fadd fast float %11, %add.10 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 %12 = load float, float* %arrayidx.12, align 4 %add.12 = fadd fast float %12, %add.11 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 %13 = load float, float* %arrayidx.13, align 4 %add.13 = fadd fast float %13, %add.12 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 %14 = load float, float* %arrayidx.14, align 4 %add.14 = fadd fast float %14, %add.13 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 %15 = load float, float* %arrayidx.15, align 4 %add.15 = fadd fast float %15, %add.14 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16 %16 = load float, float* %arrayidx.16, align 4 %add.16 = fadd fast float %16, %add.15 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 %17 = load float, float* %arrayidx.17, align 4 %add.17 = fadd fast float %17, %add.16 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 %18 = load float, float* %arrayidx.18, align 4 %add.18 = fadd fast float %18, 
%add.17 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 %19 = load float, float* %arrayidx.19, align 4 %add.19 = fadd fast float %19, %add.18 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 %20 = load float, float* %arrayidx.20, align 4 %add.20 = fadd fast float %20, %add.19 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 %21 = load float, float* %arrayidx.21, align 4 %add.21 = fadd fast float %21, %add.20 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 %22 = load float, float* %arrayidx.22, align 4 %add.22 = fadd fast float %22, %add.21 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 %23 = load float, float* %arrayidx.23, align 4 %add.23 = fadd fast float %23, %add.22 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 %24 = load float, float* %arrayidx.24, align 4 %add.24 = fadd fast float %24, %add.23 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 %25 = load float, float* %arrayidx.25, align 4 %add.25 = fadd fast float %25, %add.24 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 %26 = load float, float* %arrayidx.26, align 4 %add.26 = fadd fast float %26, %add.25 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 %27 = load float, float* %arrayidx.27, align 4 %add.27 = fadd fast float %27, %add.26 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 %28 = load float, float* %arrayidx.28, align 4 %add.28 = fadd fast float %28, %add.27 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 %29 = load float, float* %arrayidx.29, align 4 %add.29 = fadd fast float %29, %add.28 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 %30 = load float, float* %arrayidx.30, align 4 %add.30 = fadd fast float %30, %add.29 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 %31 = load float, float* %arrayidx.31, align 4 %add.31 = fadd fast float %31, %add.30 ret float %add.31 } define float @loadadd31(float* 
nocapture readonly %x) { ; CHECK-LABEL: @loadadd31( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: 
[[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> 
[[BIN_RDX4]], [[RDX_SHUF5]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <8 x float> [[TMP5]], [[RDX_SHUF7]] ; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]] ; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]] ; CHECK-NEXT: [[RDX_SHUF15:%.*]] = shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0 ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] ; CHECK-NEXT: ret float [[TMP12]] ; ; THRESHOLD-LABEL: @loadadd31( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: 
[[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, 
float* [[X]], i64 22 ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; THRESHOLD-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] ; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 ; THRESHOLD-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <8 x float> [[TMP5]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[RDX_SHUF9:%.*]] = 
shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]] ; THRESHOLD-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]] ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; THRESHOLD-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]] ; THRESHOLD-NEXT: [[RDX_SHUF15:%.*]] = shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0 ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] ; THRESHOLD-NEXT: ret float [[TMP12]] ; entry: %arrayidx = getelementptr inbounds float, float* %x, i64 1 %0 = load float, float* %arrayidx, align 4 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2 %1 = load float, float* %arrayidx.1, align 4 %add.1 = fadd fast float %1, %0 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3 %2 = load float, float* %arrayidx.2, align 4 %add.2 = fadd fast float %2, %add.1 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4 %3 = load float, float* %arrayidx.3, align 4 %add.3 = fadd fast float %3, %add.2 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5 %4 = load float, float* %arrayidx.4, align 4 %add.4 = fadd fast float %4, %add.3 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6 %5 = load float, float* 
%arrayidx.5, align 4 %add.5 = fadd fast float %5, %add.4 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7 %6 = load float, float* %arrayidx.6, align 4 %add.6 = fadd fast float %6, %add.5 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8 %7 = load float, float* %arrayidx.7, align 4 %add.7 = fadd fast float %7, %add.6 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9 %8 = load float, float* %arrayidx.8, align 4 %add.8 = fadd fast float %8, %add.7 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10 %9 = load float, float* %arrayidx.9, align 4 %add.9 = fadd fast float %9, %add.8 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11 %10 = load float, float* %arrayidx.10, align 4 %add.10 = fadd fast float %10, %add.9 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12 %11 = load float, float* %arrayidx.11, align 4 %add.11 = fadd fast float %11, %add.10 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13 %12 = load float, float* %arrayidx.12, align 4 %add.12 = fadd fast float %12, %add.11 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14 %13 = load float, float* %arrayidx.13, align 4 %add.13 = fadd fast float %13, %add.12 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15 %14 = load float, float* %arrayidx.14, align 4 %add.14 = fadd fast float %14, %add.13 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16 %15 = load float, float* %arrayidx.15, align 4 %add.15 = fadd fast float %15, %add.14 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17 %16 = load float, float* %arrayidx.16, align 4 %add.16 = fadd fast float %16, %add.15 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18 %17 = load float, float* %arrayidx.17, align 4 %add.17 = fadd fast float %17, %add.16 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19 %18 = load float, float* %arrayidx.18, align 4 %add.18 = fadd fast float %18, %add.17 %arrayidx.19 = getelementptr 
inbounds float, float* %x, i64 20
  %19 = load float, float* %arrayidx.19, align 4
  %add.19 = fadd fast float %19, %add.18
  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21
  %20 = load float, float* %arrayidx.20, align 4
  %add.20 = fadd fast float %20, %add.19
  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22
  %21 = load float, float* %arrayidx.21, align 4
  %add.21 = fadd fast float %21, %add.20
  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23
  %22 = load float, float* %arrayidx.22, align 4
  %add.22 = fadd fast float %22, %add.21
  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24
  %23 = load float, float* %arrayidx.23, align 4
  %add.23 = fadd fast float %23, %add.22
  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25
  %24 = load float, float* %arrayidx.24, align 4
  %add.24 = fadd fast float %24, %add.23
  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26
  %25 = load float, float* %arrayidx.25, align 4
  %add.25 = fadd fast float %25, %add.24
  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27
  %26 = load float, float* %arrayidx.26, align 4
  %add.26 = fadd fast float %26, %add.25
  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
  %27 = load float, float* %arrayidx.27, align 4
  %add.27 = fadd fast float %27, %add.26
  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
  %28 = load float, float* %arrayidx.28, align 4
  %add.28 = fadd fast float %28, %add.27
  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
  %29 = load float, float* %arrayidx.29, align 4
  %add.29 = fadd fast float %29, %add.28
  ret float %add.29
}

; A fadd reduction over x[0..7] with two extra (non-load) reduction operands:
; %conv is added once inside the chain and once more at the end, plus the
; constant 3.0.  The vectorizer should emit one <8 x float> reduction and
; re-apply the extra arguments as scalar OP_EXTRA adds after the horizontal sum.
define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; CHECK-NEXT: ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @extra_args(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add5 = fadd fast float %add4, %conv
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Same as @extra_args, but the constant extra argument 5.0 occurs twice in the
; reduction chain; both occurrences must be replayed as separate scalar adds
; after the vector reduction (OP_EXTRA5 and OP_EXTRA6).
define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args_same_several_times(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
; CHECK-NEXT: ret float [[OP_EXTRA7]]
;
; THRESHOLD-LABEL: @extra_args_same_several_times(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
; THRESHOLD-NEXT: ret float [[OP_EXTRA7]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add41 = fadd fast float %add4, 5.000000e+00
  %add5 = fadd fast float %add41, %conv
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %add4.11 = fadd fast float %add4.1, 5.000000e+00
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.11
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Here one extra operand (%add, which depends on %c) must NOT be folded into
; the reduction's initial value because it is mixed with %conv inside the
; chain; the reduced value is recombined with both extras as scalar adds.
define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; CHECK-NEXT: ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @extra_args_no_replace(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %convc = sitofp i32 %c to float
  %addc = fadd fast float %convc, 3.000000e+00
  %add = fadd fast float %conv, %addc
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add4
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %add5 = fadd fast float %add4.3, %conv
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add5
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Integer reduction over four identical xor/icmp/sext chains; %arg is the
; initial value and the last xor (%x4) is also an extra operand of the final
; add, so it is extracted from the vectorized xor (element 3) and re-added.
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
; CHECK-NEXT: ret i32 [[OP_EXTRA3]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT: bb:
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
; THRESHOLD-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]]
;
bb:
  %x1 = xor i32 %arg, %bar
  %i1 = icmp eq i32 %x1, 0
  %s1 = sext i1 %i1 to i32
  %x2 = xor i32 %arg, %bar
  %i2 = icmp eq i32 %x2, 0
  %s2 = sext i1 %i2 to i32
  %x3 = xor i32 %arg, %bar
  %i3 = icmp eq i32 %x3, 0
  %s3 = sext i1 %i3 to i32
  %x4 = xor i32 %arg, %bar
  %i4 = icmp eq i32 %x4, 0
  %s4 = sext i1 %i4 to i32
  %r1 = add nuw i32 %arg, %s1
  %r2 = add nsw i32 %r1, %s2
  %r3 = add nsw i32 %r2, %s3
  %r4 = add nsw i32 %r3, %s4
  %r5 = add nsw i32 %r4, %x4
  ret i32 %r5
}