author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-10-20 15:17:27 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-10-20 15:17:27 +0000 |
commit | 0c35aa114d34c4f8add2a532de3a797ef0c1b667 (patch) | |
tree | ff525ad259713faec1e43be5fa0fc1d8e5e689b9 | |
parent | 007416acc80d3436aadf9c82c3b6696333ffad34 (diff) | |
download | bcm5719-llvm-0c35aa114d34c4f8add2a532de3a797ef0c1b667.tar.gz bcm5719-llvm-0c35aa114d34c4f8add2a532de3a797ef0c1b667.zip |
[SLPVectorizer][X86] Add mul/and/or/xor unrolled reduction tests
We miss arithmetic reduction for everything but Add/FAdd (I assume because those are the only cases for which x86 has horizontal ops).
llvm-svn: 344849
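
For context (illustrative only, not part of this commit): for the add case the SLP vectorizer already rewrites this kind of unrolled chain into one wide load followed by a log2-depth "splitting" shuffle reduction, which is what the `Adding cost -2 ... (It is a splitting reduction)` debug line and the `ret i32 [[TMP2]]` check for `@test_add` refer to. A minimal hand-written sketch of that shape, with a hypothetical function name and value names:

```llvm
; Sketch only -- approximates the splitting-reduction shape SLP emits for the
; add case; the new mul/and/or/xor tests below keep the scalar chain instead.
define i32 @add_reduction_sketch(i32* nocapture readonly %p) {
entry:
  %vp = bitcast i32* %p to <8 x i32>*
  %v = load <8 x i32>, <8 x i32>* %vp, align 4
  ; reduce 8 -> 4 -> 2 -> 1 lanes: shuffle the upper half down and add it in
  %s1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %r1 = add <8 x i32> %v, %s1
  %s2 = shufflevector <8 x i32> %r1, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %r2 = add <8 x i32> %r1, %s2
  %s3 = shufflevector <8 x i32> %r2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %r3 = add <8 x i32> %r2, %s3
  %sum = extractelement <8 x i32> %r3, i32 0
  ret i32 %sum
}
```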
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll | 358 |
1 file changed, 351 insertions, 7 deletions
```diff
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
index 8ee37df4e90..b5a96025764 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@@ -3,19 +3,19 @@
 ; RUN: opt -slp-vectorizer -slp-vectorize-hor -S -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -debug < %s 2>&1 | FileCheck --check-prefix=SSE2 %s
 ; REQUIRES: asserts
 
-; int test(unsigned int *p) {
-;   int sum = 0;
+; int test_add(unsigned int *p) {
+;   int result = 0;
 ;   for (int i = 0; i < 8; i++)
-;     sum += p[i];
-;   return sum;
+;     result += p[i];
+;   return result;
 ; }
 
 ; Vector cost is 5, Scalar cost is 7
 ; CHECK: Adding cost -2 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
 ; Vector cost is 11, Scalar cost is 7
 ; SSE2: Adding cost 4 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
-define i32 @test(i32* nocapture readonly %p) {
-; CHECK-LABEL: @test(
+define i32 @test_add(i32* nocapture readonly %p) {
+; CHECK-LABEL: @test_add(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
@@ -42,7 +42,7 @@ define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
-; SSE2-LABEL: @test(
+; SSE2-LABEL: @test_add(
 ; SSE2-NEXT:  entry:
 ; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
 ; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
@@ -94,3 +94,347 @@ entry:
   %mul.714 = add i32 %7, %mul.613
   ret i32 %mul.714
 }
+
+; int test_mul(unsigned int *p) {
+;   int result = 0;
+;   for (int i = 0; i < 8; i++)
+;     result *= p[i];
+;   return result;
+; }
+
+define i32 @test_mul(i32* nocapture readonly %p) {
+; CHECK-LABEL: @test_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = mul i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_29:%.*]] = mul i32 [[TMP2]], [[MUL_18]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_310:%.*]] = mul i32 [[TMP3]], [[MUL_29]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_411:%.*]] = mul i32 [[TMP4]], [[MUL_310]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_512:%.*]] = mul i32 [[TMP5]], [[MUL_411]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_613:%.*]] = mul i32 [[TMP6]], [[MUL_512]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_714:%.*]] = mul i32 [[TMP7]], [[MUL_613]]
+; CHECK-NEXT:    ret i32 [[MUL_714]]
+;
+; SSE2-LABEL: @test_mul(
+; SSE2-NEXT:  entry:
+; SSE2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; SSE2-NEXT:    [[MUL_18:%.*]] = mul i32 [[TMP1]], [[TMP0]]
+; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; SSE2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; SSE2-NEXT:    [[MUL_29:%.*]] = mul i32 [[TMP2]], [[MUL_18]]
+; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; SSE2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; SSE2-NEXT:    [[MUL_310:%.*]] = mul i32 [[TMP3]], [[MUL_29]]
+; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; SSE2-NEXT:    [[MUL_411:%.*]] = mul i32 [[TMP4]], [[MUL_310]]
+; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; SSE2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; SSE2-NEXT:    [[MUL_512:%.*]] = mul i32 [[TMP5]], [[MUL_411]]
+; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; SSE2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; SSE2-NEXT:    [[MUL_613:%.*]] = mul i32 [[TMP6]], [[MUL_512]]
+; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; SSE2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; SSE2-NEXT:    [[MUL_714:%.*]] = mul i32 [[TMP7]], [[MUL_613]]
+; SSE2-NEXT:    ret i32 [[MUL_714]]
+;
+entry:
+  %0 = load i32, i32* %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %1 = load i32, i32* %arrayidx.1, align 4
+  %mul.18 = mul i32 %1, %0
+  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %2 = load i32, i32* %arrayidx.2, align 4
+  %mul.29 = mul i32 %2, %mul.18
+  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %3 = load i32, i32* %arrayidx.3, align 4
+  %mul.310 = mul i32 %3, %mul.29
+  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %4 = load i32, i32* %arrayidx.4, align 4
+  %mul.411 = mul i32 %4, %mul.310
+  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %5 = load i32, i32* %arrayidx.5, align 4
+  %mul.512 = mul i32 %5, %mul.411
+  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %6 = load i32, i32* %arrayidx.6, align 4
+  %mul.613 = mul i32 %6, %mul.512
+  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %7 = load i32, i32* %arrayidx.7, align 4
+  %mul.714 = mul i32 %7, %mul.613
+  ret i32 %mul.714
+}
+
+; int test_and(unsigned int *p) {
+;   int result = 0;
+;   for (int i = 0; i < 8; i++)
+;     result &= p[i];
+;   return result;
+; }
+
+define i32 @test_and(i32* nocapture readonly %p) {
+; CHECK-LABEL: @test_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = and i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_29:%.*]] = and i32 [[TMP2]], [[MUL_18]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_310:%.*]] = and i32 [[TMP3]], [[MUL_29]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_411:%.*]] = and i32 [[TMP4]], [[MUL_310]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_512:%.*]] = and i32 [[TMP5]], [[MUL_411]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_613:%.*]] = and i32 [[TMP6]], [[MUL_512]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_714:%.*]] = and i32 [[TMP7]], [[MUL_613]]
+; CHECK-NEXT:    ret i32 [[MUL_714]]
+;
+; SSE2-LABEL: @test_and(
+; SSE2-NEXT:  entry:
+; SSE2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; SSE2-NEXT:    [[MUL_18:%.*]] = and i32 [[TMP1]], [[TMP0]]
+; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; SSE2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; SSE2-NEXT:    [[MUL_29:%.*]] = and i32 [[TMP2]], [[MUL_18]]
+; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; SSE2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; SSE2-NEXT:    [[MUL_310:%.*]] = and i32 [[TMP3]], [[MUL_29]]
+; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; SSE2-NEXT:    [[MUL_411:%.*]] = and i32 [[TMP4]], [[MUL_310]]
+; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; SSE2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; SSE2-NEXT:    [[MUL_512:%.*]] = and i32 [[TMP5]], [[MUL_411]]
+; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; SSE2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; SSE2-NEXT:    [[MUL_613:%.*]] = and i32 [[TMP6]], [[MUL_512]]
+; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; SSE2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; SSE2-NEXT:    [[MUL_714:%.*]] = and i32 [[TMP7]], [[MUL_613]]
+; SSE2-NEXT:    ret i32 [[MUL_714]]
+;
+entry:
+  %0 = load i32, i32* %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %1 = load i32, i32* %arrayidx.1, align 4
+  %mul.18 = and i32 %1, %0
+  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %2 = load i32, i32* %arrayidx.2, align 4
+  %mul.29 = and i32 %2, %mul.18
+  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %3 = load i32, i32* %arrayidx.3, align 4
+  %mul.310 = and i32 %3, %mul.29
+  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %4 = load i32, i32* %arrayidx.4, align 4
+  %mul.411 = and i32 %4, %mul.310
+  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %5 = load i32, i32* %arrayidx.5, align 4
+  %mul.512 = and i32 %5, %mul.411
+  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %6 = load i32, i32* %arrayidx.6, align 4
+  %mul.613 = and i32 %6, %mul.512
+  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %7 = load i32, i32* %arrayidx.7, align 4
+  %mul.714 = and i32 %7, %mul.613
+  ret i32 %mul.714
+}
+
+; int test_or(unsigned int *p) {
+;   int result = 0;
+;   for (int i = 0; i < 8; i++)
+;     result |= p[i];
+;   return result;
+; }
+
+define i32 @test_or(i32* nocapture readonly %p) {
+; CHECK-LABEL: @test_or(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = or i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_29:%.*]] = or i32 [[TMP2]], [[MUL_18]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_310:%.*]] = or i32 [[TMP3]], [[MUL_29]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_411:%.*]] = or i32 [[TMP4]], [[MUL_310]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_512:%.*]] = or i32 [[TMP5]], [[MUL_411]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_613:%.*]] = or i32 [[TMP6]], [[MUL_512]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_714:%.*]] = or i32 [[TMP7]], [[MUL_613]]
+; CHECK-NEXT:    ret i32 [[MUL_714]]
+;
+; SSE2-LABEL: @test_or(
+; SSE2-NEXT:  entry:
+; SSE2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; SSE2-NEXT:    [[MUL_18:%.*]] = or i32 [[TMP1]], [[TMP0]]
+; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; SSE2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; SSE2-NEXT:    [[MUL_29:%.*]] = or i32 [[TMP2]], [[MUL_18]]
+; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; SSE2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; SSE2-NEXT:    [[MUL_310:%.*]] = or i32 [[TMP3]], [[MUL_29]]
+; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; SSE2-NEXT:    [[MUL_411:%.*]] = or i32 [[TMP4]], [[MUL_310]]
+; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; SSE2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; SSE2-NEXT:    [[MUL_512:%.*]] = or i32 [[TMP5]], [[MUL_411]]
+; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; SSE2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; SSE2-NEXT:    [[MUL_613:%.*]] = or i32 [[TMP6]], [[MUL_512]]
+; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; SSE2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; SSE2-NEXT:    [[MUL_714:%.*]] = or i32 [[TMP7]], [[MUL_613]]
+; SSE2-NEXT:    ret i32 [[MUL_714]]
+;
+entry:
+  %0 = load i32, i32* %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %1 = load i32, i32* %arrayidx.1, align 4
+  %mul.18 = or i32 %1, %0
+  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %2 = load i32, i32* %arrayidx.2, align 4
+  %mul.29 = or i32 %2, %mul.18
+  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %3 = load i32, i32* %arrayidx.3, align 4
+  %mul.310 = or i32 %3, %mul.29
+  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %4 = load i32, i32* %arrayidx.4, align 4
+  %mul.411 = or i32 %4, %mul.310
+  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %5 = load i32, i32* %arrayidx.5, align 4
+  %mul.512 = or i32 %5, %mul.411
+  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %6 = load i32, i32* %arrayidx.6, align 4
+  %mul.613 = or i32 %6, %mul.512
+  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %7 = load i32, i32* %arrayidx.7, align 4
+  %mul.714 = or i32 %7, %mul.613
+  ret i32 %mul.714
+}
+
+; int test_xor(unsigned int *p) {
+;   int result = 0;
+;   for (int i = 0; i < 8; i++)
+;     result ^= p[i];
+;   return result;
+; }
+
+define i32 @test_xor(i32* nocapture readonly %p) {
+; CHECK-LABEL: @test_xor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_29:%.*]] = xor i32 [[TMP2]], [[MUL_18]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_310:%.*]] = xor i32 [[TMP3]], [[MUL_29]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_411:%.*]] = xor i32 [[TMP4]], [[MUL_310]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_512:%.*]] = xor i32 [[TMP5]], [[MUL_411]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_613:%.*]] = xor i32 [[TMP6]], [[MUL_512]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_714:%.*]] = xor i32 [[TMP7]], [[MUL_613]]
+; CHECK-NEXT:    ret i32 [[MUL_714]]
+;
+; SSE2-LABEL: @test_xor(
+; SSE2-NEXT:  entry:
+; SSE2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
+; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; SSE2-NEXT:    [[MUL_18:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; SSE2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; SSE2-NEXT:    [[MUL_29:%.*]] = xor i32 [[TMP2]], [[MUL_18]]
+; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; SSE2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; SSE2-NEXT:    [[MUL_310:%.*]] = xor i32 [[TMP3]], [[MUL_29]]
+; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; SSE2-NEXT:    [[MUL_411:%.*]] = xor i32 [[TMP4]], [[MUL_310]]
+; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; SSE2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; SSE2-NEXT:    [[MUL_512:%.*]] = xor i32 [[TMP5]], [[MUL_411]]
+; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; SSE2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; SSE2-NEXT:    [[MUL_613:%.*]] = xor i32 [[TMP6]], [[MUL_512]]
+; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; SSE2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; SSE2-NEXT:    [[MUL_714:%.*]] = xor i32 [[TMP7]], [[MUL_613]]
+; SSE2-NEXT:    ret i32 [[MUL_714]]
+;
+entry:
+  %0 = load i32, i32* %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %1 = load i32, i32* %arrayidx.1, align 4
+  %mul.18 = xor i32 %1, %0
+  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %2 = load i32, i32* %arrayidx.2, align 4
+  %mul.29 = xor i32 %2, %mul.18
+  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %3 = load i32, i32* %arrayidx.3, align 4
+  %mul.310 = xor i32 %3, %mul.29
+  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %4 = load i32, i32* %arrayidx.4, align 4
+  %mul.411 = xor i32 %4, %mul.310
+  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %5 = load i32, i32* %arrayidx.5, align 4
+  %mul.512 = xor i32 %5, %mul.411
+  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %6 = load i32, i32* %arrayidx.6, align 4
+  %mul.613 = xor i32 %6, %mul.512
+  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %7 = load i32, i32* %arrayidx.7, align 4
+  %mul.714 = xor i32 %7, %mul.613
+  ret i32 %mul.714
+}
```