diff options
author | Dehao Chen <dehao@google.com> | 2017-04-19 19:50:34 +0000 |
---|---|---|
committer | Dehao Chen <dehao@google.com> | 2017-04-19 19:50:34 +0000 |
commit | 58601674d2cae4a0c7b05997dc7da34b4f293a4f (patch) | |
tree | bfa00502482748ad6419ebcc924f7185072d2b54 /llvm/test | |
parent | 021a218dd23e6eefee7d4aefbfe511522f9a620f (diff) | |
download | bcm5719-llvm-58601674d2cae4a0c7b05997dc7da34b4f293a4f.tar.gz bcm5719-llvm-58601674d2cae4a0c7b05997dc7da34b4f293a4f.zip |
PR32710: Disable using PMADDWD for unsigned short.
Summary: PMADDWD multiplies packed signed 16-bit values only, so it must not be used when the i16 operands are unsigned (zero-extended).
Reviewers: mkuper, wmi
Reviewed By: mkuper
Subscribers: andreadb, llvm-commits
Differential Revision: https://reviews.llvm.org/D32236
llvm-svn: 300737
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/madd.ll | 60 |
1 file changed, 55 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index fdc5ace8d9b..abc37450e2a 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -3,23 +3,26 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 -;SSE2-label: @_Z10test_shortPsS_i +;SSE2-LABEL: @_Z10test_shortPsS_i ;SSE2: movdqu ;SSE2-NEXT: movdqu ;SSE2-NEXT: pmaddwd ;SSE2-NEXT: paddd +;SSE2: ret -;AVX2-label: @_Z10test_shortPsS_i +;AVX2-LABEL: @_Z10test_shortPsS_i ;AVX2: vmovdqu ;AVX2-NEXT: vpmaddwd ;AVX2-NEXT: vinserti128 ;AVX2-NEXT: vpaddd +;AVX2: ret -;AVX512-label: @_Z10test_shortPsS_i +;AVX512-LABEL: @_Z10test_shortPsS_i ;AVX512: vmovdqu ;AVX512-NEXT: vpmaddwd ;AVX512-NEXT: vinserti128 ;AVX512-NEXT: vpaddd +;AVX512: ret define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 { entry: @@ -54,18 +57,65 @@ middle.block: ret i32 %13 } -;AVX2-label: @_Z9test_charPcS_i +;SSE2-LABEL: @test_unsigned_short +;SSE2-NOT: pmaddwd +;SSE2: ret + +;AVX2-LABEL: @test_unsigned_short +;AVX2-NOT: vpmaddwd +;AVX2: ret + +;AVX512-LABEL: @test_unsigned_short +;AVX512-NOT: vpmaddwd +;AVX512: ret + +define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 { +entry: + %3 = zext i32 %2 to i64 + br label %vector.body + +vector.body: + %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] + %vec.phi = phi <8 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] + %4 = getelementptr inbounds i16, i16* %0, i64 %index + %5 = bitcast i16* %4 to <8 x i16>* + %wide.load = load <8 x i16>, <8 x i16>* %5, align 2 + %6 = zext <8 x i16> %wide.load to <8 x i32> + %7 = getelementptr inbounds i16, i16* %1, i64 %index + %8 = bitcast i16* %7 to <8 x i16>* + %wide.load14 = load <8 x i16>, <8 x i16>* %8, align 2 + %9 = zext <8 x i16> 
%wide.load14 to <8 x i32> + %10 = mul nsw <8 x i32> %9, %6 + %11 = add nsw <8 x i32> %10, %vec.phi + %index.next = add i64 %index, 8 + %12 = icmp eq i64 %index.next, %3 + br i1 %12, label %middle.block, label %vector.body + +middle.block: + %rdx.shuf = shufflevector <8 x i32> %11, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i32> %11, %rdx.shuf + %rdx.shuf15 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx16 = add <8 x i32> %bin.rdx, %rdx.shuf15 + %rdx.shuf17 = shufflevector <8 x i32> %bin.rdx16, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx18 = add <8 x i32> %bin.rdx16, %rdx.shuf17 + %13 = extractelement <8 x i32> %bin.rdx18, i32 0 + ret i32 %13 +} + +;AVX2-LABEL: @_Z9test_charPcS_i ;AVX2: vpmovsxbw ;AVX2-NEXT: vpmovsxbw ;AVX2-NEXT: vpmaddwd ;AVX2-NEXT: vpaddd +;AVX2: ret -;AVX512-label: @_Z9test_charPcS_i +;AVX512-LABEL: @_Z9test_charPcS_i ;AVX512: vpmovsxbw ;AVX512-NEXT: vpmovsxbw ;AVX512-NEXT: vpmaddwd ;AVX512-NEXT: vinserti64x4 ;AVX512-NEXT: vpaddd +;AVX512: ret define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i32) local_unnamed_addr #0 { entry: |