diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-05-17 07:28:51 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-05-17 07:28:51 +0000 |
commit | 1d6a495d6df51a89c8493d18e1bf7384df757528 (patch) | |
tree | 9b7118ab8778687a6924f58eac3c318ed70d480a | |
parent | 2fd16632bc25cc1c7246f4f6ce7cd45240c06ead (diff) | |
download | bcm5719-llvm-1d6a495d6df51a89c8493d18e1bf7384df757528.tar.gz bcm5719-llvm-1d6a495d6df51a89c8493d18e1bf7384df757528.zip |
AVX-512: fixed a bug in mask operations - (i1 1) pattern
Filling the k-register with an all-ones value was wrong;
(i1 1) should set only one bit in the mask register.
llvm-svn: 237536
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 4 | ||||
-rwxr-xr-x | llvm/test/CodeGen/X86/avx512-mask-bugfix.ll | 57 |
2 files changed, 59 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ef7abd77e14..0470a5a7e39 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2156,8 +2156,8 @@ let Predicates = [HasAVX512] in { def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; - def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>; - def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>; + def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; + def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; } def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>; diff --git a/llvm/test/CodeGen/X86/avx512-mask-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-bugfix.ll new file mode 100755 index 00000000000..1940680f1c1 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-mask-bugfix.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; ModuleID = 'foo.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind readnone +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) #0 + +; Function Attrs: nounwind readnone +declare i64 @llvm.cttz.i64(i64, i1) #0 + +; Function Attrs: nounwind +define void @foo(float* noalias %aFOO, float %b, i32 %a) { +allocas: + %full_mask_memory.i57 = alloca <8 x float> + %return_value_memory.i60 = alloca i1 + %cmp.i = icmp eq i32 %a, 65535 + br i1 %cmp.i, label %all_on, label %some_on + +all_on: + %mask0 = load <8 x float>, <8 x float>* %full_mask_memory.i57 + %v0.i.i.i70 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0 + %allon.i.i76 = icmp eq i32 %v0.i.i.i70, 65535 + br i1 %allon.i.i76, label %check_neighbors.i.i121, label %domixed.i.i100 + 
+domixed.i.i100: + br label %check_neighbors.i.i121 + +check_neighbors.i.i121: + %v1.i5.i.i116 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0 + %alleq.i.i120 = icmp eq i32 %v1.i5.i.i116, 65535 + br i1 %alleq.i.i120, label %all_equal.i.i123, label %not_all_equal.i.i124 + +; CHECK: kxnorw %k0, %k0, %k0 +; CHECK: kshiftrw $15, %k0, %k0 +; CHECK: jmp +; CHECK: kxorw %k0, %k0, %k0 + +all_equal.i.i123: + br label %reduce_equal___vyi.exit128 + +not_all_equal.i.i124: + br label %reduce_equal___vyi.exit128 + +reduce_equal___vyi.exit128: + %calltmp2.i125 = phi i1 [ true, %all_equal.i.i123 ], [ false, %not_all_equal.i.i124 ] + store i1 %calltmp2.i125, i1* %return_value_memory.i60 + %return_value.i126 = load i1, i1* %return_value_memory.i60 + %. = select i1 %return_value.i126, i32 1, i32 0 + %select_to_float = sitofp i32 %. to float + ret void + +some_on: + ret void +} + |