| field | value | date |
|---|---|---|
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-10-21 11:50:54 +0000 |
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-10-21 11:50:54 +0000 |
| commit | 3ad76a1acdfa499f3ace3c087d0453dc0ae96064 (patch) | |
| tree | c122f2d1a832959e97cf23a4423f4b97693504c7 /llvm/test/CodeGen/X86/masked_memop.ll | |
| parent | bd3c08e30119fe795a1b671b72fa2138bf645ebd (diff) | |
Masked Load/Store optimization for scalar code
When masked.load and masked.store intrinsics have to be converted to scalar code, we generate a chain of conditional basic blocks.
I added an optimization for the case of a constant mask vector, where the per-lane conditional blocks can be dropped (a sketch of the resulting IR follows below).
Differential Revision: http://reviews.llvm.org/D13855
llvm-svn: 250893
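
As context for the new AVX_SCALAR tests below, here is a minimal sketch of what the constant-mask case looks like in IR. The function name `@constmask_load` and the commented-out result are illustrative only (they are not taken from the patch); the typed-pointer syntax matches what this test file used at the time.

```llvm
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)

; Input: a masked load whose mask is the constant <true, false, true, true>.
define <4 x float> @constmask_load(<4 x float>* %addr, <4 x float> %src0) {
  %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 16,
              <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %src0)
  ret <4 x float> %res
}

; With a constant mask no conditional basic blocks are needed: only the true
; lanes are loaded as scalars, the values are assembled with insertelement,
; and one select against the pass-through %src0 applies the mask. Roughly
; (names illustrative):
;   %v0  = load float, ...                                    ; lane 0
;   %r0  = insertelement <4 x float> undef, float %v0, i32 0
;   ...lanes 2 and 3 likewise; lane 1 is never loaded...
;   %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>,
;                 <4 x float> %rN, <4 x float> %src0
; An all-true mask (test19/test21 below) collapses to a plain vector load or store.
```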
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_memop.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll | 37 |
1 file changed, 37 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index cb1d29210ef..84ebdf1ef18 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -235,12 +235,49 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
 ; AVX2-LABEL: test18
 ; AVX2: vmaskmovps
 ; AVX2-NOT: blend
+; AVX2: ret
 define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
   %res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
   ret <2 x float> %res
 }
+; AVX_SCALAR-LABEL: test19
+; AVX_SCALAR: load <4 x float>, <4 x float>* %addr, align 4
+
+define <4 x float> @test19(<4 x i32> %trigger, <4 x float>* %addr) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
+  ret <4 x float> %res
+}
+
+; AVX_SCALAR-LABEL: test20
+; AVX_SCALAR: load float, {{.*}}, align 4
+; AVX_SCALAR: insertelement <4 x float> undef, float
+; AVX_SCALAR: select <4 x i1> <i1 true, i1 false, i1 true, i1 true>
+
+define <4 x float> @test20(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %src0) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 16, <4 x i1><i1 true, i1 false, i1 true, i1 true>, <4 x float> %src0)
+  ret <4 x float> %res
+}
+
+; AVX_SCALAR-LABEL: test21
+; AVX_SCALAR: store <4 x i32> %val
+define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
+  ret void
+}
+
+; AVX_SCALAR-LABEL: test22
+; AVX_SCALAR: extractelement <4 x i32> %val, i32 0
+; AVX_SCALAR: store i32
+define void @test22(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
+  ret void
+}
 declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
 declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
```
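
Note that the new AVX_SCALAR check lines match IR constructs (load, insertelement, select) rather than assembly, which suggests they are exercised by running the file through opt (where masked intrinsics were scalarized in CodeGenPrepare at the time) rather than llc. The actual RUN lines sit at the top of masked_memop.ll and are not part of this hunk; the following is only an assumed sketch of their shape.

```llvm
; Assumed RUN line for the AVX_SCALAR prefix (illustrative; the real RUN lines
; at the top of the file are authoritative):
; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s \
; RUN:   | FileCheck %s -check-prefix=AVX_SCALAR
```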

