author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-02-25 07:05:12 +0000
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-02-25 07:05:12 +0000
commit | e5bbca6ae2946c47c407f27b02aab5b5cfb0ecd7 (patch)
tree | 66fadcce139df6398ff351898915e687cf62bdb4 /llvm/test/CodeGen
parent | 26e077178de8278f120810970370938f77f91fd4 (diff)
download | bcm5719-llvm-e5bbca6ae2946c47c407f27b02aab5b5cfb0ecd7.tar.gz, bcm5719-llvm-e5bbca6ae2946c47c407f27b02aab5b5cfb0ecd7.zip
Optimized loading (zextload) of an i1 value from memory.
This patch is a partial revert of https://llvm.org/svn/llvm-project/llvm/trunk@237793.
Extra "and" causes performance degradation.
We assume that i1 is stored in zero-extended form. And store operation is responsible for zeroing upper bits.
Differential Revision: http://reviews.llvm.org/D17541
llvm-svn: 261828
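As an illustration of the pattern this change targets (a minimal sketch; the function and value names below are hypothetical and not taken from the patch): under the zero-extended-store assumption, a zext of an i1 loaded from memory can use the loaded byte directly, while the truncating store keeps the "and $1" that clears the upper bits.

; Hypothetical sketch in 2016-era LLVM IR (typed pointers).
; Load side: the byte in memory is assumed to already be 0 or 1,
; so no extra "andl $1" is needed after the load.
define i32 @load_i1_zext(i1* %p) {
  %v = load i1, i1* %p
  %z = zext i1 %v to i32
  ret i32 %z
}

; Store side: the trunc + store sequence is responsible for zeroing
; the upper bits before the byte is written back to memory.
define void @store_i1(i32 %x, i1* %p) {
  %t = trunc i32 %x to i1
  store i1 %t, i1* %p
  ret void
}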
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/and-encoding.ll | 27
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-insert-extract.ll | 2
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 23
-rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 12
-rw-r--r-- | llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 2
5 files changed, 36 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/X86/and-encoding.ll b/llvm/test/CodeGen/X86/and-encoding.ll
index f7bbac2a4bd..1a90bd0d6eb 100644
--- a/llvm/test/CodeGen/X86/and-encoding.ll
+++ b/llvm/test/CodeGen/X86/and-encoding.ll
@@ -15,27 +15,18 @@ define void @f1() {
   ret void
 }
 
-define void @f2(i1 *%x, i16 *%y) {
+define void @f2(i16 %x, i1 *%y) {
 ; CHECK-LABEL: f2:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
-  %a = load i1, i1* %x
-  %b = zext i1 %a to i16
-  store i16 %b, i16* %y
+; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+  %c = trunc i16 %x to i1
+  store i1 %c, i1* %y
   ret void
 }
 
-define i32 @f3(i1 *%x) {
+define void @f3(i32 %x, i1 *%y) {
 ; CHECK-LABEL: f3:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
-  %a = load i1, i1* %x
-  %b = zext i1 %a to i32
-  ret i32 %b
-}
-
-define i64 @f4(i1 *%x) {
-; CHECK-LABEL: f4:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
-  %a = load i1, i1* %x
-  %b = zext i1 %a to i64
-  ret i64 %b
+; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+  %c = trunc i32 %x to i1
+  store i1 %c, i1* %y
+  ret void
 }
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 41ec62c7e04..ff98524c39a 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -190,7 +190,6 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 
 ;CHECK-LABEL: test15
 ;CHECK: movb (%rdi), %al
-;CHECK: andb $1, %al
 ;CHECK: movw $-1, %ax
 ;CHECK: cmovew
 define i16 @test15(i1 *%addr) {
@@ -202,7 +201,6 @@ define i16 @test15(i1 *%addr) {
 
 ;CHECK-LABEL: test16
 ;CHECK: movb (%rdi), %al
-;CHECK: andw $1, %ax
 ;CHECK: kmovw
 ;CHECK: kshiftlw $10
 ;CHECK: korw
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 66307cd9f2c..10a314d5fce 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1586,7 +1586,6 @@ define void @f1(i32 %c) {
 ; KNL-LABEL: f1:
 ; KNL: ## BB#0: ## %entry
 ; KNL-NEXT: movzbl {{.*}}(%rip), %edi
-; KNL-NEXT: andl $1, %edi
 ; KNL-NEXT: movl %edi, %eax
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: kmovw %eax, %k0
@@ -1601,7 +1600,6 @@ define void @f1(i32 %c) {
 ; SKX-LABEL: f1:
 ; SKX: ## BB#0: ## %entry
 ; SKX-NEXT: movzbl {{.*}}(%rip), %edi
-; SKX-NEXT: andl $1, %edi
 ; SKX-NEXT: movl %edi, %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: kmovw %eax, %k0
@@ -1622,3 +1620,24 @@ entry:
 
 declare void @f2(i32) #1
 
+define void @store_i16_i1(i16 %x, i1 *%y) {
+; CHECK-LABEL: store_i16_i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, (%rsi)
+; CHECK-NEXT: retq
+  %c = trunc i16 %x to i1
+  store i1 %c, i1* %y
+  ret void
+}
+
+define void @store_i8_i1(i8 %x, i1 *%y) {
+; CHECK-LABEL: store_i8_i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, (%rsi)
+; CHECK-NEXT: retq
+  %c = trunc i8 %x to i1
+  store i1 %c, i1* %y
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index a05d805a6ab..45b44ed7048 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1466,7 +1466,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
 ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
 ; SKX-NEXT: # implicit-def: %XMM0
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
 ; SKX-NEXT: je .LBB29_2
 ; SKX-NEXT: # BB#1: # %cond.load
 ; SKX-NEXT: vmovq %xmm1, %rax
@@ -1474,7 +1474,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX-NEXT: .LBB29_2: # %else
 ; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
 ; SKX-NEXT: je .LBB29_4
 ; SKX-NEXT: # BB#3: # %cond.load1
 ; SKX-NEXT: vpextrq $1, %xmm1, %rax
@@ -1482,7 +1482,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX-NEXT: .LBB29_4: # %else2
 ; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
 ; SKX-NEXT: je .LBB29_6
 ; SKX-NEXT: # BB#5: # %cond.load4
 ; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
@@ -1505,7 +1505,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT: # implicit-def: %XMM1
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_2
 ; SKX_32-NEXT: # BB#1: # %cond.load
 ; SKX_32-NEXT: vmovd %xmm2, %eax
@@ -1513,7 +1513,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX_32-NEXT: .LBB29_2: # %else
 ; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_4
 ; SKX_32-NEXT: # BB#3: # %cond.load1
 ; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
@@ -1522,7 +1522,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
 ; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0
 ; SKX_32-NEXT: kmovb %k1, (%esp)
 ; SKX_32-NEXT: movb (%esp), %al
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_6
 ; SKX_32-NEXT: # BB#5: # %cond.load4
 ; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 609e2cc1158..5b6e773fe5d 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -811,8 +811,6 @@ end:
 ;
 ; Load the value of b.
 ; CHECK: movb _b(%rip), [[BOOL:%cl]]
-; Extract i1 from the loaded value.
-; CHECK-NEXT: andb $1, [[BOOL]]
 ; Create the zero value for the select assignment.
 ; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
 ; CHECK-NEXT: testb [[BOOL]], [[BOOL]]