author    Craig Topper <craig.topper@intel.com>    2019-02-20 21:35:05 +0000
committer Craig Topper <craig.topper@intel.com>    2019-02-20 21:35:05 +0000
commit    55cc7eb5cbfc3cb5faa0e288ebc6aefdb916dccb (patch)
tree      5a483ed7331fd3a2e6cc3eab0ee9b45e9f9fed20
parent    198cc305e985accb3ba74f64e38fd5b3146fe6f4 (diff)
[X86] Add test cases to show missed opportunities to remove AND mask from BTC/BTS/BTR instructions when LHS of AND has known zeros.
We can currently remove the mask if the immediate has all ones in the LSBs, but if the LHS of the AND has known zero bits, then the immediate might have had bits removed.

A similar issue also occurs with shifts and rotates. I'm preparing a common fix for all of them.

llvm-svn: 354520
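For reference, the shape of the missed fold can be sketched in IR as below; the function name and comments are illustrative annotations of the first new test, not part of the patch. The shift by 2 leaves the low two bits of the index known zero, so demanded-bits simplification can shrink the mask 31 down to 28, which no longer has all ones in the LSBs and therefore defeats the existing check.

define i32 @btr_32_mask_zeros_sketch(i32 %x, i32 %n) {
  %idx = shl i32 %n, 2      ; bits 0-1 of %idx are known zero
  %amt = and i32 %idx, 31   ; the shift amount only demands bits 0-4; with
                            ; bits 0-1 already zero the mask can shrink to 28
  %bit = shl i32 1, %amt    ; single set bit at position %amt
  %msk = xor i32 %bit, -1   ; invert to clear that bit
  %res = and i32 %x, %msk   ; lowers to btrl index, value
  ret i32 %res
}

Since the register form of BTR takes its bit offset modulo the operand size anyway, the leftover andb $28 seen in the checks below should become removable once the combine also accounts for known zero bits in the LHS of the AND.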
-rw-r--r--  llvm/test/CodeGen/X86/btc_bts_btr.ll | 187
1 file changed, 187 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index 951794c4fa7..7459e879e78 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -949,3 +949,190 @@ define void @btc_64_dont_fold(i64* %x, i64 %n) {
store i64 %3, i64* %x
ret void
}
+
+define i32 @btr_32_mask_zeros(i32 %x, i32 %n) {
+; X64-LABEL: btr_32_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $28, %sil
+; X64-NEXT: btrl %esi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: btr_32_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: btrl %ecx, %eax
+; X86-NEXT: retl
+ %1 = shl i32 %n, 2
+ %2 = and i32 %1, 31
+ %3 = shl i32 1, %2
+ %4 = xor i32 %3, -1
+ %5 = and i32 %x, %4
+ ret i32 %5
+}
+
+define i32 @bts_32_mask_zeros(i32 %x, i32 %n) {
+; X64-LABEL: bts_32_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $28, %sil
+; X64-NEXT: btsl %esi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: bts_32_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: btsl %ecx, %eax
+; X86-NEXT: retl
+ %1 = shl i32 %n, 2
+ %2 = and i32 %1, 31
+ %3 = shl i32 1, %2
+ %4 = or i32 %x, %3
+ ret i32 %4
+}
+
+define i32 @btc_32_mask_zeros(i32 %x, i32 %n) {
+; X64-LABEL: btc_32_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $28, %sil
+; X64-NEXT: btcl %esi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: btc_32_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: shlb $2, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: btcl %ecx, %eax
+; X86-NEXT: retl
+ %1 = shl i32 %n, 2
+ %2 = and i32 %1, 31
+ %3 = shl i32 1, %2
+ %4 = xor i32 %x, %3
+ ret i32 %4
+}
+
+define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
+; X64-LABEL: btr_64_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $60, %sil
+; X64-NEXT: btrq %rsi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: btr_64_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: shlb $2, %ch
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $60, %cl
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %ch
+; X86-NEXT: je .LBB39_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB39_2:
+; X86-NEXT: notl %edx
+; X86-NEXT: notl %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %1 = shl i64 %n, 2
+ %2 = and i64 %1, 63
+ %3 = shl i64 1, %2
+ %4 = xor i64 %3, -1
+ %5 = and i64 %x, %4
+ ret i64 %5
+}
+
+define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
+; X64-LABEL: bts_64_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $60, %sil
+; X64-NEXT: btsq %rsi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: bts_64_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: shlb $2, %ch
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $60, %cl
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %ch
+; X86-NEXT: je .LBB40_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB40_2:
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %1 = shl i64 %n, 2
+ %2 = and i64 %1, 63
+ %3 = shl i64 1, %2
+ %4 = or i64 %x, %3
+ ret i64 %4
+}
+
+define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
+; X64-LABEL: btc_64_mask_zeros:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlb $2, %sil
+; X64-NEXT: andb $60, %sil
+; X64-NEXT: btcq %rsi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: btc_64_mask_zeros:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: shlb $2, %ch
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $60, %cl
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: andb $28, %cl
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %ch
+; X86-NEXT: je .LBB41_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB41_2:
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %1 = shl i64 %n, 2
+ %2 = and i64 %1, 63
+ %3 = shl i64 1, %2
+ %4 = xor i64 %x, %3
+ ret i64 %4
+}