diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/memcmp.ll | 3319 |
1 files changed, 3235 insertions, 84 deletions
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 97116d991c1..1493879649e 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1,17 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,SSE,X86-SSE1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512BW +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 declare i32 @memcmp(i8*, i8*, i64) @@ -189,7 +193,7 @@ define i1 @length2_eq_const(i8* %X) nounwind { ; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 ; X64-NEXT: setne %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } @@ -431,7 +435,7 @@ define i1 @length4_eq_const(i8* %X) nounwind { ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 ; X64-NEXT: sete %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 4) nounwind %c = icmp eq i32 %m, 0 ret i1 %c } @@ -679,7 +683,7 @@ define i1 @length8_eq_const(i8* %X) nounwind { ; X64-NEXT: cmpq %rax, (%rdi) ; X64-NEXT: setne %al ; X64-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 8) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } @@ -990,6 +994,17 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind { ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length16_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu (%eax), %xmm1 +; X86-SSE41-NEXT: pxor %xmm0, %xmm1 +; X86-SSE41-NEXT: ptest %xmm1, %xmm1 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length16_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1000,6 +1015,15 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind { ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length16_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE41-NEXT: pxor %xmm0, %xmm1 +; X64-SSE41-NEXT: ptest %xmm1, %xmm1 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length16_eq: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1007,19 +1031,97 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length16_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp ne i32 %call, 0 ret i1 %cmp } +define i1 @length16_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length16_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $16 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length16_lt: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: jne .LBB33_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB33_3 +; X64-NEXT: .LBB33_2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: .LBB33_3: # %endblock +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length16_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $16 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length16_gt: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB34_2 +; X64-NEXT: # %bb.1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rcx +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB34_3 +; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: .LBB34_3: # %endblock +; X64-NEXT: testl %edx, %edx +; X64-NEXT: setg %al +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length16_eq_const(i8* %X) nounwind { ; X86-NOSSE-LABEL: length16_eq_const: ; X86-NOSSE: # %bb.0: @@ -1055,6 +1157,15 @@ define i1 @length16_eq_const(i8* %X) nounwind { ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length16_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length16_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1064,6 +1175,14 @@ define i1 @length16_eq_const(i8* %X) nounwind { ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length16_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length16_eq_const: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1071,15 +1190,7 @@ define i1 @length16_eq_const(i8* %X) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length16_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 16) nounwind %c = icmp eq i32 %m, 0 ret i1 %c } @@ -1146,6 +1257,21 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind { ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length24_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length24_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1160,6 +1286,19 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind { ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length24_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE41-NEXT: pxor %xmm0, %xmm1 +; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: por %xmm1, %xmm2 +; X64-SSE41-NEXT: ptest %xmm2, %xmm2 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length24_eq: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1171,23 +1310,65 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length24_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp } +define i1 @length24_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length24_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length24_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length24_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length24_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length24_eq_const(i8* %X) nounwind { ; X86-NOSSE-LABEL: length24_eq_const: ; X86-NOSSE: # %bb.0: @@ -1226,6 +1407,18 @@ define i1 @length24_eq_const(i8* %X) nounwind { ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length24_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length24_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1238,6 +1431,17 @@ define i1 @length24_eq_const(i8* %X) nounwind { ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length24_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length24_eq_const: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1248,18 +1452,364 @@ define i1 @length24_eq_const(i8* %X) nounwind { ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: setne %al ; X64-AVX-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length31: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $31 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl ; -; X64-AVX512-LABEL: length24_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind +; X64-LABEL: length31: +; X64: # %bb.0: +; X64-NEXT: movl $31, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(i8* %x, i8* %y) nounwind { +; X86-NOSSE-LABEL: length31_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length31_eq: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $31 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length31_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length31_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length31_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length31_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length31_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $31 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length31_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $31, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length31_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $31 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length31_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $31, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length31_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length31_eq_prefer128: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $31 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length31_eq_prefer128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE2-NEXT: movdqu (%eax), %xmm2 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length31_eq_prefer128: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq_prefer128: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length31_eq_prefer128: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length31_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(i8* %X) nounwind { +; X86-NOSSE-LABEL: length31_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE1-LABEL: length31_eq_const: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $31 +; X86-SSE1-NEXT: pushl $.L.str +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: setne %al +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: length31_eq_const: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqu (%eax), %xmm0 +; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pand %xmm1, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: setne %al +; X86-SSE2-NEXT: retl +; +; X86-SSE41-LABEL: length31_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; +; X64-SSE2-LABEL: length31_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 +; X64-SSE2-NEXT: pand %xmm1, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-SSE41-LABEL: length31_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; +; X64-AVX-LABEL: length31_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 31) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } @@ -1326,6 +1876,21 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length32_eq: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length32_eq: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1340,6 +1905,19 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length32_eq: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX1-LABEL: length32_eq: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0 @@ -1373,6 +1951,60 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ret i1 %cmp } +define i1 @length32_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length32_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $32 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length32_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $32, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length32_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $32 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length32_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $32, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { ; X86-NOSSE-LABEL: length32_eq_prefer128: ; X86-NOSSE: # %bb.0: @@ -1414,6 +2046,21 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"= ; X86-SSE2-NEXT: sete %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length32_eq_prefer128: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 +; X86-SSE41-NEXT: movdqu (%eax), %xmm2 +; X86-SSE41-NEXT: pxor %xmm0, %xmm2 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 +; X86-SSE41-NEXT: pxor %xmm1, %xmm0 +; X86-SSE41-NEXT: por %xmm2, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: sete %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length32_eq_prefer128: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1428,6 +2075,19 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"= ; X64-SSE2-NEXT: sete %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length32_eq_prefer128: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE41-NEXT: pxor %xmm0, %xmm2 +; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE41-NEXT: pxor %xmm1, %xmm0 +; X64-SSE41-NEXT: por %xmm2, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: sete %al +; X64-SSE41-NEXT: retq +; ; X64-AVX-LABEL: length32_eq_prefer128: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 @@ -1438,17 +2098,6 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"= ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq_prefer128: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX512-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vptest %xmm0, %xmm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp @@ -1492,6 +2141,18 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X86-SSE2-NEXT: setne %al ; X86-SSE2-NEXT: retl ; +; X86-SSE41-LABEL: length32_eq_const: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movdqu (%eax), %xmm0 +; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1 +; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0 +; X86-SSE41-NEXT: por %xmm1, %xmm0 +; X86-SSE41-NEXT: ptest %xmm0, %xmm0 +; X86-SSE41-NEXT: setne %al +; X86-SSE41-NEXT: retl +; ; X64-SSE2-LABEL: length32_eq_const: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1504,6 +2165,17 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X64-SSE2-NEXT: setne %al ; X64-SSE2-NEXT: retq ; +; X64-SSE41-LABEL: length32_eq_const: +; X64-SSE41: # %bb.0: +; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1 +; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE41-NEXT: por %xmm1, %xmm0 +; X64-SSE41-NEXT: ptest %xmm0, %xmm0 +; X64-SSE41-NEXT: setne %al +; X64-SSE41-NEXT: retq +; ; X64-AVX1-LABEL: length32_eq_const: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0 @@ -1532,11 +2204,712 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X64-AVX512-NEXT: setne %al ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 32) nounwind %c = icmp ne i32 %m, 0 ret i1 %c } +define i32 @length48(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length48: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length48: +; X64: # %bb.0: +; X64-NEXT: movl $48, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length48_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length48_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length48_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $48, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length48_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: movq 32(%rdi), %rcx +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $8, %edx +; X64-AVX2-NEXT: vmovd %ecx, %xmm0 +; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $16, %edx +; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $24, %edx +; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1 +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: movq 40(%rdi), %rcx +; X64-AVX2-NEXT: shrq $48, %rdx +; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $56, %rdx +; X64-AVX2-NEXT: shrq $56, %rax +; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $16, %eax +; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $24, %eax +; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $32, %rax +; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $48, %rax +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq 32(%rsi), %rcx +; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $8, %edx +; X64-AVX2-NEXT: vmovd %ecx, %xmm2 +; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $16, %edx +; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %edx +; X64-AVX2-NEXT: shrl $24, %edx +; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq 40(%rsi), %rcx +; X64-AVX2-NEXT: shrq $48, %rdx +; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rdx +; X64-AVX2-NEXT: shrq $56, %rax +; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $16, %eax +; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movl %ecx, %eax +; X64-AVX2-NEXT: shrl $24, %eax +; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $32, %rax +; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: movq %rcx, %rax +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: shrq $48, %rax +; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: shrq $56, %rdx +; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length48_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movq 32(%rdi), %rcx +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $8, %edx +; X64-AVX512-NEXT: vmovd %ecx, %xmm0 +; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $16, %edx +; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $24, %edx +; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1 +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: movq 40(%rdi), %rcx +; X64-AVX512-NEXT: shrq $48, %rdx +; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $56, %rdx +; X64-AVX512-NEXT: shrq $56, %rax +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $16, %eax +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $24, %eax +; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $32, %rax +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $48, %rax +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq 32(%rsi), %rcx +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $8, %edx +; X64-AVX512-NEXT: vmovd %ecx, %xmm2 +; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $16, %edx +; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %edx +; X64-AVX512-NEXT: shrl $24, %edx +; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq 40(%rsi), %rcx +; X64-AVX512-NEXT: shrq $48, %rdx +; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rdx +; X64-AVX512-NEXT: shrq $56, %rax +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $16, %eax +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movl %ecx, %eax +; X64-AVX512-NEXT: shrl $24, %eax +; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $32, %rax +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: movq %rcx, %rax +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX512-NEXT: shrq $48, %rax +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX512-NEXT: shrq $56, %rdx +; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length48_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length48_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length48_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length48_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" { +; X86-LABEL: length48_eq_prefer128: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length48_eq_prefer128: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $48, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(i8* %X) nounwind { +; X86-LABEL: length48_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $48 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length48_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length48_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $48, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length48_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: pushq %r15 +; X64-AVX2-NEXT: pushq %r14 +; X64-AVX2-NEXT: pushq %r12 +; X64-AVX2-NEXT: pushq %rbx +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: movq 40(%rdi), %rcx +; X64-AVX2-NEXT: movq %rcx, %r8 +; X64-AVX2-NEXT: shrq $56, %r8 +; X64-AVX2-NEXT: movq %rcx, %r9 +; X64-AVX2-NEXT: shrq $48, %r9 +; X64-AVX2-NEXT: movq %rcx, %r10 +; X64-AVX2-NEXT: shrq $32, %r10 +; X64-AVX2-NEXT: movl %ecx, %r11d +; X64-AVX2-NEXT: shrl $24, %r11d +; X64-AVX2-NEXT: movl %ecx, %r14d +; X64-AVX2-NEXT: shrl $16, %r14d +; X64-AVX2-NEXT: movl %ecx, %r15d +; X64-AVX2-NEXT: shrl $8, %r15d +; X64-AVX2-NEXT: movq 32(%rdi), %rdi +; X64-AVX2-NEXT: movq %rdi, %r12 +; X64-AVX2-NEXT: shrq $56, %r12 +; X64-AVX2-NEXT: movq %rdi, %rbx +; X64-AVX2-NEXT: shrq $48, %rbx +; X64-AVX2-NEXT: movq %rdi, %rdx +; X64-AVX2-NEXT: shrq $32, %rdx +; X64-AVX2-NEXT: movl %edi, %ebp +; X64-AVX2-NEXT: shrl $24, %ebp +; X64-AVX2-NEXT: movl %edi, %esi +; X64-AVX2-NEXT: shrl $16, %esi +; X64-AVX2-NEXT: vmovd %edi, %xmm1 +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shrl $8, %eax +; X64-AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX2-NEXT: shrq $40, %rdi +; X64-AVX2-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1 +; X64-AVX2-NEXT: shrq $40, %rcx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rbx +; X64-AVX2-NEXT: popq %r12 +; X64-AVX2-NEXT: popq %r14 +; X64-AVX2-NEXT: popq %r15 +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length48_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: pushq %rbp +; X64-AVX512-NEXT: pushq %r15 +; X64-AVX512-NEXT: pushq %r14 +; X64-AVX512-NEXT: pushq %r12 +; X64-AVX512-NEXT: pushq %rbx +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: movq 40(%rdi), %rcx +; X64-AVX512-NEXT: movq %rcx, %r8 +; X64-AVX512-NEXT: shrq $56, %r8 +; X64-AVX512-NEXT: movq %rcx, %r9 +; X64-AVX512-NEXT: shrq $48, %r9 +; X64-AVX512-NEXT: movq %rcx, %r10 +; X64-AVX512-NEXT: shrq $32, %r10 +; X64-AVX512-NEXT: movl %ecx, %r11d +; X64-AVX512-NEXT: shrl $24, %r11d +; X64-AVX512-NEXT: movl %ecx, %r14d +; X64-AVX512-NEXT: shrl $16, %r14d +; X64-AVX512-NEXT: movl %ecx, %r15d +; X64-AVX512-NEXT: shrl $8, %r15d +; X64-AVX512-NEXT: movq 32(%rdi), %rdi +; X64-AVX512-NEXT: movq %rdi, %r12 +; X64-AVX512-NEXT: shrq $56, %r12 +; X64-AVX512-NEXT: movq %rdi, %rbx +; X64-AVX512-NEXT: shrq $48, %rbx +; X64-AVX512-NEXT: movq %rdi, %rdx +; X64-AVX512-NEXT: shrq $32, %rdx +; X64-AVX512-NEXT: movl %edi, %ebp +; X64-AVX512-NEXT: shrl $24, %ebp +; X64-AVX512-NEXT: movl %edi, %esi +; X64-AVX512-NEXT: shrl $16, %esi +; X64-AVX512-NEXT: vmovd %edi, %xmm1 +; X64-AVX512-NEXT: movl %edi, %eax +; X64-AVX512-NEXT: shrl $8, %eax +; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512-NEXT: shrq $40, %rdi +; X64-AVX512-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1 +; X64-AVX512-NEXT: shrq $40, %rcx +; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: popq %rbx +; X64-AVX512-NEXT: popq %r12 +; X64-AVX512-NEXT: popq %r14 +; X64-AVX512-NEXT: popq %r15 +; X64-AVX512-NEXT: popq %rbp +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length63: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length63: +; X64: # %bb.0: +; X64-NEXT: movl $63, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length63_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length63_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $63, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length63_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length63_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length63_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $63, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length63_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length63_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $63, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(i8* %X) nounwind { +; X86-LABEL: length63_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $63 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length63_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length63_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $63, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length63_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vptest %ymm0, %ymm0 +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: length63_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + define i32 @length64(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length64: ; X86: # %bb.0: @@ -1569,15 +2942,15 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind { ; X86-NEXT: setne %al ; X86-NEXT: retl ; -; X64-SSE2-LABEL: length64_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq +; X64-SSE-LABEL: length64_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq ; ; X64-AVX1-LABEL: length64_eq: ; X64-AVX1: # %bb.0: @@ -1623,6 +2996,60 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind { ret i1 %cmp } +define i1 @length64_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length64_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length64_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $64, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length64_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $64 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length64_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $64, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + define i1 @length64_eq_const(i8* %X) nounwind { ; X86-LABEL: length64_eq_const: ; X86: # %bb.0: @@ -1636,16 +3063,16 @@ define i1 @length64_eq_const(i8* %X) nounwind { ; X86-NEXT: sete %al ; X86-NEXT: retl ; -; X64-SSE2-LABEL: length64_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $.L.str, %esi -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq +; X64-SSE-LABEL: length64_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq ; ; X64-AVX1-LABEL: length64_eq_const: ; X64-AVX1: # %bb.0: @@ -1687,7 +3114,1731 @@ define i1 @length64_eq_const(i8* %X) nounwind { ; X64-AVX512BW-NEXT: setb %al ; X64-AVX512BW-NEXT: vzeroupper ; X64-AVX512BW-NEXT: retq - %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length96: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length96: +; X64: # %bb.0: +; X64-NEXT: movl $96, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length96_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length96_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $96, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length96_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $96, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length96_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: movq 80(%rdi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm0 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: movq 88(%rdi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X64-AVX512F-NEXT: movq 64(%rdi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm2 +; X64-AVX512F-NEXT: movq 72(%rdi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: movq 80(%rsi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm3 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: movq 88(%rsi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3 +; X64-AVX512F-NEXT: movq 64(%rsi), %rax +; X64-AVX512F-NEXT: vmovd %eax, %xmm4 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: movq 72(%rsi), %rax +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm4, %xmm4 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm1 +; X64-AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm2, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length96_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: movq 80(%rdi), %rcx +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 88(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 64(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm2 +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1 +; X64-AVX512BW-NEXT: movq 72(%rdi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq 80(%rsi), %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm3 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq 88(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq 64(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %ecx, %xmm4 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq 72(%rsi), %rcx +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rax +; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $8, %eax +; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $16, %eax +; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movl %ecx, %eax +; X64-AVX512BW-NEXT: shrl $24, %eax +; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $32, %rax +; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: movq %rcx, %rax +; X64-AVX512BW-NEXT: shrq $40, %rcx +; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: shrq $48, %rax +; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm2 +; X64-AVX512BW-NEXT: vpcmpeqb %zmm2, %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length96_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $96, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length96_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length96_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $96, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(i8* %X) nounwind { +; X86-LABEL: length96_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $96 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length96_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length96_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $96, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length96_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $96, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length96_eq_const: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: movq 72(%rdi), %rax +; X64-AVX512F-NEXT: movq 64(%rdi), %rcx +; X64-AVX512F-NEXT: vmovd %ecx, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rcx +; X64-AVX512F-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: shrq $32, %rax +; X64-AVX512F-NEXT: movq 88(%rdi), %rcx +; X64-AVX512F-NEXT: movq 80(%rdi), %rdx +; X64-AVX512F-NEXT: vmovd %edx, %xmm2 +; X64-AVX512F-NEXT: shrq $32, %rdx +; X64-AVX512F-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: shrq $32, %rcx +; X64-AVX512F-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2 +; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length96_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: movq 80(%rdi), %rax +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: vmovd %eax, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 88(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rcx +; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $8, %ecx +; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $16, %ecx +; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $24, %ecx +; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $32, %rcx +; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq 64(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $8, %edx +; X64-AVX512BW-NEXT: vmovd %eax, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $16, %edx +; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %edx +; X64-AVX512BW-NEXT: shrl $24, %edx +; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $32, %rdx +; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm2 +; X64-AVX512BW-NEXT: movq 72(%rdi), %rax +; X64-AVX512BW-NEXT: shrq $48, %rdx +; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rdx +; X64-AVX512BW-NEXT: shrq $56, %rcx +; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $8, %ecx +; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $16, %ecx +; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movl %eax, %ecx +; X64-AVX512BW-NEXT: shrl $24, %ecx +; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $32, %rcx +; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: movq %rax, %rcx +; X64-AVX512BW-NEXT: shrq $40, %rax +; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: shrq $48, %rcx +; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: shrq $56, %rdx +; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm2, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length127: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length127: +; X64: # %bb.0: +; X64-NEXT: movl $127, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length127_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length127_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $127, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length127_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $127, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length127_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd 63(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length127_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb 63(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length127_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length127_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length127_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(i8* %X) nounwind { +; X86-LABEL: length127_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $127 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length127_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length127_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $127, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length127_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $127, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length127_eq_const: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length127_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length128: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length128: +; X64: # %bb.0: +; X64-NEXT: movl $128, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length128_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length128_eq: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $128, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length128_eq: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $128, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: setne %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length128_eq: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setae %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length128_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length128_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $128, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length128_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length128_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $128, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(i8* %X) nounwind { +; X86-LABEL: length128_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $128 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-SSE-LABEL: length128_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: length128_eq_const: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: movl $.L.str, %esi +; X64-AVX1-NEXT: movl $128, %edx +; X64-AVX1-NEXT: callq memcmp +; X64-AVX1-NEXT: testl %eax, %eax +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: popq %rcx +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: length128_eq_const: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rax +; X64-AVX2-NEXT: movl $.L.str, %esi +; X64-AVX2-NEXT: movl $128, %edx +; X64-AVX2-NEXT: callq memcmp +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: popq %rcx +; X64-AVX2-NEXT: retq +; +; X64-AVX512F-LABEL: length128_eq_const: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512F-NEXT: kortestw %k0, %k0 +; X64-AVX512F-NEXT: setb %al +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length128_eq_const: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1 +; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1} +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length192: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length192: +; X64: # %bb.0: +; X64-NEXT: movl $192, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length192_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length192_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length192_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length192_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(i8* %X) nounwind { +; X86-LABEL: length192_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $192 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length192_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $192, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length255: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length255: +; X64: # %bb.0: +; X64-NEXT: movl $255, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length255_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length255_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length255_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length255_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(i8* %X) nounwind { +; X86-LABEL: length255_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $255 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length255_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $255, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length256: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length256: +; X64: # %bb.0: +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length256_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length256_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length256_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length256_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(i8* %X) nounwind { +; X86-LABEL: length256_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length256_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $256, %edx # imm = 0x100 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length384: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length384: +; X64: # %bb.0: +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length384_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length384_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length384_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length384_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length384_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length384_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(i8* %X) nounwind { +; X86-LABEL: length384_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $384 # imm = 0x180 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length384_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $384, %edx # imm = 0x180 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length511: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length511: +; X64: # %bb.0: +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length511_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length511_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length511_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length511_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(i8* %X) nounwind { +; X86-LABEL: length511_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $511 # imm = 0x1FF +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length511_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $511, %edx # imm = 0x1FF +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length512: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length512: +; X64: # %bb.0: +; X64-NEXT: movl $512, %edx # imm = 0x200 +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length512_eq: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length512_eq: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $512, %edx # imm = 0x200 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length512_lt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: shrl $31, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; +; X64-LABEL: length512_lt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $512, %edx # imm = 0x200 +; X64-NEXT: callq memcmp +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(i8* %x, i8* %y) nounwind { +; X86-LABEL: length512_gt: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length512_gt: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $512, %edx # imm = 0x200 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(i8* %X) nounwind { +; X86-LABEL: length512_eq_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length512_eq_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $512, %edx # imm = 0x200 +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 512) nounwind %c = icmp eq i32 %m, 0 ret i1 %c } |

