Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/fshl.ll          | 284
-rw-r--r--  llvm/test/CodeGen/X86/fshr.ll          | 286
-rw-r--r--  llvm/test/CodeGen/X86/funnel-shift.ll  |  38
3 files changed, 353 insertions, 255 deletions
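For orientation before the raw diff: these tests exercise the llvm.fshl/llvm.fshr funnel-shift intrinsics, and the updated CHECK lines drop the explicit mask/test/cmov sequence on the shld/shrd fast paths. A minimal IR sketch of the operation that var_shift_i32 tests (the function name fshl_demo is illustrative, not part of the diff):

declare i32 @llvm.fshl.i32(i32, i32, i32)

define i32 @fshl_demo(i32 %x, i32 %y, i32 %z) nounwind {
; Concatenate %x (high half) and %y (low half) into a 64-bit value,
; shift it left by %z modulo 32, and return the high 32 bits; an
; effective amount of 0 returns %x unchanged.
  %r = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %r
}

The apparent reason the andl/testl/cmovel sequence can be dropped for the i32/i64 fast paths is that shld/shrd already interpret the count in %cl modulo the operand width, which matches the intrinsic's modulo semantics; the i16 variants keep an explicit andb $15 because the hardware masks the count modulo 32 even for 16-bit operands.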
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index 1e2a2fc3627..b161763263a 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -58,20 +58,11 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
 define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i16:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: andl $15, %ecx
-; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: shldw %cl, %si, %dx
-; X86-FAST-NEXT: testw %cx, %cx
-; X86-FAST-NEXT: je .LBB1_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edx, %eax
-; X86-FAST-NEXT: .LBB1_2:
-; X86-FAST-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: andb $15, %cl
+; X86-FAST-NEXT: shldw %cl, %dx, %ax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i16:
@@ -79,17 +70,16 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
 ; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: andl $15, %edx
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-SLOW-NEXT: andb $15, %dl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: movl %eax, %edi
 ; X86-SLOW-NEXT: movl %edx, %ecx
 ; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movl $16, %ecx
-; X86-SLOW-NEXT: subl %edx, %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movb $16, %cl
+; X86-SLOW-NEXT: subb %dl, %cl
 ; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testw %dx, %dx
+; X86-SLOW-NEXT: testb %dl, %dl
 ; X86-SLOW-NEXT: je .LBB1_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %esi, %edi
@@ -103,27 +93,25 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X64-FAST-LABEL: var_shift_i16:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: andl $15, %ecx
 ; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: andb $15, %cl
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-FAST-NEXT: shldw %cl, %si, %ax
-; X64-FAST-NEXT: testw %cx, %cx
-; X64-FAST-NEXT: cmovel %edi, %eax
 ; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i16:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movzwl %si, %eax
-; X64-SLOW-NEXT: andl $15, %edx
+; X64-SLOW-NEXT: andb $15, %dl
 ; X64-SLOW-NEXT: movl %edi, %esi
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shll %cl, %esi
-; X64-SLOW-NEXT: movl $16, %ecx
-; X64-SLOW-NEXT: subl %edx, %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: movb $16, %cl
+; X64-SLOW-NEXT: subb %dl, %cl
 ; X64-SLOW-NEXT: shrl %cl, %eax
 ; X64-SLOW-NEXT: orl %esi, %eax
-; X64-SLOW-NEXT: testw %dx, %dx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmovel %edi, %eax
 ; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-SLOW-NEXT: retq
@@ -134,19 +122,10 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i32:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: andl $31, %ecx
-; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: shldl %cl, %esi, %edx
-; X86-FAST-NEXT: testl %ecx, %ecx
-; X86-FAST-NEXT: je .LBB2_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edx, %eax
-; X86-FAST-NEXT: .LBB2_2:
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: shldl %cl, %edx, %eax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i32:
@@ -154,17 +133,16 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: andl $31, %edx
 ; X86-SLOW-NEXT: movl %eax, %edi
 ; X86-SLOW-NEXT: movl %edx, %ecx
 ; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: andb $31, %dl
 ; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: negl %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: negb %cl
 ; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testl %edx, %edx
+; X86-SLOW-NEXT: testb %dl, %dl
 ; X86-SLOW-NEXT: je .LBB2_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %esi, %edi
@@ -177,26 +155,23 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X64-FAST-LABEL: var_shift_i32:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: andl $31, %ecx
 ; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-FAST-NEXT: shldl %cl, %esi, %eax
-; X64-FAST-NEXT: testl %ecx, %ecx
-; X64-FAST-NEXT: cmovel %edi, %eax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i32:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movl %esi, %eax
-; X64-SLOW-NEXT: andl $31, %edx
 ; X64-SLOW-NEXT: movl %edi, %esi
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shll %cl, %esi
+; X64-SLOW-NEXT: andb $31, %dl
 ; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negl %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: negb %cl
 ; X64-SLOW-NEXT: shrl %cl, %eax
 ; X64-SLOW-NEXT: orl %esi, %eax
-; X64-SLOW-NEXT: testl %edx, %edx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmovel %edi, %eax
 ; X64-SLOW-NEXT: retq
   %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
@@ -204,85 +179,166 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }

 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
-; X86-LABEL: var_shift_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: andl $63, %ebx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: shldl %cl, %eax, %ebp
-; X86-NEXT: testb $32, %bl
-; X86-NEXT: je .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: movb $64, %cl
-; X86-NEXT: subb %bl, %cl
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: shrl %cl, %esi
-; X86-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
-; X86-NEXT: testb $32, %cl
-; X86-NEXT: jne .LBB3_3
-; X86-NEXT: # %bb.4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: jne .LBB3_6
-; X86-NEXT: jmp .LBB3_7
-; X86-NEXT: .LBB3_3:
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: je .LBB3_7
-; X86-NEXT: .LBB3_6:
-; X86-NEXT: orl %esi, %ebp
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: .LBB3_7:
-; X86-NEXT: addl $4, %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
-; X86-NEXT: retl
+; X86-FAST-LABEL: var_shift_i64:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: pushl %ebx
+; X86-FAST-NEXT: pushl %edi
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: pushl %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: andl $63, %ebx
+; X86-FAST-NEXT: movl %eax, %edi
+; X86-FAST-NEXT: movl %ebx, %ecx
+; X86-FAST-NEXT: shll %cl, %edi
+; X86-FAST-NEXT: shldl %cl, %eax, %ebp
+; X86-FAST-NEXT: testb $32, %bl
+; X86-FAST-NEXT: je .LBB3_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: xorl %edi, %edi
+; X86-FAST-NEXT: .LBB3_2:
+; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: subb %bl, %cl
+; X86-FAST-NEXT: movl %edx, %esi
+; X86-FAST-NEXT: shrl %cl, %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
+; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: jne .LBB3_3
+; X86-FAST-NEXT: # %bb.4:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-FAST-NEXT: testl %ebx, %ebx
+; X86-FAST-NEXT: jne .LBB3_6
+; X86-FAST-NEXT: jmp .LBB3_7
+; X86-FAST-NEXT: .LBB3_3:
+; X86-FAST-NEXT: movl %esi, %ecx
+; X86-FAST-NEXT: xorl %esi, %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: testl %ebx, %ebx
+; X86-FAST-NEXT: je .LBB3_7
+; X86-FAST-NEXT: .LBB3_6:
+; X86-FAST-NEXT: orl %esi, %ebp
+; X86-FAST-NEXT: orl %ecx, %edi
+; X86-FAST-NEXT: movl %edi, %eax
+; X86-FAST-NEXT: movl %ebp, %edx
+; X86-FAST-NEXT: .LBB3_7:
+; X86-FAST-NEXT: addl $4, %esp
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: popl %edi
+; X86-FAST-NEXT: popl %ebx
+; X86-FAST-NEXT: popl %ebp
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i64:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: pushl %ebx
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: subl $8, %esp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: andl $63, %ebx
+; X86-SLOW-NEXT: movb $64, %dh
+; X86-SLOW-NEXT: subb %bl, %dh
+; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movb %dh, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: movb %dh, %dl
+; X86-SLOW-NEXT: andb $31, %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: negb %cl
+; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: testb %dl, %dl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %eax, %ebp
+; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl %ebp, %eax
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: movb %bl, %ch
+; X86-SLOW-NEXT: andb $31, %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: negb %cl
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: testb %ch, %ch
+; X86-SLOW-NEXT: je .LBB3_4
+; X86-SLOW-NEXT: # %bb.3:
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: movl %eax, %ebp
+; X86-SLOW-NEXT: .LBB3_4:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: je .LBB3_6
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: .LBB3_6:
+; X86-SLOW-NEXT: movb %dh, %cl
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: testb $32, %dh
+; X86-SLOW-NEXT: jne .LBB3_7
+; X86-SLOW-NEXT: # %bb.8:
+; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: jne .LBB3_10
+; X86-SLOW-NEXT: jmp .LBB3_11
+; X86-SLOW-NEXT: .LBB3_7:
+; X86-SLOW-NEXT: movl %esi, %ecx
+; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: je .LBB3_11
+; X86-SLOW-NEXT: .LBB3_10:
+; X86-SLOW-NEXT: orl %esi, %ebp
+; X86-SLOW-NEXT: orl %ecx, %edi
+; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: .LBB3_11:
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-SLOW-NEXT: addl $8, %esp
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: popl %ebx
+; X86-SLOW-NEXT: popl %ebp
+; X86-SLOW-NEXT: retl
 ;
 ; X64-FAST-LABEL: var_shift_i64:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movq %rdx, %rcx
-; X64-FAST-NEXT: andl $63, %ecx
 ; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $rcx
 ; X64-FAST-NEXT: shldq %cl, %rsi, %rax
-; X64-FAST-NEXT: testq %rcx, %rcx
-; X64-FAST-NEXT: cmoveq %rdi, %rax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i64:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movq %rsi, %rax
-; X64-SLOW-NEXT: andl $63, %edx
 ; X64-SLOW-NEXT: movq %rdi, %rsi
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shlq %cl, %rsi
+; X64-SLOW-NEXT: andb $63, %dl
 ; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negl %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: negb %cl
 ; X64-SLOW-NEXT: shrq %cl, %rax
 ; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: testq %rdx, %rdx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmoveq %rdi, %rax
 ; X64-SLOW-NEXT: retq
   %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 965e83e16d0..10bf28d2abf 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -58,20 +58,11 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
 define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i16:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: andl $15, %ecx
-; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: shrdw %cl, %si, %dx
-; X86-FAST-NEXT: testw %cx, %cx
-; X86-FAST-NEXT: je .LBB1_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edx, %eax
-; X86-FAST-NEXT: .LBB1_2:
-; X86-FAST-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: andb $15, %cl
+; X86-FAST-NEXT: shrdw %cl, %dx, %ax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i16:
@@ -79,17 +70,16 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: andl $15, %edx
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-SLOW-NEXT: andb $15, %dl
 ; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: movl %eax, %edi
 ; X86-SLOW-NEXT: movl %edx, %ecx
 ; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl $16, %ecx
-; X86-SLOW-NEXT: subl %edx, %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movb $16, %cl
+; X86-SLOW-NEXT: subb %dl, %cl
 ; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testw %dx, %dx
+; X86-SLOW-NEXT: testb %dl, %dl
 ; X86-SLOW-NEXT: je .LBB1_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %edi, %esi
@@ -103,26 +93,24 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X64-FAST-LABEL: var_shift_i16:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: andl $15, %ecx
 ; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: andb $15, %cl
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-FAST-NEXT: shrdw %cl, %di, %ax
-; X64-FAST-NEXT: testw %cx, %cx
-; X64-FAST-NEXT: cmovel %esi, %eax
 ; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i16:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movzwl %si, %eax
-; X64-SLOW-NEXT: andl $15, %edx
+; X64-SLOW-NEXT: andb $15, %dl
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shrl %cl, %eax
-; X64-SLOW-NEXT: movl $16, %ecx
-; X64-SLOW-NEXT: subl %edx, %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: movb $16, %cl
+; X64-SLOW-NEXT: subb %dl, %cl
 ; X64-SLOW-NEXT: shll %cl, %edi
 ; X64-SLOW-NEXT: orl %edi, %eax
-; X64-SLOW-NEXT: testw %dx, %dx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmovel %esi, %eax
 ; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-SLOW-NEXT: retq
@@ -133,19 +121,10 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i32:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: andl $31, %ecx
-; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: shrdl %cl, %esi, %edx
-; X86-FAST-NEXT: testl %ecx, %ecx
-; X86-FAST-NEXT: je .LBB2_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edx, %eax
-; X86-FAST-NEXT: .LBB2_2:
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, %eax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i32:
@@ -153,17 +132,16 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: andl $31, %edx
 ; X86-SLOW-NEXT: movl %eax, %edi
 ; X86-SLOW-NEXT: movl %edx, %ecx
 ; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: andb $31, %dl
 ; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: negl %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: negb %cl
 ; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testl %edx, %edx
+; X86-SLOW-NEXT: testb %dl, %dl
 ; X86-SLOW-NEXT: je .LBB2_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %edi, %esi
@@ -176,26 +154,23 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X64-FAST-LABEL: var_shift_i32:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: andl $31, %ecx
 ; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-FAST-NEXT: shrdl %cl, %edi, %eax
-; X64-FAST-NEXT: testl %ecx, %ecx
-; X64-FAST-NEXT: cmovel %esi, %eax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i32:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movl %edi, %eax
-; X64-SLOW-NEXT: andl $31, %edx
 ; X64-SLOW-NEXT: movl %esi, %edi
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shrl %cl, %edi
+; X64-SLOW-NEXT: andb $31, %dl
 ; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negl %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: negb %cl
 ; X64-SLOW-NEXT: shll %cl, %eax
 ; X64-SLOW-NEXT: orl %edi, %eax
-; X64-SLOW-NEXT: testl %edx, %edx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmovel %esi, %eax
 ; X64-SLOW-NEXT: retq
   %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
@@ -203,81 +178,164 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }

 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
-; X86-LABEL: var_shift_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: andl $63, %ebx
-; X86-NEXT: movb $64, %cl
-; X86-NEXT: subb %bl, %cl
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: shldl %cl, %eax, %esi
-; X86-NEXT: testb $32, %cl
-; X86-NEXT: je .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: shrl %cl, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: testb $32, %bl
-; X86-NEXT: je .LBB3_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: xorl %ebp, %ebp
-; X86-NEXT: .LBB3_4:
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: je .LBB3_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: orl %ebp, %esi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_6:
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: addl $4, %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
-; X86-NEXT: retl
+; X86-FAST-LABEL: var_shift_i64:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: pushl %ebx
+; X86-FAST-NEXT: pushl %edi
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: pushl %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: andl $63, %ebx
+; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: subb %bl, %cl
+; X86-FAST-NEXT: movl %eax, %edi
+; X86-FAST-NEXT: shll %cl, %edi
+; X86-FAST-NEXT: shldl %cl, %eax, %esi
+; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: je .LBB3_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edi, %esi
+; X86-FAST-NEXT: xorl %edi, %edi
+; X86-FAST-NEXT: .LBB3_2:
+; X86-FAST-NEXT: movl %edx, %ebp
+; X86-FAST-NEXT: movl %ebx, %ecx
+; X86-FAST-NEXT: shrl %cl, %ebp
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shrdl %cl, %edx, %eax
+; X86-FAST-NEXT: testb $32, %bl
+; X86-FAST-NEXT: je .LBB3_4
+; X86-FAST-NEXT: # %bb.3:
+; X86-FAST-NEXT: movl %ebp, %eax
+; X86-FAST-NEXT: xorl %ebp, %ebp
+; X86-FAST-NEXT: .LBB3_4:
+; X86-FAST-NEXT: testl %ebx, %ebx
+; X86-FAST-NEXT: je .LBB3_6
+; X86-FAST-NEXT: # %bb.5:
+; X86-FAST-NEXT: orl %ebp, %esi
+; X86-FAST-NEXT: orl %eax, %edi
+; X86-FAST-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: .LBB3_6:
+; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-FAST-NEXT: addl $4, %esp
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: popl %edi
+; X86-FAST-NEXT: popl %ebx
+; X86-FAST-NEXT: popl %ebp
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i64:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: pushl %ebx
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: subl $8, %esp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: andl $63, %ebx
+; X86-SLOW-NEXT: movb $64, %al
+; X86-SLOW-NEXT: subb %bl, %al
+; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: movb %al, %ch
+; X86-SLOW-NEXT: andb $31, %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: negb %cl
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: testb %ch, %ch
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: movb %bl, %ah
+; X86-SLOW-NEXT: andb $31, %ah
+; X86-SLOW-NEXT: movb %ah, %cl
+; X86-SLOW-NEXT: negb %cl
+; X86-SLOW-NEXT: movl %ebp, %edi
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: testb %ah, %ah
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: je .LBB3_4
+; X86-SLOW-NEXT: # %bb.3:
+; X86-SLOW-NEXT: orl %edx, %edi
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: .LBB3_4:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: je .LBB3_6
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: .LBB3_6:
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: jne .LBB3_7
+; X86-SLOW-NEXT: # %bb.8:
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: jne .LBB3_10
+; X86-SLOW-NEXT: jmp .LBB3_11
+; X86-SLOW-NEXT: .LBB3_7:
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: je .LBB3_11
+; X86-SLOW-NEXT: .LBB3_10:
+; X86-SLOW-NEXT: orl %ebp, %esi
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: .LBB3_11:
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: addl $8, %esp
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: popl %ebx
+; X86-SLOW-NEXT: popl %ebp
+; X86-SLOW-NEXT: retl
 ;
 ; X64-FAST-LABEL: var_shift_i64:
 ; X64-FAST: # %bb.0:
 ; X64-FAST-NEXT: movq %rdx, %rcx
-; X64-FAST-NEXT: andl $63, %ecx
 ; X64-FAST-NEXT: movq %rsi, %rax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $rcx
 ; X64-FAST-NEXT: shrdq %cl, %rdi, %rax
-; X64-FAST-NEXT: testq %rcx, %rcx
-; X64-FAST-NEXT: cmoveq %rsi, %rax
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i64:
 ; X64-SLOW: # %bb.0:
 ; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: andl $63, %edx
 ; X64-SLOW-NEXT: movq %rsi, %rdi
 ; X64-SLOW-NEXT: movl %edx, %ecx
 ; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: andb $63, %dl
 ; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negl %ecx
-; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: negb %cl
 ; X64-SLOW-NEXT: shlq %cl, %rax
 ; X64-SLOW-NEXT: orq %rdi, %rax
-; X64-SLOW-NEXT: testq %rdx, %rdx
+; X64-SLOW-NEXT: testb %dl, %dl
 ; X64-SLOW-NEXT: cmoveq %rsi, %rax
 ; X64-SLOW-NEXT: retq
   %tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
@@ -315,7 +373,7 @@ define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
 ; X86-FAST: # %bb.0:
 ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldw $9, %cx, %ax
+; X86-FAST-NEXT: shrdw $7, %cx, %ax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: const_shift_i16:
@@ -330,8 +388,8 @@ define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
 ;
 ; X64-FAST-LABEL: const_shift_i16:
 ; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movl %edi, %eax
-; X64-FAST-NEXT: shldw $9, %si, %ax
+; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: shrdw $7, %di, %ax
 ; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-FAST-NEXT: retq
 ;
@@ -352,7 +410,7 @@ define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
 ; X86-FAST: # %bb.0:
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldl $25, %ecx, %eax
+; X86-FAST-NEXT: shrdl $7, %ecx, %eax
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: const_shift_i32:
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 65b6709db5f..0969d6d190c 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -14,31 +14,23 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
 declare i64 @llvm.fshr.i64(i64, i64, i64)
 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

-; General case - all operands can be variables - x86 has shld, but the mask and cmov are not needed?
+; General case - all operands can be variables
 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X32-SSE2-LABEL: fshl_i32:
 ; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $31, %ecx
-; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE2-NEXT: shldl %cl, %edx, %eax
-; X32-SSE2-NEXT: testl %ecx, %ecx
-; X32-SSE2-NEXT: cmovel %esi, %eax
-; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: fshl_i32:
 ; X64-AVX2: # %bb.0:
 ; X64-AVX2-NEXT: movl %edx, %ecx
-; X64-AVX2-NEXT: andl $31, %ecx
 ; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-AVX2-NEXT: shldl %cl, %esi, %eax
-; X64-AVX2-NEXT: testl %ecx, %ecx
-; X64-AVX2-NEXT: cmovel %edi, %eax
 ; X64-AVX2-NEXT: retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
@@ -212,31 +204,23 @@ define i8 @fshl_i8_const_fold() nounwind {

 ; Repeat everything for funnel shift right.

-; General case - all operands can be variables - x86 has 'shrd', but the mask and cmov are not needed?
+; General case - all operands can be variables
 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X32-SSE2-LABEL: fshr_i32:
 ; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $31, %ecx
-; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
-; X32-SSE2-NEXT: testl %ecx, %ecx
-; X32-SSE2-NEXT: cmovel %esi, %eax
-; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: fshr_i32:
 ; X64-AVX2: # %bb.0:
 ; X64-AVX2-NEXT: movl %edx, %ecx
-; X64-AVX2-NEXT: andl $31, %ecx
 ; X64-AVX2-NEXT: movl %esi, %eax
+; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT: testl %ecx, %ecx
-; X64-AVX2-NEXT: cmovel %esi, %eax
 ; X64-AVX2-NEXT: retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
@@ -341,7 +325,7 @@ define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
 ; X32-SSE2: # %bb.0:
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $23, %ecx, %eax
+; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_const_shift:
@@ -353,14 +337,14 @@
   ret i32 %f
 }

-; Check modulo math on shift amount. 41-32=9, but right-shift became left, so 32-9=23.
+; Check modulo math on shift amount. 41-32=9, but right-shift may become left, so 32-9=23.
 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
 ; X32-SSE2-LABEL: fshr_i32_const_overshift:
 ; X32-SSE2: # %bb.0:
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $23, %ecx, %eax
+; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_const_overshift:
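The funnel-shift.ll hunks above also update the expectations for constant oversized shift amounts. A worked sketch of the modulo arithmetic being checked (the function name fshr_demo is illustrative, not part of the diff):

declare i32 @llvm.fshr.i32(i32, i32, i32)

define i32 @fshr_demo(i32 %x, i32 %y) nounwind {
; The amount 41 exceeds the 32-bit width, so it folds to 41 % 32 == 9.
; The old lowering re-expressed the right funnel shift as a left one by
; 32 - 9 == 23 (shldl $23); the new CHECK lines expect the direct form
; (shrdl $9).
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}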