diff options
| -rw-r--r-- | llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll | 247 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll | 118 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/urem-seteq-nonzero.ll | 434 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll | 434 |
4 files changed, 1233 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll new file mode 100644 index 00000000000..6fe10ce5d5b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @t32_3_1(i32 %X) nounwind { +; CHECK-LABEL: t32_3_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #33 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 3 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_3_2(i32 %X) nounwind { +; CHECK-LABEL: t32_3_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #33 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 3 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + + +define i1 @t32_5_1(i32 %X) nounwind { +; CHECK-LABEL: t32_5_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: add w8, w8, w8, lsl #2 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_5_2(i32 %X) nounwind { +; CHECK-LABEL: t32_5_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: add w8, w8, w8, lsl #2 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + +define i1 @t32_5_3(i32 %X) nounwind { +; CHECK-LABEL: t32_5_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: add w8, w8, w8, lsl #2 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #3 // =3 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 3 + ret i1 %cmp +} + +define i1 @t32_5_4(i32 %X) nounwind { +; CHECK-LABEL: t32_5_4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: add w8, w8, w8, lsl #2 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: cmp w8, #4 // =4 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 4 + ret i1 %cmp +} + + +define i1 @t32_6_1(i32 %X) nounwind { +; CHECK-LABEL: t32_6_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_6_2(i32 %X) nounwind { +; CHECK-LABEL: t32_6_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + +define i1 @t32_6_3(i32 %X) nounwind { +; CHECK-LABEL: t32_6_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: cmp w8, #3 // =3 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 3 + ret i1 %cmp +} + +define i1 @t32_6_4(i32 %X) nounwind { +; CHECK-LABEL: t32_6_4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: cmp w8, #4 // =4 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 4 + ret i1 %cmp +} + +define i1 @t32_6_5(i32 %X) nounwind { +; CHECK-LABEL: t32_6_5: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #34 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 5 + ret i1 %cmp +} + +;------------------------------------------------------------------------------- +; Other widths. + +define i1 @t16_3_2(i16 %X) nounwind { +; CHECK-LABEL: t16_3_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: umull x9, w8, w9 +; CHECK-NEXT: lsr x9, x9, #33 +; CHECK-NEXT: add w9, w9, w9, lsl #1 +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i16 %X, 3 + %cmp = icmp eq i16 %urem, 2 + ret i1 %cmp +} + +define i1 @t8_3_2(i8 %X) nounwind { +; CHECK-LABEL: t8_3_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: umull x9, w8, w9 +; CHECK-NEXT: lsr x9, x9, #33 +; CHECK-NEXT: add w9, w9, w9, lsl #1 +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i8 %X, 3 + %cmp = icmp eq i8 %urem, 2 + ret i1 %cmp +} + +define i1 @t64_3_2(i64 %X) nounwind { +; CHECK-LABEL: t64_3_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: movk x8, #43691 +; CHECK-NEXT: umulh x8, x0, x8 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: add x8, x8, x8, lsl #1 +; CHECK-NEXT: sub x8, x0, x8 +; CHECK-NEXT: cmp x8, #2 // =2 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %urem = urem i64 %X, 3 + %cmp = icmp eq i64 %urem, 2 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll new file mode 100644 index 00000000000..f45b5598eae --- /dev/null +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define <4 x i1> @t32_3(<4 x i32> %X) nounwind { +; CHECK-LABEL: t32_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: adrp x9, .LCPI0_0 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_0] +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ushr v1.4s, v1.4s, #1 +; CHECK-NEXT: movi v3.4s, #3 +; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret + %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_5(<4 x i32> %X) nounwind { +; CHECK-LABEL: t32_5: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: adrp x9, .LCPI1_0 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_0] +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ushr v1.4s, v1.4s, #2 +; CHECK-NEXT: movi v3.4s, #5 +; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret + %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5> + %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind { +; CHECK-LABEL: t32_6_part0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: adrp x9, .LCPI2_0 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_0] +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ushr v1.4s, v1.4s, #2 +; CHECK-NEXT: movi v3.4s, #6 +; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret + %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind { +; CHECK-LABEL: t32_6_part1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: adrp x9, .LCPI3_0 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0] +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ushr v1.4s, v1.4s, #2 +; CHECK-NEXT: movi v3.4s, #6 +; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret + %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6> + %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind { +; CHECK-LABEL: t32_tautological: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: adrp x8, .LCPI4_3 +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: adrp x8, .LCPI4_4 +; CHECK-NEXT: umull2 v5.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v5.4s +; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI4_4] +; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s +; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b +; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v5.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret + %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2> + ret <4 x i1> %cmp +} diff --git a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll new file mode 100644 index 00000000000..f8a7d7ba519 --- /dev/null +++ b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll @@ -0,0 +1,434 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64 + +define i1 @t32_3_1(i32 %X) nounwind { +; X86-LABEL: t32_3_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $1, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_3_1: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $33, %rcx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 3 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_3_2(i32 %X) nounwind { +; X86-LABEL: t32_3_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $2, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_3_2: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $33, %rcx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $2, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 3 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + + +define i1 @t32_5_1(i32 %X) nounwind { +; X86-LABEL: t32_5_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl $2, %edx +; X86-NEXT: leal (%edx,%edx,4), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $1, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_5_1: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: leal (%rcx,%rcx,4), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_5_2(i32 %X) nounwind { +; X86-LABEL: t32_5_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl $2, %edx +; X86-NEXT: leal (%edx,%edx,4), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $2, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_5_2: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: leal (%rcx,%rcx,4), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $2, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + +define i1 @t32_5_3(i32 %X) nounwind { +; X86-LABEL: t32_5_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl $2, %edx +; X86-NEXT: leal (%edx,%edx,4), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $3, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_5_3: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: leal (%rcx,%rcx,4), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $3, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 3 + ret i1 %cmp +} + +define i1 @t32_5_4(i32 %X) nounwind { +; X86-LABEL: t32_5_4: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl $2, %edx +; X86-NEXT: leal (%edx,%edx,4), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $4, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_5_4: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: leal (%rcx,%rcx,4), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $4, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 5 + %cmp = icmp eq i32 %urem, 4 + ret i1 %cmp +} + + +define i1 @t32_6_1(i32 %X) nounwind { +; X86-LABEL: t32_6_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: andl $-2, %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $1, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_6_1: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 1 + ret i1 %cmp +} + +define i1 @t32_6_2(i32 %X) nounwind { +; X86-LABEL: t32_6_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: andl $-2, %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $2, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_6_2: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $2, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 2 + ret i1 %cmp +} + +define i1 @t32_6_3(i32 %X) nounwind { +; X86-LABEL: t32_6_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: andl $-2, %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $3, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_6_3: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $3, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 3 + ret i1 %cmp +} + +define i1 @t32_6_4(i32 %X) nounwind { +; X86-LABEL: t32_6_4: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: andl $-2, %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $4, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_6_4: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $4, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 4 + ret i1 %cmp +} + +define i1 @t32_6_5(i32 %X) nounwind { +; X86-LABEL: t32_6_5: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: shrl %edx +; X86-NEXT: andl $-2, %edx +; X86-NEXT: leal (%edx,%edx,2), %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: cmpl $5, %ecx +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t32_6_5: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: shrq $34, %rcx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpl $5, %edi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i32 %X, 6 + %cmp = icmp eq i32 %urem, 5 + ret i1 %cmp +} + +;------------------------------------------------------------------------------- +; Other widths. + +define i1 @t16_3_2(i16 %X) nounwind { +; X86-LABEL: t16_3_2: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull $43691, %eax, %ecx # imm = 0xAAAB +; X86-NEXT: shrl $17, %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmpw $2, %ax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t16_3_2: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: imull $43691, %eax, %eax # imm = 0xAAAB +; X64-NEXT: shrl $17, %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: subl %eax, %edi +; X64-NEXT: cmpw $2, %di +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i16 %X, 3 + %cmp = icmp eq i16 %urem, 2 + ret i1 %cmp +} + +define i1 @t8_3_2(i8 %X) nounwind { +; X86-LABEL: t8_3_2: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull $171, %eax, %ecx +; X86-NEXT: shrl $9, %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: subb %cl, %al +; X86-NEXT: cmpb $2, %al +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: t8_3_2: +; X64: # %bb.0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: imull $171, %eax, %ecx +; X64-NEXT: shrl $9, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-NEXT: subb %cl, %al +; X64-NEXT: cmpb $2, %al +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i8 %X, 3 + %cmp = icmp eq i8 %urem, 2 + ret i1 %cmp +} + +define i1 @t64_3_2(i64 %X) nounwind { +; X86-LABEL: t64_3_2: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $3 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __umoddi3 +; X86-NEXT: addl $16, %esp +; X86-NEXT: xorl $2, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: sete %al +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: t64_3_2: +; X64: # %bb.0: +; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq %rdx +; X64-NEXT: leaq (%rdx,%rdx,2), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: cmpq $2, %rdi +; X64-NEXT: sete %al +; X64-NEXT: retq + %urem = urem i64 %X, 3 + %cmp = icmp eq i64 %urem, 2 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll new file mode 100644 index 00000000000..b580e39c728 --- /dev/null +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll @@ -0,0 +1,434 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL + +define <4 x i1> @t32_3(<4 x i32> %X) nounwind { +; CHECK-SSE2-LABEL: t32_3: +; CHECK-SSE2: # %bb.0: +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psrld $1, %xmm2 +; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 +; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1 +; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1 +; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: retq +; +; CHECK-SSE41-LABEL: t32_3: +; CHECK-SSE41: # %bb.0: +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1 +; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-SSE41-NEXT: psrld $1, %xmm2 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: retq +; +; CHECK-AVX1-LABEL: t32_3: +; CHECK-AVX1: # %bb.0: +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: retq +; +; CHECK-AVX2-LABEL: t32_3: +; CHECK-AVX2: # %bb.0: +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3] +; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512VL-LABEL: t32_3: +; CHECK-AVX512VL: # %bb.0: +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: retq + %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_5(<4 x i32> %X) nounwind { +; CHECK-SSE2-LABEL: t32_5: +; CHECK-SSE2: # %bb.0: +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psrld $2, %xmm2 +; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 +; CHECK-SSE2-NEXT: pslld $2, %xmm1 +; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1 +; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: retq +; +; CHECK-SSE41-LABEL: t32_5: +; CHECK-SSE41: # %bb.0: +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] +; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1 +; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-SSE41-NEXT: psrld $2, %xmm2 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: retq +; +; CHECK-AVX1-LABEL: t32_5: +; CHECK-AVX1: # %bb.0: +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: retq +; +; CHECK-AVX2-LABEL: t32_5: +; CHECK-AVX2: # %bb.0: +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5] +; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512VL-LABEL: t32_5: +; CHECK-AVX512VL: # %bb.0: +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: retq + %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5> + %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind { +; CHECK-SSE2-LABEL: t32_6_part0: +; CHECK-SSE2: # %bb.0: +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psrld $2, %xmm2 +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: retq +; +; CHECK-SSE41-LABEL: t32_6_part0: +; CHECK-SSE41: # %bb.0: +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1 +; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-SSE41-NEXT: psrld $2, %xmm2 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: retq +; +; CHECK-AVX1-LABEL: t32_6_part0: +; CHECK-AVX1: # %bb.0: +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: retq +; +; CHECK-AVX2-LABEL: t32_6_part0: +; CHECK-AVX2: # %bb.0: +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6] +; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512VL-LABEL: t32_6_part0: +; CHECK-AVX512VL: # %bb.0: +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: retq + %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind { +; CHECK-SSE2-LABEL: t32_6_part1: +; CHECK-SSE2: # %bb.0: +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psrld $2, %xmm2 +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: retq +; +; CHECK-SSE41-LABEL: t32_6_part1: +; CHECK-SSE41: # %bb.0: +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1 +; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-SSE41-NEXT: psrld $2, %xmm2 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: retq +; +; CHECK-AVX1-LABEL: t32_6_part1: +; CHECK-AVX1: # %bb.0: +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] +; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: retq +; +; CHECK-AVX2-LABEL: t32_6_part1: +; CHECK-AVX2: # %bb.0: +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6] +; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512VL-LABEL: t32_6_part1: +; CHECK-AVX512VL: # %bb.0: +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531] +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] +; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: retq + %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6> + %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0> + ret <4 x i1> %cmp +} + +define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind { +; CHECK-SSE2-LABEL: t32_tautological: +; CHECK-SSE2: # %bb.0: +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531] +; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,2,2] +; CHECK-SSE2-NEXT: psrld $1, %xmm2 +; CHECK-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,2,3] +; CHECK-SSE2-NEXT: movapd %xmm1, %xmm3 +; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: retq +; +; CHECK-SSE41-LABEL: t32_tautological: +; CHECK-SSE41: # %bb.0: +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531] +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 +; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 +; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] +; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 +; CHECK-SSE41-NEXT: psrld $1, %xmm2 +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7] +; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: retq +; +; CHECK-AVX1-LABEL: t32_tautological: +; CHECK-AVX1: # %bb.0: +; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531] +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 +; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm2 +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] +; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: retq +; +; CHECK-AVX2-LABEL: t32_tautological: +; CHECK-AVX2: # %bb.0: +; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531] +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 +; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3] +; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512VL-LABEL: t32_tautological: +; CHECK-AVX512VL: # %bb.0: +; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531] +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 +; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3] +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: retq + %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3> + %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2> + ret <4 x i1> %cmp +} |

