summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/stack-folding-int-sse42.ll')
-rw-r--r--llvm/test/CodeGen/X86/stack-folding-int-sse42.ll116
1 files changed, 63 insertions, 53 deletions
diff --git a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll
index 7f307681178..cabfa92e330 100644
--- a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -151,28 +151,31 @@ define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pabsb
;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
- ret <16 x i8> %2
+ %2 = icmp sgt <16 x i8> %a0, zeroinitializer
+ %3 = sub <16 x i8> zeroinitializer, %a0
+ %4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
+ ret <16 x i8> %4
}
-declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pabsd
;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
- ret <4 x i32> %2
+ %2 = icmp sgt <4 x i32> %a0, zeroinitializer
+ %3 = sub <4 x i32> zeroinitializer, %a0
+ %4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
+ ret <4 x i32> %4
}
-declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pabsw
;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
- ret <8 x i16> %2
+ %2 = icmp sgt <8 x i16> %a0, zeroinitializer
+ %3 = sub <8 x i16> zeroinitializer, %a0
+ %4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
+ ret <8 x i16> %4
}
-declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_packssdw
@@ -346,10 +349,9 @@ define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pblendw
;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %2
}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_pclmulqdq
@@ -622,109 +624,109 @@ define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxsb
;CHECK: pmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %2
+ %2 = icmp sgt <16 x i8> %a0, %a1
+ %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
+ ret <16 x i8> %3
}
-declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxsd
;CHECK: pmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
- ret <4 x i32> %2
+ %2 = icmp sgt <4 x i32> %a0, %a1
+ %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxsw
;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
- ret <8 x i16> %2
+ %2 = icmp sgt <8 x i16> %a0, %a1
+ %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pmaxub
;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %2
+ %2 = icmp ugt <16 x i8> %a0, %a1
+ %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
+ ret <16 x i8> %3
}
-declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmaxud
;CHECK: pmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
- ret <4 x i32> %2
+ %2 = icmp ugt <4 x i32> %a0, %a1
+ %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmaxuw
;CHECK: pmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
- ret <8 x i16> %2
+ %2 = icmp ugt <8 x i16> %a0, %a1
+ %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminsb
;CHECK: pminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %2
+ %2 = icmp slt <16 x i8> %a0, %a1
+ %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
+ ret <16 x i8> %3
}
-declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminsd
;CHECK: pminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
- ret <4 x i32> %2
+ %2 = icmp slt <4 x i32> %a0, %a1
+ %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminsw
;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
- ret <8 x i16> %2
+ %2 = icmp slt <8 x i16> %a0, %a1
+ %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_pminub
;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %2
+ %2 = icmp ult <16 x i8> %a0, %a1
+ %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
+ ret <16 x i8> %3
}
-declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pminud
;CHECK: pminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
- ret <4 x i32> %2
+ %2 = icmp ult <4 x i32> %a0, %a1
+ %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pminuw
;CHECK: pminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
- ret <8 x i16> %2
+ %2 = icmp ult <8 x i16> %a0, %a1
+ %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd
@@ -838,10 +840,15 @@ define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq
;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
- ret <2 x i64> %2
+ %2 = bitcast <4 x i32> %a0 to <2 x i64>
+ %3 = bitcast <4 x i32> %a1 to <2 x i64>
+ %4 = shl <2 x i64> %2, <i64 32, i64 32>
+ %5 = ashr <2 x i64> %4, <i64 32, i64 32>
+ %6 = shl <2 x i64> %3, <i64 32, i64 32>
+ %7 = ashr <2 x i64> %6, <i64 32, i64 32>
+ %8 = mul <2 x i64> %5, %7
+ ret <2 x i64> %8
}
-declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
;CHECK-LABEL: stack_fold_pmulhrsw
@@ -890,10 +897,13 @@ define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuludq
;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
- ret <2 x i64> %2
+ %2 = bitcast <4 x i32> %a0 to <2 x i64>
+ %3 = bitcast <4 x i32> %a1 to <2 x i64>
+ %4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
+ %5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
+ %6 = mul <2 x i64> %4, %5
+ ret <2 x i64> %6
}
-declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
;CHECK-LABEL: stack_fold_por
OpenPOWER on IntegriCloud