diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-08-24 06:14:18 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-08-24 06:14:18 +0000 |
commit | de92622aa55d19d0e00b5b1ea61328105e7573b1 (patch) | |
tree | 3f06c72ec928a0dbe2d3dd07e9bc56357aa2c1d7 /llvm/test/CodeGen/X86/avx-arith.ll | |
parent | 7a53498f20f0695bfb1e4fea8c139b02e1a30746 (diff) | |
download | bcm5719-llvm-de92622aa55d19d0e00b5b1ea61328105e7573b1.tar.gz bcm5719-llvm-de92622aa55d19d0e00b5b1ea61328105e7573b1.zip |
Break 256-bit vector int add/sub/mul into two 128-bit operations to avoid costly scalarization. Fixes PR10711.
llvm-svn: 138427
Diffstat (limited to 'llvm/test/CodeGen/X86/avx-arith.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx-arith.ll | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx-arith.ll b/llvm/test/CodeGen/X86/avx-arith.ll
index 553e8acda97..59988ca8b68 100644
--- a/llvm/test/CodeGen/X86/avx-arith.ll
+++ b/llvm/test/CodeGen/X86/avx-arith.ll
@@ -131,3 +131,131 @@ entry:
 }
 
 declare float @sqrtf(float) readnone
+
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = add <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpaddd %xmm
+; CHECK-NEXT: vpaddd %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = add <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpaddw %xmm
+; CHECK-NEXT: vpaddw %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = add <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpaddb %xmm
+; CHECK-NEXT: vpaddb %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = add <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpsubq %xmm
+; CHECK-NEXT: vpsubq %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = sub <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpsubd %xmm
+; CHECK-NEXT: vpsubd %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = sub <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpsubw %xmm
+; CHECK-NEXT: vpsubw %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = sub <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpsubb %xmm
+; CHECK-NEXT: vpsubb %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = sub <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpmulld %xmm
+; CHECK-NEXT: vpmulld %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = mul <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpmullw %xmm
+; CHECK-NEXT: vpmullw %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = mul <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vextractf128 $1
+; CHECK-NEXT: vextractf128 $1
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsrlq $32, %xmm
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsllq $32, %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsrlq $32, %xmm
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsllq $32, %xmm
+; CHECK-NEXT: vpsrlq $32, %xmm
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsllq $32, %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpsrlq $32, %xmm
+; CHECK-NEXT: vpmuludq %xmm
+; CHECK-NEXT: vpsllq $32, %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = mul <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+