| author | Chad Rosier <mcrosier@apple.com> | 2012-03-09 02:00:48 +0000 |
|---|---|---|
| committer | Chad Rosier <mcrosier@apple.com> | 2012-03-09 02:00:48 +0000 |
| commit | a281afc6769d30d99d88baefa0c9468739805000 | |
| tree | 55f273de7e75eaa8a7fccbce3c83a7849d101ac0 | |
| parent | 3c38d435c68aa08b85724a7a01246a05a5b22670 | |
Fix a regression from r147481.
Original commit message from r147481:
DAGCombine for transforming 128->256 casts into a vmovaps, rather
than a vxorps + vinsertf128 pair if the original vector came from a load.
Fix:
Unaligned loads need to generate a vmovups.
rdar://10974078
llvm-svn: 152366
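
As a minimal sketch (not part of this commit), the hypothetical test below (the name `test16_aligned` is invented here) is the aligned counterpart of the `test16` regression test added in the diff further down. It illustrates the other half of the change: with 16-byte alignment, the new `alignedX86vzload` pattern should still allow the zero-extending 128->256 widening to select the aligned `vmovaps` form, while the under-aligned case must fall back to `vmovups`.

```llvm
; Hypothetical aligned counterpart of test16 (illustration only, not in this patch).
; With align 16, the zero-extending 128->256 widening is expected to match the
; new alignedX86vzload pattern and select the aligned load form (vmovaps).
define <8 x float> @test16_aligned(float* nocapture %f) nounwind readonly {
entry:
  %0 = bitcast float* %f to <4 x float>*
  %1 = load <4 x float>* %0, align 16
  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle.i
}
```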
| File | Lines changed |
|---|---|
| llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 |
| llvm/lib/Target/X86/X86InstrSSE.td | 4 |
| llvm/test/CodeGen/X86/avx-shuffle.ll | 12 |

3 files changed, 20 insertions(+), 1 deletion(-)
```diff
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4f9f089d5d4..ae3ed1bcb32 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
 
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+  return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
 // Like 'load', but always requires 256-bit vector alignment.
 def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 32;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c6d1d192cfe..36526ad7a58 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
 }
 
 let Predicates = [HasAVX] in {
-def : Pat<(v4i64 (X86vzload addr:$src)),
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
           (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
 }
 
 //===---------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll
index 947d79f9e4c..54f01e966d5 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle.ll
@@ -135,3 +135,15 @@ define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
   ret <4 x i32>%x1
 }
 
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
```

