diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-09-16 09:16:48 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-16 09:16:48 +0000 |
| commit | 23f78c1662cfd665f99ea8d2d33e667532f4c432 (patch) | |
| tree | d04994c469b7153e5916b1a251b16cb0c595a239 | |
| parent | 0d1b519f78650bb2bbd3f67ff01120316cffd8df (diff) | |
| download | bcm5719-llvm-23f78c1662cfd665f99ea8d2d33e667532f4c432.tar.gz bcm5719-llvm-23f78c1662cfd665f99ea8d2d33e667532f4c432.zip | |
[X86] Add isel patterns to be able to fold loads into VPERM2F128 even when the load is on the first input to the SDNode.
We just need to toggle bits 1 and 5 of the immediate and swap the sources. The peephole pass could trigger commuting/folding for this later, but it's easy enough to fix in isel.
Disable the peephole pass on the main vperm2x128 test so we know we're doing this through isel.
llvm-svn: 313455
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 21 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vperm2x128.ll | 4 |
2 files changed, 23 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 501fea5f1e0..18cd3e0de74 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7670,12 +7670,28 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), Sched<[WriteFShuffleLd, ReadAfterLd]>; } +// Immediate transform to help with commuting. +def Perm2XCommuteImm : SDNodeXForm<imm, [{ + return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N)); +}]>; + +let Predicates = [HasAVX] in { +// Pattern with load in other operand. +def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2), + VR256:$src1, (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; +} + let Predicates = [HasAVX1Only] in { def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +// Pattern with load in other operand. 
+def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), + VR256:$src1, (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; } //===----------------------------------------------------------------------===// @@ -8083,6 +8099,11 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (i8 imm:$src3)))]>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; +let Predicates = [HasAVX2] in +def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), + VR256:$src1, (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll index fa2841dfcb1..97b68af5c05 100644 --- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll +++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: shuffle_v8f32_45670123: |

