diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2011-11-10 06:54:20 +0000 |
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2011-11-10 06:54:20 +0000 |
| commit | 0a2f797deca72bc07b3ef7e96e32d165ed9d3c48 (patch) | |
| tree | ba0b66f14de813566348f0a1bff00f2e4c7b1897 | |
| parent | d62306a4819a67684ab342d5bd530df2c7ee9148 (diff) | |
| download | bcm5719-llvm-0a2f797deca72bc07b3ef7e96e32d165ed9d3c48.tar.gz bcm5719-llvm-0a2f797deca72bc07b3ef7e96e32d165ed9d3c48.zip | |
AVX2: Add variable shift from memory.
Note: These patterns only work in some cases, because
the load SDNode is often bitcast from a load node
of a different type.
llvm-svn: 144266
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 25 |
1 file changed, 24 insertions, 1 deletion
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ff4f749168d..91c84dd6a1a 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7692,6 +7692,7 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32, let Predicates = [HasAVX2] in { + def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), (VPSLLVDrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), @@ -7702,7 +7703,6 @@ let Predicates = [HasAVX2] in { (VPSRLVQrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), (VPSLLVDYrr VR256:$src1, VR256:$src2)>; def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), @@ -7713,6 +7713,29 @@ let Predicates = [HasAVX2] in { (VPSRLVQYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), (VPSRAVDYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (shl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSLLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (srl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSRLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (srl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSRLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (sra (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSRAVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i32 (shl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), + (VPSLLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (shl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), + (VPSLLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (srl (v8i32 
VR256:$src1),(loadv8i32 addr:$src2))), + (VPSRLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (srl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), + (VPSRLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (sra (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), + (VPSRAVDYrm VR256:$src1, addr:$src2)>; } |

