From b49eb3ab4b7563d97a871dcd7dae4bd383c6c42f Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha
Date: Fri, 6 Nov 2015 23:16:48 +0000
Subject: [X86] Fold (trunc (i32 (zextload i16))) into vbroadcast.

When matching non-LSB-extracting truncating broadcasts, we now insert
the necessary SRL. If the scalar resulted from a load, the SRL will be
folded into it, creating a narrower, offset, load.
However, i16 loads aren't Desirable, so we get i16->i32 zextloads.
We already catch i16 aextloads; catch these as well.

llvm-svn: 252363
---
 llvm/lib/Target/X86/X86InstrSSE.td | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'llvm/lib/Target')

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 17edb500d66..e4ff9b34345 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8365,6 +8365,12 @@ let Predicates = [HasAVX2] in {
             (VPBROADCASTWrm addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
             (VPBROADCASTWYrm addr:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast
+              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+            (VPBROADCASTWrm addr:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast
+              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+            (VPBROADCASTWYrm addr:$src)>;
 
   // Provide aliases for broadcast from the same register class that
   // automatically does the extract.
-- 
cgit v1.2.3