diff options
| author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:44:20 +0000 |
|---|---|---|
| committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:44:20 +0000 |
| commit | f2968d58cb519ec9c772efba9b55a920c826737a (patch) | |
| tree | abefe6c842542f09d50e8f4bc2368f85b3c6f007 /llvm/lib/Target/SystemZ | |
| parent | 33435c4c9c1dcecd109aef3d23d46ea43618d9ae (diff) | |
| download | bcm5719-llvm-f2968d58cb519ec9c772efba9b55a920c826737a.tar.gz bcm5719-llvm-f2968d58cb519ec9c772efba9b55a920c826737a.zip | |
[SystemZ] Add support for IBM z14 processor (3/3)
This adds support for the new 128-bit vector float instructions of z14.
Note that these instructions actually only operate on the f128 type,
since only each 128-bit vector register can hold only one 128-bit
float value. However, this is still preferable to the legacy 128-bit
float instructions, since those operate on pairs of floating-point
registers (so we can hold at most 8 values in registers), while the
new instructions use single vector registers (so we hold up to 32
value in registers).
Adding support includes:
- Enabling the instructions for the assembler/disassembler.
- CodeGen for the instructions. This includes allocating the f128
type now to the VR128BitRegClass instead of FP128BitRegClass.
- Scheduler description support for the instructions.
Note that for a small number of operations, we have no new vector
instructions (like integer <-> 128-bit float conversions), and so
we use the legacy instruction and then reformat the operand
(i.e. copy between a pair of floating-point registers and a
vector register).
llvm-svn: 308196
Diffstat (limited to 'llvm/lib/Target/SystemZ')
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZFeatures.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrFP.td | 88 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 31 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrVector.td | 70 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZRegisterInfo.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 17 |
9 files changed, 211 insertions, 43 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td index ec62ebadd1f..fda9c30fe3f 100644 --- a/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -217,6 +217,7 @@ def FeatureVectorEnhancements1 : SystemZFeature< "vector-enhancements-1", "VectorEnhancements1", "Assume that the vector enhancements facility 1 is installed" >; +def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">; def FeatureVectorPackedDecimal : SystemZFeature< "vector-packed-decimal", "VectorPackedDecimal", diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index a806c9b89ee..2d916d2e152 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -101,7 +101,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); @@ -453,12 +456,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f128, Legal); + setOperationAction(ISD::FMINNUM, MVT::f128, Legal); + setOperationAction(ISD::FMINNAN, MVT::f128, Legal); } // We have fused multiply-addition for f32 and f64 but not f128. setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMA, MVT::f128, Expand); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FMA, MVT::f128, Legal); + else + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // We don't have a copysign instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant @@ -466,6 +481,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + // We don't have extending load instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + } + // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); @@ -530,7 +551,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { case MVT::f64: return true; case MVT::f128: - return false; + return Subtarget.hasVectorEnhancements1(); default: break; } @@ -6176,6 +6197,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: + case SystemZ::SelectVR128: return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 10172bd4520..02aeaadad0d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. -def SelectF32 : SelectWrapper<FP32>; -def SelectF64 : SelectWrapper<FP64>; -def SelectF128 : SelectWrapper<FP128>; +def SelectF32 : SelectWrapper<f32, FP32>; +def SelectF64 : SelectWrapper<f64, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in + def SelectF128 : SelectWrapper<f128, FP128>; +let Predicates = [FeatureVectorEnhancements1] in + def SelectVR128 : SelectWrapper<f128, VR128>; defm CondStoreF32 : CondStores<FP32, nonvolatile_store, nonvolatile_load, bdxaddr20only>; @@ -69,8 +72,9 @@ let Defs = [CC], usesCustomInserter = 1 in { let Predicates = [FeatureVector] in { defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>; defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>; - defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>; } +let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in + defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>; // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; @@ -83,8 +87,12 @@ let isCodeGenOnly = 1 in { } // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP64 result. let isCodeGenOnly = 1 in @@ -92,8 +100,12 @@ let isCodeGenOnly = 1 in def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -101,12 +113,14 @@ class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; -def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), - FP32:$src2)>; -def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), - FP64:$src2)>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), - (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP32:$src2)>; + def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP64:$src2)>; + def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +} defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>; defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>; @@ -166,20 +180,32 @@ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, Requires<[FeatureFPExtension]>; -def : Pat<(f32 (fpround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; -def : Pat<(f64 (fpround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f32 (fpround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; + def : Pat<(f64 (fpround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +} // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; +} // Extend memory floating-point values to wider representations. def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; +def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), + (LXEB bdxaddr12only:$src)>; + def : Pat<(f128 (extloadf64 bdxaddr12only:$src)), + (LXDB bdxaddr12only:$src)>; +} // Convert a signed integer register value to a floating-point one. def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; @@ -426,16 +452,18 @@ def : Pat<(fmul (f64 (fpextend FP32:$src1)), // f128 multiplication of two FP64 registers. def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), - (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), - FP64:$src1, subreg_h64), FP64:$src2)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), - (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), - bdxaddr12only:$addr)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; // Fused multiply-add. def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index f36e58889af..033a0a879d3 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -4672,10 +4672,10 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. -class SelectWrapper<RegisterOperand cls> +class SelectWrapper<ValueType vt, RegisterOperand cls> : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index eaa694bee5e..4533f4fdf21 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -869,6 +869,37 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Move 128-bit floating-point values between VR128 and FP128. + if (SystemZ::VR128BitRegClass.contains(DestReg) && + SystemZ::FP128BitRegClass.contains(SrcReg)) { + unsigned SrcRegHi = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned SrcRegLo = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) + .addReg(SrcRegHi, getKillRegState(KillSrc)) + .addReg(SrcRegLo, getKillRegState(KillSrc)); + return; + } + if (SystemZ::FP128BitRegClass.contains(DestReg) && + SystemZ::VR128BitRegClass.contains(SrcReg)) { + unsigned DestRegHi = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned DestRegLo = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + if (DestRegHi != SrcReg) + copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); + BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1); + return; + } + // Everything else needs only one instruction. unsigned Opcode; if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 46a71731a90..f64c0d15ef8 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -324,9 +324,9 @@ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in { // Select instructions //===----------------------------------------------------------------------===// -def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; -def Select32 : SelectWrapper<GR32>; -def Select64 : SelectWrapper<GR64>; +def Select32Mux : SelectWrapper<i32, GRX32>, Requires<[FeatureHighWord]>; +def Select32 : SelectWrapper<i32, GR32>; +def Select64 : SelectWrapper<i64, GR64>; // We don't define 32-bit Mux stores if we don't have STOCFH, because the // low-only STOC should then always be used if possible. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 9d8f74cdd98..c9a02d9c808 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -938,6 +938,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; } // Convert from fixed 64-bit. @@ -973,6 +974,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; } // Load FP integer. @@ -984,8 +986,10 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; defm : VectorRounding<VFISB, v128sb>; defm : VectorRounding<WFISB, v32sb>; + defm : VectorRounding<WFIXB, v128xb>; } // Load lengthened. @@ -998,6 +1002,9 @@ let Predicates = [FeatureVector] in { def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; + def : Pat<(f128 (fpextend (f32 VR32:$src))), + (WFLLD (WLDEB VR32:$src))>; } // Load rounded. @@ -1012,6 +1019,10 @@ let Predicates = [FeatureVector] in { def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; + def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>; + def : Pat<(f32 (fpround (f128 VR128:$src))), + (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; } // Maximum. @@ -1029,10 +1040,13 @@ let Predicates = [FeatureVector] in { v128sb, v128sb, 2, 0>; def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; defm : VectorMax<VFMAXDB, v128db>; defm : VectorMax<WFMAXDB, v64db>; defm : VectorMax<VFMAXSB, v128sb>; defm : VectorMax<WFMAXSB, v32sb>; + defm : VectorMax<WFMAXXB, v128xb>; } // Minimum. @@ -1050,10 +1064,13 @@ let Predicates = [FeatureVector] in { v128sb, v128sb, 2, 0>; def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; defm : VectorMin<VFMINDB, v128db>; defm : VectorMin<WFMINDB, v64db>; defm : VectorMin<VFMINSB, v128sb>; defm : VectorMin<WFMINSB, v32sb>; + defm : VectorMin<WFMINXB, v128xb>; } // Multiply. @@ -1063,6 +1080,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; } // Multiply and add. @@ -1072,6 +1090,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; } // Multiply and subtract. @@ -1081,6 +1100,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; } // Negative multiply and add. @@ -1090,6 +1110,7 @@ let Predicates = [FeatureVector] in { def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; } // Negative multiply and subtract. @@ -1099,6 +1120,7 @@ let Predicates = [FeatureVector] in { def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; } // Perform sign operation. @@ -1108,6 +1130,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>; } // Load complement. @@ -1116,6 +1139,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>; } // Load negative. @@ -1124,6 +1148,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>; } // Load positive. @@ -1132,6 +1157,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>; } // Square root. @@ -1141,6 +1167,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; } // Subtract. @@ -1150,6 +1177,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; } // Test data class immediate. @@ -1160,6 +1188,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>; } } } @@ -1175,6 +1204,7 @@ let Predicates = [FeatureVector] in { def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; } } @@ -1184,6 +1214,7 @@ let Predicates = [FeatureVector] in { def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; } } @@ -1198,6 +1229,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal equal. @@ -1210,6 +1243,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 12>; } // Compare high. @@ -1223,6 +1258,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal high. @@ -1235,6 +1272,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 12>; } // Compare high or equal. @@ -1248,6 +1287,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal high or equal. @@ -1260,6 +1301,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 12>; } } @@ -1272,36 +1315,49 @@ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>; //===----------------------------------------------------------------------===// // Replicating scalars @@ -1378,6 +1434,20 @@ let AddedComplexity = 4 in { } //===----------------------------------------------------------------------===// +// Support for 128-bit floating-point values in vector registers +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(f128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + + def : Pat<(f128 fpimm0), (VZERO)>; + def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>; +} + +//===----------------------------------------------------------------------===// // String instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index ef5a2642bd2..52ba1a58401 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -260,10 +260,10 @@ defm VF128 : SystemZRegClass<"VF128", // All vector registers. defm VR128 : SystemZRegClass<"VR128", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, - (add (sequence "V%u", 0, 7), - (sequence "V%u", 16, 31), - (sequence "V%u", 8, 15))>; + [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; // Attaches a ValueType to a register operand, to make the instruction // definitions easier. @@ -283,6 +283,7 @@ def v128g : TypedReg<v2i64, VR128>; def v128q : TypedReg<v16i8, VR128>; def v128sb : TypedReg<v4f32, VR128>; def v128db : TypedReg<v2f64, VR128>; +def v128xb : TypedReg<f128, VR128>; def v128any : TypedReg<untyped, VR128>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index f9407eb179d..f11177af91a 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -753,7 +753,7 @@ def : InstRW<[], (instregex "Insn.*")>; // FP: Select instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXa], (instregex "SelectF(32|64|128)$")>; +def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>; def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>; @@ -1319,18 +1319,23 @@ def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "VFL(L|R)$")>; def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>; def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>; +def : InstRW<[VecBF2], (instregex "WFLLD$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>; def : InstRW<[VecBF2], (instregex "VFI$")>; def : InstRW<[VecBF], (instregex "VFIDB$")>; def : InstRW<[VecBF], (instregex "WFIDB$")>; def : InstRW<[VecBF2], (instregex "VFISB$")>; def : InstRW<[VecBF], (instregex "WFISB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>; // Sign operations def : InstRW<[VecXsPm], (instregex "VFPSO$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>; // Minimum / maximum def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>; @@ -1338,11 +1343,13 @@ def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>; def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>; def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>; // Test data class def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>; // Add / subtract def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; @@ -1350,6 +1357,7 @@ def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>; def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>; // Multiply / multiply-and-add/subtract def : InstRW<[VecBF2], (instregex "VFM$")>; @@ -1357,19 +1365,23 @@ def : InstRW<[VecBF], (instregex "VFMDB$")>; def : InstRW<[VecBF], (instregex "WFMDB$")>; def : InstRW<[VecBF2], (instregex "VFMSB$")>; def : InstRW<[VecBF], (instregex "WFMSB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>; def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>; def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>; def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>; def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>; // Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>; +def : InstRW<[VecFPd], (instregex "WFDXB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>; +def : InstRW<[VecFPd], (instregex "WFSQXB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point comparison @@ -1380,13 +1392,16 @@ def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>; def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>; def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point insertion and extraction |

