diff options
author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:44:20 +0000 |
---|---|---|
committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:44:20 +0000 |
commit | f2968d58cb519ec9c772efba9b55a920c826737a (patch) | |
tree | abefe6c842542f09d50e8f4bc2368f85b3c6f007 | |
parent | 33435c4c9c1dcecd109aef3d23d46ea43618d9ae (diff) | |
download | bcm5719-llvm-f2968d58cb519ec9c772efba9b55a920c826737a.tar.gz bcm5719-llvm-f2968d58cb519ec9c772efba9b55a920c826737a.zip |
[SystemZ] Add support for IBM z14 processor (3/3)
This adds support for the new 128-bit vector float instructions of z14.
Note that these instructions actually only operate on the f128 type,
since only each 128-bit vector register can hold only one 128-bit
float value. However, this is still preferable to the legacy 128-bit
float instructions, since those operate on pairs of floating-point
registers (so we can hold at most 8 values in registers), while the
new instructions use single vector registers (so we hold up to 32
value in registers).
Adding support includes:
- Enabling the instructions for the assembler/disassembler.
- CodeGen for the instructions. This includes allocating the f128
type now to the VR128BitRegClass instead of FP128BitRegClass.
- Scheduler description support for the instructions.
Note that for a small number of operations, we have no new vector
instructions (like integer <-> 128-bit float conversions), and so
we use the legacy instruction and then reformat the operand
(i.e. copy between a pair of floating-point registers and a
vector register).
llvm-svn: 308196
32 files changed, 2066 insertions, 55 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td index ec62ebadd1f..fda9c30fe3f 100644 --- a/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -217,6 +217,7 @@ def FeatureVectorEnhancements1 : SystemZFeature< "vector-enhancements-1", "VectorEnhancements1", "Assume that the vector enhancements facility 1 is installed" >; +def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">; def FeatureVectorPackedDecimal : SystemZFeature< "vector-packed-decimal", "VectorPackedDecimal", diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index a806c9b89ee..2d916d2e152 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -101,7 +101,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); @@ -453,12 +456,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f128, Legal); + setOperationAction(ISD::FMINNUM, MVT::f128, Legal); + setOperationAction(ISD::FMINNAN, MVT::f128, Legal); } // We have fused multiply-addition for f32 and f64 but not f128. setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMA, MVT::f128, Expand); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FMA, MVT::f128, Legal); + else + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // We don't have a copysign instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant @@ -466,6 +481,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + // We don't have extending load instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + } + // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); @@ -530,7 +551,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { case MVT::f64: return true; case MVT::f128: - return false; + return Subtarget.hasVectorEnhancements1(); default: break; } @@ -6176,6 +6197,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: + case SystemZ::SelectVR128: return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 10172bd4520..02aeaadad0d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. -def SelectF32 : SelectWrapper<FP32>; -def SelectF64 : SelectWrapper<FP64>; -def SelectF128 : SelectWrapper<FP128>; +def SelectF32 : SelectWrapper<f32, FP32>; +def SelectF64 : SelectWrapper<f64, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in + def SelectF128 : SelectWrapper<f128, FP128>; +let Predicates = [FeatureVectorEnhancements1] in + def SelectVR128 : SelectWrapper<f128, VR128>; defm CondStoreF32 : CondStores<FP32, nonvolatile_store, nonvolatile_load, bdxaddr20only>; @@ -69,8 +72,9 @@ let Defs = [CC], usesCustomInserter = 1 in { let Predicates = [FeatureVector] in { defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>; defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>; - defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>; } +let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in + defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>; // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; @@ -83,8 +87,12 @@ let isCodeGenOnly = 1 in { } // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP64 result. let isCodeGenOnly = 1 in @@ -92,8 +100,12 @@ let isCodeGenOnly = 1 in def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. @@ -101,12 +113,14 @@ class CopySign128<RegisterOperand cls, dag upper> : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; -def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), - FP32:$src2)>; -def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), - FP64:$src2)>; -def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), - (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP32:$src2)>; + def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + FP64:$src2)>; + def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64), + (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +} defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>; defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>; @@ -166,20 +180,32 @@ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, Requires<[FeatureFPExtension]>; -def : Pat<(f32 (fpround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; -def : Pat<(f64 (fpround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f32 (fpround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; + def : Pat<(f64 (fpround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +} // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; +} // Extend memory floating-point values to wider representations. def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; +def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), + (LXEB bdxaddr12only:$src)>; + def : Pat<(f128 (extloadf64 bdxaddr12only:$src)), + (LXDB bdxaddr12only:$src)>; +} // Convert a signed integer register value to a floating-point one. def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; @@ -426,16 +452,18 @@ def : Pat<(fmul (f64 (fpextend FP32:$src1)), // f128 multiplication of two FP64 registers. def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), - (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), - FP64:$src1, subreg_h64), FP64:$src2)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), - (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), - bdxaddr12only:$addr)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; // Fused multiply-add. def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index f36e58889af..033a0a879d3 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -4672,10 +4672,10 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. -class SelectWrapper<RegisterOperand cls> +class SelectWrapper<ValueType vt, RegisterOperand cls> : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index eaa694bee5e..4533f4fdf21 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -869,6 +869,37 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Move 128-bit floating-point values between VR128 and FP128. + if (SystemZ::VR128BitRegClass.contains(DestReg) && + SystemZ::FP128BitRegClass.contains(SrcReg)) { + unsigned SrcRegHi = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned SrcRegLo = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) + .addReg(SrcRegHi, getKillRegState(KillSrc)) + .addReg(SrcRegLo, getKillRegState(KillSrc)); + return; + } + if (SystemZ::FP128BitRegClass.contains(DestReg) && + SystemZ::VR128BitRegClass.contains(SrcReg)) { + unsigned DestRegHi = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned DestRegLo = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + if (DestRegHi != SrcReg) + copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); + BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1); + return; + } + // Everything else needs only one instruction. unsigned Opcode; if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 46a71731a90..f64c0d15ef8 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -324,9 +324,9 @@ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in { // Select instructions //===----------------------------------------------------------------------===// -def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>; -def Select32 : SelectWrapper<GR32>; -def Select64 : SelectWrapper<GR64>; +def Select32Mux : SelectWrapper<i32, GRX32>, Requires<[FeatureHighWord]>; +def Select32 : SelectWrapper<i32, GR32>; +def Select64 : SelectWrapper<i64, GR64>; // We don't define 32-bit Mux stores if we don't have STOCFH, because the // low-only STOC should then always be used if possible. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 9d8f74cdd98..c9a02d9c808 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -938,6 +938,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; } // Convert from fixed 64-bit. @@ -973,6 +974,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; } // Load FP integer. @@ -984,8 +986,10 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; defm : VectorRounding<VFISB, v128sb>; defm : VectorRounding<WFISB, v32sb>; + defm : VectorRounding<WFIXB, v128xb>; } // Load lengthened. @@ -998,6 +1002,9 @@ let Predicates = [FeatureVector] in { def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; + def : Pat<(f128 (fpextend (f32 VR32:$src))), + (WFLLD (WLDEB VR32:$src))>; } // Load rounded. @@ -1012,6 +1019,10 @@ let Predicates = [FeatureVector] in { def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; + def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>; + def : Pat<(f32 (fpround (f128 VR128:$src))), + (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; } // Maximum. @@ -1029,10 +1040,13 @@ let Predicates = [FeatureVector] in { v128sb, v128sb, 2, 0>; def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; defm : VectorMax<VFMAXDB, v128db>; defm : VectorMax<WFMAXDB, v64db>; defm : VectorMax<VFMAXSB, v128sb>; defm : VectorMax<WFMAXSB, v32sb>; + defm : VectorMax<WFMAXXB, v128xb>; } // Minimum. @@ -1050,10 +1064,13 @@ let Predicates = [FeatureVector] in { v128sb, v128sb, 2, 0>; def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; defm : VectorMin<VFMINDB, v128db>; defm : VectorMin<WFMINDB, v64db>; defm : VectorMin<VFMINSB, v128sb>; defm : VectorMin<WFMINSB, v32sb>; + defm : VectorMin<WFMINXB, v128xb>; } // Multiply. @@ -1063,6 +1080,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; } // Multiply and add. @@ -1072,6 +1090,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; } // Multiply and subtract. @@ -1081,6 +1100,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; } // Negative multiply and add. @@ -1090,6 +1110,7 @@ let Predicates = [FeatureVector] in { def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; } // Negative multiply and subtract. @@ -1099,6 +1120,7 @@ let Predicates = [FeatureVector] in { def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; } // Perform sign operation. @@ -1108,6 +1130,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>; } // Load complement. @@ -1116,6 +1139,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>; } // Load negative. @@ -1124,6 +1148,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>; } // Load positive. @@ -1132,6 +1157,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>; } // Square root. @@ -1141,6 +1167,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; } // Subtract. @@ -1150,6 +1177,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; } // Test data class immediate. @@ -1160,6 +1188,7 @@ let Predicates = [FeatureVector] in { let Predicates = [FeatureVectorEnhancements1] in { def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>; } } } @@ -1175,6 +1204,7 @@ let Predicates = [FeatureVector] in { def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; } } @@ -1184,6 +1214,7 @@ let Predicates = [FeatureVector] in { def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; } } @@ -1198,6 +1229,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal equal. @@ -1210,6 +1243,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 12>; } // Compare high. @@ -1223,6 +1258,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal high. @@ -1235,6 +1272,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 12>; } // Compare high or equal. @@ -1248,6 +1287,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 0>; defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; } // Compare and signal high or equal. @@ -1260,6 +1301,8 @@ let Predicates = [FeatureVector] in { v128f, v128sb, 2, 4>; defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 12>; + defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 12>; } } @@ -1272,36 +1315,49 @@ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>; //===----------------------------------------------------------------------===// // Replicating scalars @@ -1378,6 +1434,20 @@ let AddedComplexity = 4 in { } //===----------------------------------------------------------------------===// +// Support for 128-bit floating-point values in vector registers +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(f128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + + def : Pat<(f128 fpimm0), (VZERO)>; + def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>; +} + +//===----------------------------------------------------------------------===// // String instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index ef5a2642bd2..52ba1a58401 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -260,10 +260,10 @@ defm VF128 : SystemZRegClass<"VF128", // All vector registers. defm VR128 : SystemZRegClass<"VR128", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, - (add (sequence "V%u", 0, 7), - (sequence "V%u", 16, 31), - (sequence "V%u", 8, 15))>; + [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; // Attaches a ValueType to a register operand, to make the instruction // definitions easier. @@ -283,6 +283,7 @@ def v128g : TypedReg<v2i64, VR128>; def v128q : TypedReg<v16i8, VR128>; def v128sb : TypedReg<v4f32, VR128>; def v128db : TypedReg<v2f64, VR128>; +def v128xb : TypedReg<f128, VR128>; def v128any : TypedReg<untyped, VR128>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index f9407eb179d..f11177af91a 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -753,7 +753,7 @@ def : InstRW<[], (instregex "Insn.*")>; // FP: Select instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXa], (instregex "SelectF(32|64|128)$")>; +def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>; def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>; def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>; @@ -1319,18 +1319,23 @@ def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "VFL(L|R)$")>; def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>; def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>; +def : InstRW<[VecBF2], (instregex "WFLLD$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>; def : InstRW<[VecBF2], (instregex "VFI$")>; def : InstRW<[VecBF], (instregex "VFIDB$")>; def : InstRW<[VecBF], (instregex "WFIDB$")>; def : InstRW<[VecBF2], (instregex "VFISB$")>; def : InstRW<[VecBF], (instregex "WFISB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>; // Sign operations def : InstRW<[VecXsPm], (instregex "VFPSO$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>; // Minimum / maximum def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>; @@ -1338,11 +1343,13 @@ def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>; def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>; def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>; // Test data class def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>; // Add / subtract def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; @@ -1350,6 +1357,7 @@ def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>; def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>; // Multiply / multiply-and-add/subtract def : InstRW<[VecBF2], (instregex "VFM$")>; @@ -1357,19 +1365,23 @@ def : InstRW<[VecBF], (instregex "VFMDB$")>; def : InstRW<[VecBF], (instregex "WFMDB$")>; def : InstRW<[VecBF2], (instregex "VFMSB$")>; def : InstRW<[VecBF], (instregex "WFMSB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>; def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>; def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>; def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>; def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>; // Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>; +def : InstRW<[VecFPd], (instregex "WFDXB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>; +def : InstRW<[VecFPd], (instregex "WFSQXB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point comparison @@ -1380,13 +1392,16 @@ def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>; def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>; def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point insertion and extraction diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll index ccb69642a2c..cab6c116bc0 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll @@ -28,8 +28,11 @@ define double @f2(double %f) { declare fp128 @llvm.fabs.f128(fp128 %f) define void @f3(fp128 *%ptr, fp128 *%ptr2) { ; CHECK-LABEL: f3: -; CHECK: lpxbr -; CHECK: dxbr +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: wflpxb [[POSREG1:%v[0-9]+]], [[REG1]] +; CHECK: wfdxb [[RES:%v[0-9]+]], [[POSREG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %orig = load fp128 , fp128 *%ptr %abs = call fp128 @llvm.fabs.f128(fp128 %orig) diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll index 59064795b98..606bce3de36 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll @@ -30,8 +30,11 @@ define double @f2(double %f) { declare fp128 @llvm.fabs.f128(fp128 %f) define void @f3(fp128 *%ptr, fp128 *%ptr2) { ; CHECK-LABEL: f3: -; CHECK: lnxbr -; CHECK: dxbr +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: wflnxb [[NEGREG1:%v[0-9]+]], [[REG1]] +; CHECK: wfdxb [[RES:%v[0-9]+]], [[NEGREG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %orig = load fp128 , fp128 *%ptr %abs = call fp128 @llvm.fabs.f128(fp128 %orig) diff --git a/llvm/test/CodeGen/SystemZ/fp-add-04.ll b/llvm/test/CodeGen/SystemZ/fp-add-04.ll new file mode 100644 index 00000000000..186f37ca518 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-add-04.ll @@ -0,0 +1,17 @@ +; Test 128-bit floating-point addition on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = fadd fp128 %f1, %f2 + store fp128 %sum, fp128 *%ptr1 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-06.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-06.ll new file mode 100644 index 00000000000..e146b51e4fb --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-cmp-06.ll @@ -0,0 +1,33 @@ +; Test f128 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; There is no memory form of 128-bit comparison. +define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5) +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %cond = fcmp oeq fp128 %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero -- it is not worthwhile to copy to +; FP pairs just so we can use LTXBR, so simply load up a zero. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) { +; CHECK-LABEL: f2: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = fcmp oeq fp128 %f, 0xL00000000000000000000000000000000 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/llvm/test/CodeGen/SystemZ/fp-const-11.ll b/llvm/test/CodeGen/SystemZ/fp-const-11.ll new file mode 100644 index 00000000000..8523f2786c3 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-const-11.ll @@ -0,0 +1,40 @@ +; Test loads of f128 floating-point constants on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s -check-prefix=CONST + +; Test loading zero. +define void @f1(fp128 *%x) { +; CHECK-LABEL: f1: +; CHECK: vzero [[REG:%v[0-9]+]] +; CHECK: vst [[REG]], 0(%r2) +; CHECK: br %r14 + store fp128 0xL00000000000000000000000000000000, fp128 *%x + ret void +} + +; Test loading of negative floating-point zero. +define void @f2(fp128 *%x) { +; CHECK-LABEL: f2: +; CHECK: vzero [[REG:%v[0-9]+]] +; CHECK: wflnxb [[REG]], [[REG]] +; CHECK: vst [[REG]], 0(%r2) +; CHECK: br %r14 + store fp128 0xL00000000000000008000000000000000, fp128 *%x + ret void +} + +; Test loading of a 128-bit floating-point constant. This value would +; actually fit within the 32-bit format, but we don't have extending +; loads into vector registers. +define void @f3(fp128 *%x) { +; CHECK-LABEL: f3: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: vl [[REG:%v[0-9]+]], 0([[REGISTER]]) +; CHECK: vst [[REG]], 0(%r2) +; CHECK: br %r14 +; CONST: .quad 4611404543484231680 +; CONST: .quad 0 + store fp128 0xL00000000000000003fff000002000000, fp128 *%x + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-15.ll b/llvm/test/CodeGen/SystemZ/fp-conv-15.ll new file mode 100644 index 00000000000..61100016c42 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-conv-15.ll @@ -0,0 +1,50 @@ +; Test f128 floating-point truncations/extensions on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f128->f64. +define double @f1(fp128 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wflrx %f0, [[REG]], 0, 0 +; CHECK: br %r14 + %val = load fp128, fp128 *%ptr + %res = fptrunc fp128 %val to double + ret double %res +} + +; Test f128->f32. +define float @f2(fp128 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wflrx %f0, [[REG]], 0, 3 +; CHECK: ledbra %f0, 0, %f0, 0 +; CHECK: br %r14 + %val = load fp128, fp128 *%ptr + %res = fptrunc fp128 %val to float + ret float %res +} + +; Test f64->f128. +define void @f3(fp128 *%dst, double %val) { +; CHECK-LABEL: f3: +; CHECK: wflld [[RES:%v[0-9]+]], %f0 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Test f32->f128. +define void @f4(fp128 *%dst, float %val) { +; CHECK-LABEL: f4: +; CHECK: ldebr %f0, %f0 +; CHECK: wflld [[RES:%v[0-9]+]], %f0 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-16.ll b/llvm/test/CodeGen/SystemZ/fp-conv-16.ll new file mode 100644 index 00000000000..4f9bb865694 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-conv-16.ll @@ -0,0 +1,99 @@ +; Test f128 floating-point conversion to/from integers on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test signed i32->f128. +define void @f1(i32 %i, fp128 *%dst) { +; CHECK-LABEL: f1: +; CHECK: cxfbr %f0, %r2 +; CHECK: vmrhg %v0, %v0, %v2 +; CHECK: vst %v0, 0(%r3) +; CHECK: br %r14 + %conv = sitofp i32 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} + +; Test signed i64->f128. +define void @f2(i64 %i, fp128 *%dst) { +; CHECK-LABEL: f2: +; CHECK: cxgbr %f0, %r2 +; CHECK: vmrhg %v0, %v0, %v2 +; CHECK: vst %v0, 0(%r3) +; CHECK: br %r14 + %conv = sitofp i64 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} + +; Test unsigned i32->f128. +define void @f3(i32 %i, fp128 *%dst) { +; CHECK-LABEL: f3: +; CHECK: cxlfbr %f0, 0, %r2, 0 +; CHECK: vmrhg %v0, %v0, %v2 +; CHECK: vst %v0, 0(%r3) +; CHECK: br %r14 + %conv = uitofp i32 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} + +; Test unsigned i64->f128. +define void @f4(i64 %i, fp128 *%dst) { +; CHECK-LABEL: f4: +; CHECK: cxlgbr %f0, 0, %r2, 0 +; CHECK: vmrhg %v0, %v0, %v2 +; CHECK: vst %v0, 0(%r3) +; CHECK: br %r14 + %conv = uitofp i64 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} + +; Test signed f128->i32. +define i32 @f5(fp128 *%src) { +; CHECK-LABEL: f5: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: cfxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = fptosi fp128 %f to i32 + ret i32 %conv +} + +; Test signed f128->i64. +define i64 @f6(fp128 *%src) { +; CHECK-LABEL: f6: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: cgxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = fptosi fp128 %f to i64 + ret i64 %conv +} + +; Test unsigned f128->i32. +define i32 @f7(fp128 *%src) { +; CHECK-LABEL: f7: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: clfxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128 , fp128 *%src + %conv = fptoui fp128 %f to i32 + ret i32 %conv +} + +; Test unsigned f128->i64. +define i64 @f8(fp128 *%src) { +; CHECK-LABEL: f8: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: clgxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128 , fp128 *%src + %conv = fptoui fp128 %f to i64 + ret i64 %conv +} diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll new file mode 100644 index 00000000000..657c0e18767 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll @@ -0,0 +1,81 @@ +; Test f128 copysign operations on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @copysignf(float, float) readnone +declare double @copysign(double, double) readnone +; FIXME: not really the correct prototype for SystemZ. +declare fp128 @copysignl(fp128, fp128) readnone + +; Test f32 copies in which the sign comes from an f128. +define float @f1(float %a, fp128 *%bptr) { +; CHECK-LABEL: f1: +; CHECK: vl %v[[REG:[0-9]+]], 0(%r2) +; CHECK: cpsdr %f0, %f[[REG]], %f0 +; CHECK: br %r14 + %bl = load volatile fp128, fp128 *%bptr + %b = fptrunc fp128 %bl to float + %res = call float @copysignf(float %a, float %b) readnone + ret float %res +} + +; Test f64 copies in which the sign comes from an f128. +define double @f2(double %a, fp128 *%bptr) { +; CHECK-LABEL: f2: +; CHECK: vl %v[[REG:[0-9]+]], 0(%r2) +; CHECK: cpsdr %f0, %f[[REG]], %f0 +; CHECK: br %r14 + %bl = load volatile fp128, fp128 *%bptr + %b = fptrunc fp128 %bl to double + %res = call double @copysign(double %a, double %b) readnone + ret double %res +} + +; Test f128 copies in which the sign comes from an f32. +define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) { +; CHECK-LABEL: f7: +; CHECK: vl [[REG1:%v[0-7]+]], 0(%r3) +; CHECK: tmlh +; CHECK: wflnxb [[REG1]], [[REG1]] +; CHECK: wflpxb [[REG1]], [[REG1]] +; CHECK: vst [[REG1]], 0(%r2) +; CHECK: br %r14 + %a = load volatile fp128, fp128 *%aptr + %b = fpext float %bf to fp128 + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} + +; As above, but the sign comes from an f64. +define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) { +; CHECK-LABEL: f8: +; CHECK: vl [[REG1:%v[0-7]+]], 0(%r3) +; CHECK: tmhh +; CHECK: wflnxb [[REG1]], [[REG1]] +; CHECK: wflpxb [[REG1]], [[REG1]] +; CHECK: vst [[REG1]], 0(%r2) +; CHECK: br %r14 + %a = load volatile fp128, fp128 *%aptr + %b = fpext double %bd to fp128 + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} + +; As above, but the sign comes from an f128. +define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) { +; CHECK-LABEL: f9: +; CHECK: vl [[REG1:%v[0-7]+]], 0(%r3) +; CHECK: vl [[REG2:%v[0-7]+]], 0(%r4) +; CHECK: tm +; CHECK: wflnxb [[REG1]], [[REG1]] +; CHECK: wflpxb [[REG1]], [[REG1]] +; CHECK: vst [[REG1]], 0(%r2) +; CHECK: br %r14 + %a = load volatile fp128, fp128 *%aptr + %b = load volatile fp128, fp128 *%bptr + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-div-04.ll b/llvm/test/CodeGen/SystemZ/fp-div-04.ll new file mode 100644 index 00000000000..54e87f46c84 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-div-04.ll @@ -0,0 +1,17 @@ +; Test 128-bit floating-point division on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = fdiv fp128 %f1, %f2 + store fp128 %sum, fp128 *%ptr1 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-move-13.ll b/llvm/test/CodeGen/SystemZ/fp-move-13.ll new file mode 100644 index 00000000000..d6c53eaceee --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-move-13.ll @@ -0,0 +1,46 @@ +; Test f128 moves on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; VR-to-VR moves. Since f128s are passed by reference, +; we need to force a copy by other means. +define void @f1(fp128 *%x) { +; CHECK-LABEL: f1: +; CHECK: vlr +; CHECK: vleig +; CHECK: br %r14 + %val = load volatile fp128 , fp128 *%x + %t1 = bitcast fp128 %val to <2 x i64> + %t2 = insertelement <2 x i64> %t1, i64 0, i32 0 + %res = bitcast <2 x i64> %t2 to fp128 + store volatile fp128 %res, fp128 *%x + store volatile fp128 %val, fp128 *%x + ret void +} + +; Test 128-bit moves from GPRs to VRs. i128 isn't a legitimate type, +; so this goes through memory. +define void @f2(fp128 *%a, i128 *%b) { +; CHECK-LABEL: f2: +; CHECK: lg +; CHECK: lg +; CHECK: stg +; CHECK: stg +; CHECK: br %r14 + %val = load i128 , i128 *%b + %res = bitcast i128 %val to fp128 + store fp128 %res, fp128 *%a + ret void +} + +; Test 128-bit moves from VRs to GPRs, with the same restriction as f2. +define void @f3(fp128 *%a, i128 *%b) { +; CHECK-LABEL: f3: +; CHECK: vl +; CHECK: vst + %val = load fp128 , fp128 *%a + %res = bitcast fp128 %val to i128 + store i128 %res, i128 *%b + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-11.ll b/llvm/test/CodeGen/SystemZ/fp-mul-11.ll new file mode 100644 index 00000000000..ef45bf184a4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-mul-11.ll @@ -0,0 +1,32 @@ +; Test 128-bit floating-point multiplication on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = fmul fp128 %f1, %f2 + store fp128 %sum, fp128 *%ptr1 + ret void +} + +define void @f2(double %f1, double %f2, fp128 *%dst) { +; CHECK-LABEL: f2: +; CHECK-DAG: wflld [[REG1:%v[0-9]+]], %f0 +; CHECK-DAG: wflld [[REG2:%v[0-9]+]], %f2 +; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-12.ll b/llvm/test/CodeGen/SystemZ/fp-mul-12.ll new file mode 100644 index 00000000000..331f9a30c27 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-mul-12.ll @@ -0,0 +1,72 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3) + +define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4) +; CHECK: wfmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]] +; CHECK: vst [[RES]], 0(%r5) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %f3 = load fp128, fp128 *%ptr3 + %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %f3) + store fp128 %res, fp128 *%dst + ret void +} + +define void @f2(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) { +; CHECK-LABEL: f2: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4) +; CHECK: wfmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]] +; CHECK: vst [[RES]], 0(%r5) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %f3 = load fp128, fp128 *%ptr3 + %neg = fsub fp128 0xL00000000000000008000000000000000, %f3 + %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %neg) + store fp128 %res, fp128 *%dst + ret void +} + +define void @f3(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) { +; CHECK-LABEL: f3: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4) +; CHECK: wfnmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]] +; CHECK: vst [[RES]], 0(%r5) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %f3 = load fp128, fp128 *%ptr3 + %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %f3) + %negres = fsub fp128 0xL00000000000000008000000000000000, %res + store fp128 %negres, fp128 *%dst + ret void +} + +define void @f4(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) { +; CHECK-LABEL: f4: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4) +; CHECK: wfnmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]] +; CHECK: vst [[RES]], 0(%r5) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %f3 = load fp128, fp128 *%ptr3 + %neg = fsub fp128 0xL00000000000000008000000000000000, %f3 + %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %neg) + %negres = fsub fp128 0xL00000000000000008000000000000000, %res + store fp128 %negres, fp128 *%dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll index c904d19947b..38fb3a58d40 100644 --- a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll @@ -25,8 +25,11 @@ define double @f2(double %f) { ; processing so that using FPRs is unequivocally better. define void @f3(fp128 *%ptr, fp128 *%ptr2) { ; CHECK-LABEL: f3: -; CHECK: lcxbr -; CHECK: dxbr +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK-DAG: wflcxb [[NEGREG1:%v[0-9]+]], [[REG1]] +; CHECK: wfdxb [[RES:%v[0-9]+]], [[NEGREG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %orig = load fp128 , fp128 *%ptr %negzero = fpext float -0.0 to fp128 diff --git a/llvm/test/CodeGen/SystemZ/fp-round-03.ll b/llvm/test/CodeGen/SystemZ/fp-round-03.ll index 1a8296357bc..762e793701d 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-03.ll @@ -26,7 +26,9 @@ define double @f2(double %f) { declare fp128 @llvm.rint.f128(fp128 %f) define void @f3(fp128 *%ptr) { ; CHECK-LABEL: f3: -; CHECK: fixbr %f0, 0, %f0 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 0, 0 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.rint.f128(fp128 %src) @@ -58,7 +60,9 @@ define double @f5(double %f) { declare fp128 @llvm.nearbyint.f128(fp128 %f) define void @f6(fp128 *%ptr) { ; CHECK-LABEL: f6: -; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 0 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.nearbyint.f128(fp128 %src) @@ -90,7 +94,9 @@ define double @f8(double %f) { declare fp128 @llvm.floor.f128(fp128 %f) define void @f9(fp128 *%ptr) { ; CHECK-LABEL: f9: -; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 7 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.floor.f128(fp128 %src) @@ -122,7 +128,9 @@ define double @f11(double %f) { declare fp128 @llvm.ceil.f128(fp128 %f) define void @f12(fp128 *%ptr) { ; CHECK-LABEL: f12: -; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 6 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.ceil.f128(fp128 %src) @@ -154,7 +162,9 @@ define double @f14(double %f) { declare fp128 @llvm.trunc.f128(fp128 %f) define void @f15(fp128 *%ptr) { ; CHECK-LABEL: f15: -; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 5 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.trunc.f128(fp128 %src) @@ -186,7 +196,9 @@ define double @f17(double %f) { declare fp128 @llvm.round.f128(fp128 %f) define void @f18(fp128 *%ptr) { ; CHECK-LABEL: f18: -; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 1 +; CHECK: vst [[RES]], 0(%r2) ; CHECK: br %r14 %src = load fp128 , fp128 *%ptr %res = call fp128 @llvm.round.f128(fp128 %src) diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-04.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-04.ll new file mode 100644 index 00000000000..e0fb2569b39 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-04.ll @@ -0,0 +1,17 @@ +; Test 128-bit floating-point square root on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare fp128 @llvm.sqrt.f128(fp128 %f) + +define void @f1(fp128 *%ptr) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfsqxb [[RES:%v[0-9]+]], [[REG]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %res = call fp128 @llvm.sqrt.f128(fp128 %f) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-04.ll b/llvm/test/CodeGen/SystemZ/fp-sub-04.ll new file mode 100644 index 00000000000..5f88132664e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-sub-04.ll @@ -0,0 +1,17 @@ +; Test 128-bit floating-point subtraction on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define void @f1(fp128 *%ptr1, fp128 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %sum = fsub fp128 %f1, %f2 + store fp128 %sum, fp128 *%ptr1 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/tdc-07.ll b/llvm/test/CodeGen/SystemZ/tdc-07.ll new file mode 100644 index 00000000000..6651410e7c6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/tdc-07.ll @@ -0,0 +1,18 @@ +; Test the Test Data Class instruction on z14 +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare i32 @llvm.s390.tdc.f128(fp128, i64) + +; Check using as i32 - f128 +define i32 @f3(fp128 %x) { +; CHECK-LABEL: f3 +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: tcxb %f0, 123 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 + %res = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 123) + ret i32 %res +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll index 47400b8c66b..591d3bf36f1 100644 --- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll @@ -10,6 +10,9 @@ declare float @fmaxf(float, float) declare float @llvm.maxnum.f32(float, float) declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare fp128 @fmaxl(fp128, fp128) +declare fp128 @llvm.maxnum.f128(fp128, fp128) + ; Test the fmax library function. define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -110,3 +113,63 @@ define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, ret <4 x float> %ret } +; Test the fmaxl library function. +define void @f21(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f21: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @fmaxl(fp128 %val1, fp128 %val2) readnone + store fp128 %res, fp128* %dst + ret void +} + +; Test the f128 maxnum intrinsic. +define void @f22(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f22: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @llvm.maxnum.f128(fp128 %val1, fp128 %val2) + store fp128 %res, fp128* %dst + ret void +} + +; Test a f128 constant compare/select resulting in maxnum. +define void @f23(fp128 *%ptr, fp128 *%dst) { +; CHECK-LABEL: f23: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r3) +; CHECK: br %r14 + %val = load fp128, fp128* %ptr + %cmp = fcmp ogt fp128 %val, 0xL00000000000000000000000000000000 + %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000 + store fp128 %res, fp128* %dst + ret void +} + +; Test a f128 constant compare/select resulting in maxnan. +define void @f24(fp128 *%ptr, fp128 *%dst) { +; CHECK-LABEL: f24: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1 +; CHECK: vst [[RES]], 0(%r3) +; CHECK: br %r14 + %val = load fp128, fp128* %ptr + %cmp = fcmp ugt fp128 %val, 0xL00000000000000000000000000000000 + %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000 + store fp128 %res, fp128* %dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll index b84ea6b6b4f..3eef9016cd0 100644 --- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll @@ -10,6 +10,9 @@ declare float @fminf(float, float) declare float @llvm.minnum.f32(float, float) declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) +declare fp128 @fminl(fp128, fp128) +declare fp128 @llvm.minnum.f128(fp128, fp128) + ; Test the fmin library function. define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -110,3 +113,63 @@ define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, ret <4 x float> %ret } +; Test the fminl library function. +define void @f21(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f21: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @fminl(fp128 %val1, fp128 %val2) readnone + store fp128 %res, fp128* %dst + ret void +} + +; Test the f128 minnum intrinsic. +define void @f22(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) { +; CHECK-LABEL: f22: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3) +; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r4) +; CHECK: br %r14 + %val1 = load fp128, fp128* %ptr1 + %val2 = load fp128, fp128* %ptr2 + %res = call fp128 @llvm.minnum.f128(fp128 %val1, fp128 %val2) + store fp128 %res, fp128* %dst + ret void +} + +; Test a f128 constant compare/select resulting in minnum. +define void @f23(fp128 *%ptr, fp128 *%dst) { +; CHECK-LABEL: f23: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4 +; CHECK: vst [[RES]], 0(%r3) +; CHECK: br %r14 + %val = load fp128, fp128* %ptr + %cmp = fcmp olt fp128 %val, 0xL00000000000000000000000000000000 + %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000 + store fp128 %res, fp128* %dst + ret void +} + +; Test a f128 constant compare/select resulting in minnan. +define void @f24(fp128 *%ptr, fp128 *%dst) { +; CHECK-LABEL: f24: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1 +; CHECK: vst [[RES]], 0(%r3) +; CHECK: br %r14 + %val = load fp128, fp128* %ptr + %cmp = fcmp ult fp128 %val, 0xL00000000000000000000000000000000 + %res = select i1 %cmp, fp128 %val, fp128 0xL00000000000000000000000000000000 + store fp128 %res, fp128* %dst + ret void +} + diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-z14.txt b/llvm/test/MC/Disassembler/SystemZ/insns-z14.txt index c6b55fd5a26..c73b50c1c2f 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns-z14.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns-z14.txt @@ -1901,6 +1901,21 @@ # CHECK: wfasb %v18, %f3, %v20 0xe7 0x23 0x40 0x08 0x2a 0xe3 +# CHECK: wfaxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xe3 + +# CHECK: wfaxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xe3 + +# CHECK: wfaxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xe3 + +# CHECK: wfaxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xe3 + +# CHECK: wfaxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xe3 + # CHECK: wfcsb %f0, %f0 0xe7 0x00 0x00 0x00 0x20 0xcb @@ -1922,6 +1937,24 @@ # CHECK: wfcsb %f14, %v17 0xe7 0xe1 0x00 0x00 0x24 0xcb +# CHECK: wfcxb %v0, %v0 +0xe7 0x00 0x00 0x00 0x40 0xcb + +# CHECK: wfcxb %v0, %v15 +0xe7 0x0f 0x00 0x00 0x40 0xcb + +# CHECK: wfcxb %v0, %v31 +0xe7 0x0f 0x00 0x00 0x44 0xcb + +# CHECK: wfcxb %v15, %v0 +0xe7 0xf0 0x00 0x00 0x40 0xcb + +# CHECK: wfcxb %v31, %v0 +0xe7 0xf0 0x00 0x00 0x48 0xcb + +# CHECK: wfcxb %v14, %v17 +0xe7 0xe1 0x00 0x00 0x44 0xcb + # CHECK: wfcesb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xe8 @@ -1958,6 +1991,36 @@ # CHECK: wfcesbs %v18, %f3, %v20 0xe7 0x23 0x40 0x18 0x2a 0xe8 +# CHECK: wfcexb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xe8 + +# CHECK: wfcexb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xe8 + +# CHECK: wfcexb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xe8 + +# CHECK: wfcexb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xe8 + +# CHECK: wfcexb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xe8 + +# CHECK: wfcexbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x18 0x40 0xe8 + +# CHECK: wfcexbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x18 0x42 0xe8 + +# CHECK: wfcexbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x18 0x44 0xe8 + +# CHECK: wfcexbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x18 0x48 0xe8 + +# CHECK: wfcexbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x18 0x4a 0xe8 + # CHECK: wfchsb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xeb @@ -1994,6 +2057,36 @@ # CHECK: wfchsbs %v18, %f3, %v20 0xe7 0x23 0x40 0x18 0x2a 0xeb +# CHECK: wfchxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xeb + +# CHECK: wfchxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xeb + +# CHECK: wfchxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xeb + +# CHECK: wfchxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xeb + +# CHECK: wfchxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xeb + +# CHECK: wfchxbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x18 0x40 0xeb + +# CHECK: wfchxbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x18 0x42 0xeb + +# CHECK: wfchxbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x18 0x44 0xeb + +# CHECK: wfchxbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x18 0x48 0xeb + +# CHECK: wfchxbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x18 0x4a 0xeb + # CHECK: wfchesb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xea @@ -2030,6 +2123,36 @@ # CHECK: wfchesbs %v18, %f3, %v20 0xe7 0x23 0x40 0x18 0x2a 0xea +# CHECK: wfchexb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xea + +# CHECK: wfchexb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xea + +# CHECK: wfchexb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xea + +# CHECK: wfchexb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xea + +# CHECK: wfchexb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xea + +# CHECK: wfchexbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x18 0x40 0xea + +# CHECK: wfchexbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x18 0x42 0xea + +# CHECK: wfchexbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x18 0x44 0xea + +# CHECK: wfchexbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x18 0x48 0xea + +# CHECK: wfchexbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x18 0x4a 0xea + # CHECK: wfdsb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xe5 @@ -2048,6 +2171,21 @@ # CHECK: wfdsb %v18, %f3, %v20 0xe7 0x23 0x40 0x08 0x2a 0xe5 +# CHECK: wfdxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xe5 + +# CHECK: wfdxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xe5 + +# CHECK: wfdxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xe5 + +# CHECK: wfdxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xe5 + +# CHECK: wfdxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xe5 + # CHECK: wfisb %f0, %f0, 0, 0 0xe7 0x00 0x00 0x08 0x20 0xc7 @@ -2072,6 +2210,27 @@ # CHECK: wfisb %f14, %v17, 4, 10 0xe7 0xe1 0x00 0xac 0x24 0xc7 +# CHECK: wfixb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x08 0x40 0xc7 + +# CHECK: wfixb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf8 0x40 0xc7 + +# CHECK: wfixb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x0c 0x40 0xc7 + +# CHECK: wfixb %v0, %v0, 7, 0 +0xe7 0x00 0x00 0x0f 0x40 0xc7 + +# CHECK: wfixb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x44 0xc7 + +# CHECK: wfixb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x08 0x48 0xc7 + +# CHECK: wfixb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x44 0xc7 + # CHECK: wfksb %f0, %f0 0xe7 0x00 0x00 0x00 0x20 0xca @@ -2093,6 +2252,24 @@ # CHECK: wfksb %f14, %v17 0xe7 0xe1 0x00 0x00 0x24 0xca +# CHECK: wfkxb %v0, %v0 +0xe7 0x00 0x00 0x00 0x40 0xca + +# CHECK: wfkxb %v0, %v15 +0xe7 0x0f 0x00 0x00 0x40 0xca + +# CHECK: wfkxb %v0, %v31 +0xe7 0x0f 0x00 0x00 0x44 0xca + +# CHECK: wfkxb %v15, %v0 +0xe7 0xf0 0x00 0x00 0x40 0xca + +# CHECK: wfkxb %v31, %v0 +0xe7 0xf0 0x00 0x00 0x48 0xca + +# CHECK: wfkxb %v14, %v17 +0xe7 0xe1 0x00 0x00 0x44 0xca + # CHECK: wfkedb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xe8 @@ -2165,6 +2342,36 @@ # CHECK: wfkesbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x2a 0xe8 +# CHECK: wfkexb %v0, %v0, %v0 +0xe7 0x00 0x00 0x0c 0x40 0xe8 + +# CHECK: wfkexb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x0c 0x42 0xe8 + +# CHECK: wfkexb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x0c 0x44 0xe8 + +# CHECK: wfkexb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x0c 0x48 0xe8 + +# CHECK: wfkexb %v18, %v3, %v20 +0xe7 0x23 0x40 0x0c 0x4a 0xe8 + +# CHECK: wfkexbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x1c 0x40 0xe8 + +# CHECK: wfkexbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x1c 0x42 0xe8 + +# CHECK: wfkexbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x1c 0x44 0xe8 + +# CHECK: wfkexbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x1c 0x48 0xe8 + +# CHECK: wfkexbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x1c 0x4a 0xe8 + # CHECK: wfkhdb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xeb @@ -2237,6 +2444,36 @@ # CHECK: wfkhsbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x2a 0xeb +# CHECK: wfkhxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x0c 0x40 0xeb + +# CHECK: wfkhxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x0c 0x42 0xeb + +# CHECK: wfkhxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x0c 0x44 0xeb + +# CHECK: wfkhxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x0c 0x48 0xeb + +# CHECK: wfkhxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x0c 0x4a 0xeb + +# CHECK: wfkhxbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x1c 0x40 0xeb + +# CHECK: wfkhxbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x1c 0x42 0xeb + +# CHECK: wfkhxbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x1c 0x44 0xeb + +# CHECK: wfkhxbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x1c 0x48 0xeb + +# CHECK: wfkhxbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x1c 0x4a 0xeb + # CHECK: wfkhedb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xea @@ -2309,6 +2546,36 @@ # CHECK: wfkhesbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x2a 0xea +# CHECK: wfkhexb %v0, %v0, %v0 +0xe7 0x00 0x00 0x0c 0x40 0xea + +# CHECK: wfkhexb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x0c 0x42 0xea + +# CHECK: wfkhexb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x0c 0x44 0xea + +# CHECK: wfkhexb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x0c 0x48 0xea + +# CHECK: wfkhexb %v18, %v3, %v20 +0xe7 0x23 0x40 0x0c 0x4a 0xea + +# CHECK: wfkhexbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x1c 0x40 0xea + +# CHECK: wfkhexbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x1c 0x42 0xea + +# CHECK: wfkhexbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x1c 0x44 0xea + +# CHECK: wfkhexbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x1c 0x48 0xea + +# CHECK: wfkhexbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x1c 0x4a 0xea + # CHECK: wfpsosb %f0, %f0, 3 0xe7 0x00 0x00 0x38 0x20 0xcc @@ -2333,6 +2600,27 @@ # CHECK: wfpsosb %f14, %v17, 7 0xe7 0xe1 0x00 0x78 0x24 0xcc +# CHECK: wfpsoxb %v0, %v0, 3 +0xe7 0x00 0x00 0x38 0x40 0xcc + +# CHECK: wfpsoxb %v0, %v0, 15 +0xe7 0x00 0x00 0xf8 0x40 0xcc + +# CHECK: wfpsoxb %v0, %v15, 3 +0xe7 0x0f 0x00 0x38 0x40 0xcc + +# CHECK: wfpsoxb %v0, %v31, 3 +0xe7 0x0f 0x00 0x38 0x44 0xcc + +# CHECK: wfpsoxb %v15, %v0, 3 +0xe7 0xf0 0x00 0x38 0x40 0xcc + +# CHECK: wfpsoxb %v31, %v0, 3 +0xe7 0xf0 0x00 0x38 0x48 0xcc + +# CHECK: wfpsoxb %v14, %v17, 7 +0xe7 0xe1 0x00 0x78 0x44 0xcc + # CHECK: wflcsb %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xcc @@ -2354,6 +2642,24 @@ # CHECK: wflcsb %f14, %v17 0xe7 0xe1 0x00 0x08 0x24 0xcc +# CHECK: wflcxb %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xcc + +# CHECK: wflcxb %v0, %v15 +0xe7 0x0f 0x00 0x08 0x40 0xcc + +# CHECK: wflcxb %v0, %v31 +0xe7 0x0f 0x00 0x08 0x44 0xcc + +# CHECK: wflcxb %v15, %v0 +0xe7 0xf0 0x00 0x08 0x40 0xcc + +# CHECK: wflcxb %v31, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xcc + +# CHECK: wflcxb %v14, %v17 +0xe7 0xe1 0x00 0x08 0x44 0xcc + # CHECK: wflnsb %f0, %f0 0xe7 0x00 0x00 0x18 0x20 0xcc @@ -2375,6 +2681,24 @@ # CHECK: wflnsb %f14, %v17 0xe7 0xe1 0x00 0x18 0x24 0xcc +# CHECK: wflnxb %v0, %v0 +0xe7 0x00 0x00 0x18 0x40 0xcc + +# CHECK: wflnxb %v0, %v15 +0xe7 0x0f 0x00 0x18 0x40 0xcc + +# CHECK: wflnxb %v0, %v31 +0xe7 0x0f 0x00 0x18 0x44 0xcc + +# CHECK: wflnxb %v15, %v0 +0xe7 0xf0 0x00 0x18 0x40 0xcc + +# CHECK: wflnxb %v31, %v0 +0xe7 0xf0 0x00 0x18 0x48 0xcc + +# CHECK: wflnxb %v14, %v17 +0xe7 0xe1 0x00 0x18 0x44 0xcc + # CHECK: wflpsb %f0, %f0 0xe7 0x00 0x00 0x28 0x20 0xcc @@ -2396,6 +2720,69 @@ # CHECK: wflpsb %f14, %v17 0xe7 0xe1 0x00 0x28 0x24 0xcc +# CHECK: wflpxb %v0, %v0 +0xe7 0x00 0x00 0x28 0x40 0xcc + +# CHECK: wflpxb %v0, %v15 +0xe7 0x0f 0x00 0x28 0x40 0xcc + +# CHECK: wflpxb %v0, %v31 +0xe7 0x0f 0x00 0x28 0x44 0xcc + +# CHECK: wflpxb %v15, %v0 +0xe7 0xf0 0x00 0x28 0x40 0xcc + +# CHECK: wflpxb %v31, %v0 +0xe7 0xf0 0x00 0x28 0x48 0xcc + +# CHECK: wflpxb %v14, %v17 +0xe7 0xe1 0x00 0x28 0x44 0xcc + +# CHECK: wflld %v0, %f0 +0xe7 0x00 0x00 0x08 0x30 0xc4 + +# CHECK: wflld %v0, %f0 +0xe7 0x00 0x00 0x08 0x30 0xc4 + +# CHECK: wflld %v0, %f15 +0xe7 0x0f 0x00 0x08 0x30 0xc4 + +# CHECK: wflld %v0, %v31 +0xe7 0x0f 0x00 0x08 0x34 0xc4 + +# CHECK: wflld %v15, %f0 +0xe7 0xf0 0x00 0x08 0x30 0xc4 + +# CHECK: wflld %v31, %f0 +0xe7 0xf0 0x00 0x08 0x38 0xc4 + +# CHECK: wflld %v14, %v17 +0xe7 0xe1 0x00 0x08 0x34 0xc4 + +# CHECK: wflrx %f0, %v0, 0, 0 +0xe7 0x00 0x00 0x08 0x40 0xc5 + +# CHECK: wflrx %f0, %v0, 0, 0 +0xe7 0x00 0x00 0x08 0x40 0xc5 + +# CHECK: wflrx %f0, %v0, 0, 15 +0xe7 0x00 0x00 0xf8 0x40 0xc5 + +# CHECK: wflrx %f0, %v0, 4, 0 +0xe7 0x00 0x00 0x0c 0x40 0xc5 + +# CHECK: wflrx %f0, %v0, 7, 0 +0xe7 0x00 0x00 0x0f 0x40 0xc5 + +# CHECK: wflrx %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x44 0xc5 + +# CHECK: wflrx %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x08 0x48 0xc5 + +# CHECK: wflrx %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x44 0xc5 + # CHECK: wfmaxdb %f0, %f0, %f0, 0 0xe7 0x00 0x00 0x08 0x30 0xef @@ -2438,6 +2825,24 @@ # CHECK: wfmaxsb %v18, %f3, %v20, 11 0xe7 0x23 0x40 0xb8 0x2a 0xef +# CHECK: wfmaxxb %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x08 0x40 0xef + +# CHECK: wfmaxxb %v0, %v0, %v0, 4 +0xe7 0x00 0x00 0x48 0x40 0xef + +# CHECK: wfmaxxb %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x08 0x42 0xef + +# CHECK: wfmaxxb %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x08 0x44 0xef + +# CHECK: wfmaxxb %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x08 0x48 0xef + +# CHECK: wfmaxxb %v18, %v3, %v20, 11 +0xe7 0x23 0x40 0xb8 0x4a 0xef + # CHECK: wfmindb %f0, %f0, %f0, 0 0xe7 0x00 0x00 0x08 0x30 0xee @@ -2480,6 +2885,24 @@ # CHECK: wfminsb %v18, %f3, %v20, 11 0xe7 0x23 0x40 0xb8 0x2a 0xee +# CHECK: wfminxb %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x08 0x40 0xee + +# CHECK: wfminxb %v0, %v0, %v0, 4 +0xe7 0x00 0x00 0x48 0x40 0xee + +# CHECK: wfminxb %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x08 0x42 0xee + +# CHECK: wfminxb %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x08 0x44 0xee + +# CHECK: wfminxb %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x08 0x48 0xee + +# CHECK: wfminxb %v18, %v3, %v20, 11 +0xe7 0x23 0x40 0xb8 0x4a 0xee + # CHECK: wfmasb %f0, %f0, %f0, %f0 0xe7 0x00 0x02 0x08 0x00 0x8f @@ -2501,6 +2924,24 @@ # CHECK: wfmasb %f13, %v17, %v21, %v25 0xe7 0xd1 0x52 0x08 0x97 0x8f +# CHECK: wfmaxb %v0, %v0, %v0, %v0 +0xe7 0x00 0x04 0x08 0x00 0x8f + +# CHECK: wfmaxb %v0, %v0, %v0, %v31 +0xe7 0x00 0x04 0x08 0xf1 0x8f + +# CHECK: wfmaxb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf4 0x08 0x02 0x8f + +# CHECK: wfmaxb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x04 0x08 0x04 0x8f + +# CHECK: wfmaxb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x04 0x08 0x08 0x8f + +# CHECK: wfmaxb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x54 0x08 0x97 0x8f + # CHECK: wfmsb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xe7 @@ -2519,6 +2960,21 @@ # CHECK: wfmsb %v18, %f3, %v20 0xe7 0x23 0x40 0x08 0x2a 0xe7 +# CHECK: wfmxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xe7 + +# CHECK: wfmxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xe7 + +# CHECK: wfmxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xe7 + +# CHECK: wfmxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xe7 + +# CHECK: wfmxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xe7 + # CHECK: wfmssb %f0, %f0, %f0, %f0 0xe7 0x00 0x02 0x08 0x00 0x8e @@ -2540,6 +2996,24 @@ # CHECK: wfmssb %f13, %v17, %v21, %v25 0xe7 0xd1 0x52 0x08 0x97 0x8e +# CHECK: wfmsxb %v0, %v0, %v0, %v0 +0xe7 0x00 0x04 0x08 0x00 0x8e + +# CHECK: wfmsxb %v0, %v0, %v0, %v31 +0xe7 0x00 0x04 0x08 0xf1 0x8e + +# CHECK: wfmsxb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf4 0x08 0x02 0x8e + +# CHECK: wfmsxb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x04 0x08 0x04 0x8e + +# CHECK: wfmsxb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x04 0x08 0x08 0x8e + +# CHECK: wfmsxb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x54 0x08 0x97 0x8e + # CHECK: wfnmadb %f0, %f0, %f0, %f0 0xe7 0x00 0x03 0x08 0x00 0x9f @@ -2582,6 +3056,24 @@ # CHECK: wfnmasb %f13, %v17, %v21, %v25 0xe7 0xd1 0x52 0x08 0x97 0x9f +# CHECK: wfnmaxb %v0, %v0, %v0, %v0 +0xe7 0x00 0x04 0x08 0x00 0x9f + +# CHECK: wfnmaxb %v0, %v0, %v0, %v31 +0xe7 0x00 0x04 0x08 0xf1 0x9f + +# CHECK: wfnmaxb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf4 0x08 0x02 0x9f + +# CHECK: wfnmaxb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x04 0x08 0x04 0x9f + +# CHECK: wfnmaxb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x04 0x08 0x08 0x9f + +# CHECK: wfnmaxb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x54 0x08 0x97 0x9f + # CHECK: wfnmsdb %f0, %f0, %f0, %f0 0xe7 0x00 0x03 0x08 0x00 0x9e @@ -2624,6 +3116,24 @@ # CHECK: wfnmssb %f13, %v17, %v21, %v25 0xe7 0xd1 0x52 0x08 0x97 0x9e +# CHECK: wfnmsxb %v0, %v0, %v0, %v0 +0xe7 0x00 0x04 0x08 0x00 0x9e + +# CHECK: wfnmsxb %v0, %v0, %v0, %v31 +0xe7 0x00 0x04 0x08 0xf1 0x9e + +# CHECK: wfnmsxb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf4 0x08 0x02 0x9e + +# CHECK: wfnmsxb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x04 0x08 0x04 0x9e + +# CHECK: wfnmsxb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x04 0x08 0x08 0x9e + +# CHECK: wfnmsxb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x54 0x08 0x97 0x9e + # CHECK: wfssb %f0, %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xe2 @@ -2642,6 +3152,21 @@ # CHECK: wfssb %v18, %f3, %v20 0xe7 0x23 0x40 0x08 0x2a 0xe2 +# CHECK: wfsxb %v0, %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xe2 + +# CHECK: wfsxb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x08 0x42 0xe2 + +# CHECK: wfsxb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x08 0x44 0xe2 + +# CHECK: wfsxb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xe2 + +# CHECK: wfsxb %v18, %v3, %v20 +0xe7 0x23 0x40 0x08 0x4a 0xe2 + # CHECK: wfsqsb %f0, %f0 0xe7 0x00 0x00 0x08 0x20 0xce @@ -2663,6 +3188,24 @@ # CHECK: wfsqsb %f14, %v17 0xe7 0xe1 0x00 0x08 0x24 0xce +# CHECK: wfsqxb %v0, %v0 +0xe7 0x00 0x00 0x08 0x40 0xce + +# CHECK: wfsqxb %v0, %v15 +0xe7 0x0f 0x00 0x08 0x40 0xce + +# CHECK: wfsqxb %v0, %v31 +0xe7 0x0f 0x00 0x08 0x44 0xce + +# CHECK: wfsqxb %v15, %v0 +0xe7 0xf0 0x00 0x08 0x40 0xce + +# CHECK: wfsqxb %v31, %v0 +0xe7 0xf0 0x00 0x08 0x48 0xce + +# CHECK: wfsqxb %v14, %v17 +0xe7 0xe1 0x00 0x08 0x44 0xce + # CHECK: wftcisb %f0, %f0, 0 0xe7 0x00 0x00 0x08 0x20 0x4a @@ -2687,3 +3230,24 @@ # CHECK: wftcisb %f4, %v21, 1656 0xe7 0x45 0x67 0x88 0x24 0x4a +# CHECK: wftcixb %v0, %v0, 0 +0xe7 0x00 0x00 0x08 0x40 0x4a + +# CHECK: wftcixb %v0, %v0, 4095 +0xe7 0x00 0xff 0xf8 0x40 0x4a + +# CHECK: wftcixb %v0, %v15, 0 +0xe7 0x0f 0x00 0x08 0x40 0x4a + +# CHECK: wftcixb %v0, %v31, 0 +0xe7 0x0f 0x00 0x08 0x44 0x4a + +# CHECK: wftcixb %v15, %v0, 0 +0xe7 0xf0 0x00 0x08 0x40 0x4a + +# CHECK: wftcixb %v31, %v0, 0 +0xe7 0xf0 0x00 0x08 0x48 0x4a + +# CHECK: wftcixb %v4, %v21, 1656 +0xe7 0x45 0x67 0x88 0x44 0x4a + diff --git a/llvm/test/MC/SystemZ/insn-bad-z13.s b/llvm/test/MC/SystemZ/insn-bad-z13.s index 456567af2e2..0ccdd11cbe9 100644 --- a/llvm/test/MC/SystemZ/insn-bad-z13.s +++ b/llvm/test/MC/SystemZ/insn-bad-z13.s @@ -2738,42 +2738,69 @@ #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfasb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfaxb %v0, %v0, %v0 wfasb %v0, %v0, %v0 + wfaxb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfcsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcxb %v0, %v0 wfcsb %v0, %v0 + wfcxb %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfcesb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfcesbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcexb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcexbs %v0, %v0, %v0 wfcesb %v0, %v0, %v0 wfcesbs %v0, %v0, %v0 + wfcexb %v0, %v0, %v0 + wfcexbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfchsb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfchsbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchxb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchxbs %v0, %v0, %v0 wfchsb %v0, %v0, %v0 wfchsbs %v0, %v0, %v0 + wfchxb %v0, %v0, %v0 + wfchxbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfchesb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfchesbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchexb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchexbs %v0, %v0, %v0 wfchesb %v0, %v0, %v0 wfchesbs %v0, %v0, %v0 + wfchexb %v0, %v0, %v0 + wfchexbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfdsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfdxb %v0, %v0, %v0 wfdsb %v0, %v0, %v0 + wfdxb %v0, %v0, %v0 #CHECK: error: invalid operand #CHECK: wfidb %v0, %v0, 0, -1 @@ -2791,13 +2818,19 @@ #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfisb %v0, %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfixb %v0, %v0, 0, 0 wfisb %v0, %v0, 0, 0 + wfixb %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfksb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkxb %v0, %v0 wfksb %v0, %v0 + wfkxb %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkedb %v0, %v0, %v0 @@ -2807,11 +2840,17 @@ #CHECK: wfkesb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkesbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkexb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkexbs %v0, %v0, %v0 wfkedb %v0, %v0, %v0 wfkedbs %v0, %v0, %v0 wfkesb %v0, %v0, %v0 wfkesbs %v0, %v0, %v0 + wfkexb %v0, %v0, %v0 + wfkexbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhdb %v0, %v0, %v0 @@ -2821,11 +2860,17 @@ #CHECK: wfkhsb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhsbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhxb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhxbs %v0, %v0, %v0 wfkhdb %v0, %v0, %v0 wfkhdbs %v0, %v0, %v0 wfkhsb %v0, %v0, %v0 wfkhsbs %v0, %v0, %v0 + wfkhxb %v0, %v0, %v0 + wfkhxbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhedb %v0, %v0, %v0 @@ -2835,92 +2880,143 @@ #CHECK: wfkhesb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhesbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhexb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhexbs %v0, %v0, %v0 wfkhedb %v0, %v0, %v0 wfkhedbs %v0, %v0, %v0 wfkhesb %v0, %v0, %v0 wfkhesbs %v0, %v0, %v0 + wfkhexb %v0, %v0, %v0 + wfkhexbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfpsosb %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfpsoxb %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wflcsb %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflcxb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wflnsb %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflnxb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wflpsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflpxb %v0, %v0 wfpsosb %v0, %v0, 0 + wfpsoxb %v0, %v0, 0 wflcsb %v0, %v0 + wflcxb %v0, %v0 wflnsb %v0, %v0 + wflnxb %v0, %v0 wflpsb %v0, %v0 + wflpxb %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wflls %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflld %v0, %v0 wflls %v0, %v0 + wflld %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wflrd %v0, %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflrx %v0, %v0, 0, 0 wflrd %v0, %v0, 0, 0 + wflrx %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmaxdb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmaxsb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmaxxb %v0, %v0, %v0, 0 wfmaxdb %v0, %v0, %v0, 0 wfmaxsb %v0, %v0, %v0, 0 + wfmaxxb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmindb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfminsb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfminxb %v0, %v0, %v0, 0 wfmindb %v0, %v0, %v0, 0 wfminsb %v0, %v0, %v0, 0 + wfminxb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmasb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmaxb %v0, %v0, %v0, %v0 wfmasb %v0, %v0, %v0, %v0 + wfmaxb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmxb %v0, %v0, %v0 wfmsb %v0, %v0, %v0 + wfmxb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmssb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmsxb %v0, %v0, %v0, %v0 wfmssb %v0, %v0, %v0, %v0 + wfmsxb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmadb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmasb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfnmaxb %v0, %v0, %v0, %v0 wfnmadb %v0, %v0, %v0, %v0 wfnmasb %v0, %v0, %v0, %v0 + wfnmaxb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmsdb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmssb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfnmsxb %v0, %v0, %v0, %v0 wfnmsdb %v0, %v0, %v0, %v0 wfnmssb %v0, %v0, %v0, %v0 + wfnmsxb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfssb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfsxb %v0, %v0, %v0 wfssb %v0, %v0, %v0 + wfsxb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfsqsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfsqxb %v0, %v0 wfsqsb %v0, %v0 + wfsqxb %v0, %v0 #CHECK: error: invalid operand #CHECK: wftcidb %v0, %v0, -1 @@ -2932,8 +3028,11 @@ #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wftcisb %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wftcixb %v0, %v0, 0 wftcisb %v0, %v0, 0 + wftcixb %v0, %v0, 0 #CHECK: error: invalid operand #CHECK: wledb %v0, %v0, 0, -1 diff --git a/llvm/test/MC/SystemZ/insn-bad-z14.s b/llvm/test/MC/SystemZ/insn-bad-z14.s index 0e96fd3a40b..8bc736a7a1a 100644 --- a/llvm/test/MC/SystemZ/insn-bad-z14.s +++ b/llvm/test/MC/SystemZ/insn-bad-z14.s @@ -645,6 +645,20 @@ wfisb %v0, %v0, 16, 0 #CHECK: error: invalid operand +#CHECK: wfixb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wfixb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wfixb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wfixb %v0, %v0, 16, 0 + + wfixb %v0, %v0, 0, -1 + wfixb %v0, %v0, 0, 16 + wfixb %v0, %v0, -1, 0 + wfixb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand #CHECK: wflrd %v0, %v0, 0, -1 #CHECK: error: invalid operand #CHECK: wflrd %v0, %v0, 0, 16 @@ -659,6 +673,20 @@ wflrd %v0, %v0, 16, 0 #CHECK: error: invalid operand +#CHECK: wflrx %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wflrx %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wflrx %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wflrx %v0, %v0, 16, 0 + + wflrx %v0, %v0, 0, -1 + wflrx %v0, %v0, 0, 16 + wflrx %v0, %v0, -1, 0 + wflrx %v0, %v0, 16, 0 + +#CHECK: error: invalid operand #CHECK: wfmaxdb %v0, %v0, %v0, -1 #CHECK: error: invalid operand #CHECK: wfmaxdb %v0, %v0, %v0, 16 @@ -675,6 +703,14 @@ wfmaxsb %v0, %v0, %v0, 16 #CHECK: error: invalid operand +#CHECK: wfmaxxb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wfmaxxb %v0, %v0, %v0, 16 + + wfmaxxb %v0, %v0, %v0, -1 + wfmaxxb %v0, %v0, %v0, 16 + +#CHECK: error: invalid operand #CHECK: wfmindb %v0, %v0, %v0, -1 #CHECK: error: invalid operand #CHECK: wfmindb %v0, %v0, %v0, 16 @@ -691,6 +727,14 @@ wfminsb %v0, %v0, %v0, 16 #CHECK: error: invalid operand +#CHECK: wfminxb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wfminxb %v0, %v0, %v0, 16 + + wfminxb %v0, %v0, %v0, -1 + wfminxb %v0, %v0, %v0, 16 + +#CHECK: error: invalid operand #CHECK: wftcisb %v0, %v0, -1 #CHECK: error: invalid operand #CHECK: wftcisb %v0, %v0, 4096 @@ -698,3 +742,11 @@ wftcisb %v0, %v0, -1 wftcisb %v0, %v0, 4096 +#CHECK: error: invalid operand +#CHECK: wftcixb %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wftcixb %v0, %v0, 4096 + + wftcixb %v0, %v0, -1 + wftcixb %v0, %v0, 4096 + diff --git a/llvm/test/MC/SystemZ/insn-good-z14.s b/llvm/test/MC/SystemZ/insn-good-z14.s index 99fb495e40a..1fcdcb4ccab 100644 --- a/llvm/test/MC/SystemZ/insn-good-z14.s +++ b/llvm/test/MC/SystemZ/insn-good-z14.s @@ -1592,6 +1592,18 @@ wfasb %v31, %v0, %v0 wfasb %v18, %v3, %v20 +#CHECK: wfaxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xe3] +#CHECK: wfaxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xe3] +#CHECK: wfaxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xe3] +#CHECK: wfaxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xe3] +#CHECK: wfaxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xe3] + + wfaxb %v0, %v0, %v0 + wfaxb %v0, %v0, %v31 + wfaxb %v0, %v31, %v0 + wfaxb %v31, %v0, %v0 + wfaxb %v18, %v3, %v20 + #CHECK: wfcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xcb] #CHECK: wfcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xcb] #CHECK: wfcsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xcb] @@ -1608,6 +1620,20 @@ wfcsb %v31, %v0 wfcsb %v14, %v17 +#CHECK: wfcxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xcb] +#CHECK: wfcxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xcb] +#CHECK: wfcxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xcb] +#CHECK: wfcxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xcb] +#CHECK: wfcxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xcb] +#CHECK: wfcxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xcb] + + wfcxb %v0, %v0 + wfcxb %v0, %v15 + wfcxb %v0, %v31 + wfcxb %v15, %v0 + wfcxb %v31, %v0 + wfcxb %v14, %v17 + #CHECK: wfcesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe8] #CHECK: wfcesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe8] #CHECK: wfcesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe8] @@ -1636,6 +1662,30 @@ wfcesbs %v31, %v0, %v0 wfcesbs %v18, %v3, %v20 +#CHECK: wfcexb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xe8] +#CHECK: wfcexb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xe8] +#CHECK: wfcexb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xe8] +#CHECK: wfcexb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xe8] +#CHECK: wfcexb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xe8] + + wfcexb %v0, %v0, %v0 + wfcexb %v0, %v0, %v31 + wfcexb %v0, %v31, %v0 + wfcexb %v31, %v0, %v0 + wfcexb %v18, %v3, %v20 + +#CHECK: wfcexbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x18,0x40,0xe8] +#CHECK: wfcexbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x42,0xe8] +#CHECK: wfcexbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x18,0x44,0xe8] +#CHECK: wfcexbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x18,0x48,0xe8] +#CHECK: wfcexbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x4a,0xe8] + + wfcexbs %v0, %v0, %v0 + wfcexbs %v0, %v0, %v31 + wfcexbs %v0, %v31, %v0 + wfcexbs %v31, %v0, %v0 + wfcexbs %v18, %v3, %v20 + #CHECK: wfchsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xeb] #CHECK: wfchsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xeb] #CHECK: wfchsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xeb] @@ -1664,6 +1714,30 @@ wfchsbs %v31, %v0, %v0 wfchsbs %v18, %v3, %v20 +#CHECK: wfchxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xeb] +#CHECK: wfchxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xeb] +#CHECK: wfchxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xeb] +#CHECK: wfchxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xeb] +#CHECK: wfchxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xeb] + + wfchxb %v0, %v0, %v0 + wfchxb %v0, %v0, %v31 + wfchxb %v0, %v31, %v0 + wfchxb %v31, %v0, %v0 + wfchxb %v18, %v3, %v20 + +#CHECK: wfchxbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x18,0x40,0xeb] +#CHECK: wfchxbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x42,0xeb] +#CHECK: wfchxbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x18,0x44,0xeb] +#CHECK: wfchxbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x18,0x48,0xeb] +#CHECK: wfchxbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x4a,0xeb] + + wfchxbs %v0, %v0, %v0 + wfchxbs %v0, %v0, %v31 + wfchxbs %v0, %v31, %v0 + wfchxbs %v31, %v0, %v0 + wfchxbs %v18, %v3, %v20 + #CHECK: wfchesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xea] #CHECK: wfchesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xea] #CHECK: wfchesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xea] @@ -1692,6 +1766,30 @@ wfchesbs %v31, %v0, %v0 wfchesbs %v18, %v3, %v20 +#CHECK: wfchexb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xea] +#CHECK: wfchexb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xea] +#CHECK: wfchexb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xea] +#CHECK: wfchexb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xea] +#CHECK: wfchexb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xea] + + wfchexb %v0, %v0, %v0 + wfchexb %v0, %v0, %v31 + wfchexb %v0, %v31, %v0 + wfchexb %v31, %v0, %v0 + wfchexb %v18, %v3, %v20 + +#CHECK: wfchexbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x18,0x40,0xea] +#CHECK: wfchexbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x42,0xea] +#CHECK: wfchexbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x18,0x44,0xea] +#CHECK: wfchexbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x18,0x48,0xea] +#CHECK: wfchexbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x4a,0xea] + + wfchexbs %v0, %v0, %v0 + wfchexbs %v0, %v0, %v31 + wfchexbs %v0, %v31, %v0 + wfchexbs %v31, %v0, %v0 + wfchexbs %v18, %v3, %v20 + #CHECK: wfdsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe5] #CHECK: wfdsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe5] #CHECK: wfdsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe5] @@ -1706,6 +1804,18 @@ wfdsb %v31, %v0, %v0 wfdsb %v18, %v3, %v20 +#CHECK: wfdxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xe5] +#CHECK: wfdxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xe5] +#CHECK: wfdxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xe5] +#CHECK: wfdxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xe5] +#CHECK: wfdxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xe5] + + wfdxb %v0, %v0, %v0 + wfdxb %v0, %v0, %v31 + wfdxb %v0, %v31, %v0 + wfdxb %v31, %v0, %v0 + wfdxb %v18, %v3, %v20 + #CHECK: wfisb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc7] #CHECK: wfisb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc7] #CHECK: wfisb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc7] @@ -1724,6 +1834,22 @@ wfisb %v31, %v0, 0, 0 wfisb %v14, %v17, 4, 10 +#CHECK: wfixb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xc7] +#CHECK: wfixb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x40,0xc7] +#CHECK: wfixb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x40,0xc7] +#CHECK: wfixb %v0, %v0, 7, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xc7] +#CHECK: wfixb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xc7] +#CHECK: wfixb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xc7] +#CHECK: wfixb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x44,0xc7] + + wfixb %v0, %v0, 0, 0 + wfixb %v0, %v0, 0, 15 + wfixb %v0, %v0, 4, 0 + wfixb %v0, %v0, 7, 0 + wfixb %v0, %v31, 0, 0 + wfixb %v31, %v0, 0, 0 + wfixb %v14, %v17, 4, 10 + #CHECK: wfksb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xca] #CHECK: wfksb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xca] #CHECK: wfksb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xca] @@ -1740,6 +1866,20 @@ wfksb %v31, %v0 wfksb %v14, %v17 +#CHECK: wfkxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xca] +#CHECK: wfkxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xca] +#CHECK: wfkxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xca] +#CHECK: wfkxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xca] +#CHECK: wfkxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xca] +#CHECK: wfkxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xca] + + wfkxb %v0, %v0 + wfkxb %v0, %v15 + wfkxb %v0, %v31 + wfkxb %v15, %v0 + wfkxb %v31, %v0 + wfkxb %v14, %v17 + #CHECK: wfkedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xe8] #CHECK: wfkedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xe8] #CHECK: wfkedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xe8] @@ -1796,6 +1936,30 @@ wfkesbs %v31, %v0, %v0 wfkesbs %v18, %v3, %v20 +#CHECK: wfkexb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x0c,0x40,0xe8] +#CHECK: wfkexb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x42,0xe8] +#CHECK: wfkexb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x0c,0x44,0xe8] +#CHECK: wfkexb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x0c,0x48,0xe8] +#CHECK: wfkexb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x4a,0xe8] + + wfkexb %v0, %v0, %v0 + wfkexb %v0, %v0, %v31 + wfkexb %v0, %v31, %v0 + wfkexb %v31, %v0, %v0 + wfkexb %v18, %v3, %v20 + +#CHECK: wfkexbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x1c,0x40,0xe8] +#CHECK: wfkexbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x1c,0x42,0xe8] +#CHECK: wfkexbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x1c,0x44,0xe8] +#CHECK: wfkexbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x1c,0x48,0xe8] +#CHECK: wfkexbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x4a,0xe8] + + wfkexbs %v0, %v0, %v0 + wfkexbs %v0, %v0, %v31 + wfkexbs %v0, %v31, %v0 + wfkexbs %v31, %v0, %v0 + wfkexbs %v18, %v3, %v20 + #CHECK: wfkhdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xeb] #CHECK: wfkhdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xeb] #CHECK: wfkhdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xeb] @@ -1852,6 +2016,30 @@ wfkhsbs %v31, %v0, %v0 wfkhsbs %v18, %v3, %v20 +#CHECK: wfkhxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x0c,0x40,0xeb] +#CHECK: wfkhxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x42,0xeb] +#CHECK: wfkhxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x0c,0x44,0xeb] +#CHECK: wfkhxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x0c,0x48,0xeb] +#CHECK: wfkhxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x4a,0xeb] + + wfkhxb %v0, %v0, %v0 + wfkhxb %v0, %v0, %v31 + wfkhxb %v0, %v31, %v0 + wfkhxb %v31, %v0, %v0 + wfkhxb %v18, %v3, %v20 + +#CHECK: wfkhxbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x1c,0x40,0xeb] +#CHECK: wfkhxbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x1c,0x42,0xeb] +#CHECK: wfkhxbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x1c,0x44,0xeb] +#CHECK: wfkhxbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x1c,0x48,0xeb] +#CHECK: wfkhxbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x4a,0xeb] + + wfkhxbs %v0, %v0, %v0 + wfkhxbs %v0, %v0, %v31 + wfkhxbs %v0, %v31, %v0 + wfkhxbs %v31, %v0, %v0 + wfkhxbs %v18, %v3, %v20 + #CHECK: wfkhedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xea] #CHECK: wfkhedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xea] #CHECK: wfkhedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xea] @@ -1908,6 +2096,30 @@ wfkhesbs %v31, %v0, %v0 wfkhesbs %v18, %v3, %v20 +#CHECK: wfkhexb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x0c,0x40,0xea] +#CHECK: wfkhexb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x42,0xea] +#CHECK: wfkhexb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x0c,0x44,0xea] +#CHECK: wfkhexb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x0c,0x48,0xea] +#CHECK: wfkhexb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x4a,0xea] + + wfkhexb %v0, %v0, %v0 + wfkhexb %v0, %v0, %v31 + wfkhexb %v0, %v31, %v0 + wfkhexb %v31, %v0, %v0 + wfkhexb %v18, %v3, %v20 + +#CHECK: wfkhexbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x1c,0x40,0xea] +#CHECK: wfkhexbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x1c,0x42,0xea] +#CHECK: wfkhexbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x1c,0x44,0xea] +#CHECK: wfkhexbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x1c,0x48,0xea] +#CHECK: wfkhexbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x4a,0xea] + + wfkhexbs %v0, %v0, %v0 + wfkhexbs %v0, %v0, %v31 + wfkhexbs %v0, %v31, %v0 + wfkhexbs %v31, %v0, %v0 + wfkhexbs %v18, %v3, %v20 + #CHECK: wfpsosb %f0, %f0, 3 # encoding: [0xe7,0x00,0x00,0x38,0x20,0xcc] #CHECK: wfpsosb %f0, %f0, 3 # encoding: [0xe7,0x00,0x00,0x38,0x20,0xcc] #CHECK: wfpsosb %f0, %f0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xcc] @@ -1926,6 +2138,22 @@ wfpsosb %v31, %v0, 3 wfpsosb %v14, %v17, 7 +#CHECK: wfpsoxb %v0, %v0, 3 # encoding: [0xe7,0x00,0x00,0x38,0x40,0xcc] +#CHECK: wfpsoxb %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x40,0xcc] +#CHECK: wfpsoxb %v0, %v15, 3 # encoding: [0xe7,0x0f,0x00,0x38,0x40,0xcc] +#CHECK: wfpsoxb %v0, %v31, 3 # encoding: [0xe7,0x0f,0x00,0x38,0x44,0xcc] +#CHECK: wfpsoxb %v15, %v0, 3 # encoding: [0xe7,0xf0,0x00,0x38,0x40,0xcc] +#CHECK: wfpsoxb %v31, %v0, 3 # encoding: [0xe7,0xf0,0x00,0x38,0x48,0xcc] +#CHECK: wfpsoxb %v14, %v17, 7 # encoding: [0xe7,0xe1,0x00,0x78,0x44,0xcc] + + wfpsoxb %v0, %v0, 3 + wfpsoxb %v0, %v0, 15 + wfpsoxb %v0, %v15, 3 + wfpsoxb %v0, %v31, 3 + wfpsoxb %v15, %v0, 3 + wfpsoxb %v31, %v0, 3 + wfpsoxb %v14, %v17, 7 + #CHECK: wflcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xcc] #CHECK: wflcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xcc] #CHECK: wflcsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xcc] @@ -1942,6 +2170,20 @@ wflcsb %v31, %v0 wflcsb %v14, %v17 +#CHECK: wflcxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xcc] +#CHECK: wflcxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x08,0x40,0xcc] +#CHECK: wflcxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xcc] +#CHECK: wflcxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x40,0xcc] +#CHECK: wflcxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xcc] +#CHECK: wflcxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x44,0xcc] + + wflcxb %v0, %v0 + wflcxb %v0, %v15 + wflcxb %v0, %v31 + wflcxb %v15, %v0 + wflcxb %v31, %v0 + wflcxb %v14, %v17 + #CHECK: wflnsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xcc] #CHECK: wflnsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xcc] #CHECK: wflnsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x18,0x20,0xcc] @@ -1958,6 +2200,20 @@ wflnsb %v31, %v0 wflnsb %v14, %v17 +#CHECK: wflnxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x18,0x40,0xcc] +#CHECK: wflnxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x18,0x40,0xcc] +#CHECK: wflnxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x18,0x44,0xcc] +#CHECK: wflnxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x18,0x40,0xcc] +#CHECK: wflnxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x18,0x48,0xcc] +#CHECK: wflnxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x18,0x44,0xcc] + + wflnxb %v0, %v0 + wflnxb %v0, %v15 + wflnxb %v0, %v31 + wflnxb %v15, %v0 + wflnxb %v31, %v0 + wflnxb %v14, %v17 + #CHECK: wflpsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x20,0xcc] #CHECK: wflpsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x20,0xcc] #CHECK: wflpsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x28,0x20,0xcc] @@ -1974,6 +2230,20 @@ wflpsb %v31, %v0 wflpsb %v14, %v17 +#CHECK: wflpxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x28,0x40,0xcc] +#CHECK: wflpxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x28,0x40,0xcc] +#CHECK: wflpxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x28,0x44,0xcc] +#CHECK: wflpxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x28,0x40,0xcc] +#CHECK: wflpxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x28,0x48,0xcc] +#CHECK: wflpxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x28,0x44,0xcc] + + wflpxb %v0, %v0 + wflpxb %v0, %v15 + wflpxb %v0, %v31 + wflpxb %v15, %v0 + wflpxb %v31, %v0 + wflpxb %v14, %v17 + #CHECK: wflls %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4] #CHECK: wflls %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4] #CHECK: wflls %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4] @@ -1990,6 +2260,22 @@ wflls %v31, %v0 wflls %v14, %v17 +#CHECK: wflld %v0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc4] +#CHECK: wflld %v0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc4] +#CHECK: wflld %v0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xc4] +#CHECK: wflld %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc4] +#CHECK: wflld %v15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xc4] +#CHECK: wflld %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc4] +#CHECK: wflld %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xc4] + + wflld %v0, %v0 + wflld %v0, %f0 + wflld %v0, %v15 + wflld %v0, %v31 + wflld %v15, %v0 + wflld %v31, %v0 + wflld %v14, %v17 + #CHECK: wflrd %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5] #CHECK: wflrd %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5] #CHECK: wflrd %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5] @@ -2008,6 +2294,24 @@ wflrd %v31, %v0, 0, 0 wflrd %v14, %v17, 4, 10 +#CHECK: wflrx %f0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xc5] +#CHECK: wflrx %f0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xc5] +#CHECK: wflrx %f0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x40,0xc5] +#CHECK: wflrx %f0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x40,0xc5] +#CHECK: wflrx %f0, %v0, 7, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xc5] +#CHECK: wflrx %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xc5] +#CHECK: wflrx %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xc5] +#CHECK: wflrx %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x44,0xc5] + + wflrx %v0, %v0, 0, 0 + wflrx %f0, %v0, 0, 0 + wflrx %v0, %v0, 0, 15 + wflrx %v0, %v0, 4, 0 + wflrx %v0, %v0, 7, 0 + wflrx %v0, %v31, 0, 0 + wflrx %v31, %v0, 0, 0 + wflrx %v14, %v17, 4, 10 + #CHECK: wfmaxdb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xef] #CHECK: wfmaxdb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xef] #CHECK: wfmaxdb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x30,0xef] @@ -2040,6 +2344,20 @@ wfmaxsb %v31, %v0, %v0, 0 wfmaxsb %v18, %v3, %v20, 11 +#CHECK: wfmaxxb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xef] +#CHECK: wfmaxxb %v0, %v0, %v0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x40,0xef] +#CHECK: wfmaxxb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xef] +#CHECK: wfmaxxb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xef] +#CHECK: wfmaxxb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xef] +#CHECK: wfmaxxb %v18, %v3, %v20, 11 # encoding: [0xe7,0x23,0x40,0xb8,0x4a,0xef] + + wfmaxxb %v0, %v0, %v0, 0 + wfmaxxb %v0, %v0, %v0, 4 + wfmaxxb %v0, %v0, %v31, 0 + wfmaxxb %v0, %v31, %v0, 0 + wfmaxxb %v31, %v0, %v0, 0 + wfmaxxb %v18, %v3, %v20, 11 + #CHECK: wfmindb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xee] #CHECK: wfmindb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xee] #CHECK: wfmindb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x30,0xee] @@ -2072,6 +2390,20 @@ wfminsb %v31, %v0, %v0, 0 wfminsb %v18, %v3, %v20, 11 +#CHECK: wfminxb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xee] +#CHECK: wfminxb %v0, %v0, %v0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x40,0xee] +#CHECK: wfminxb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xee] +#CHECK: wfminxb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xee] +#CHECK: wfminxb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xee] +#CHECK: wfminxb %v18, %v3, %v20, 11 # encoding: [0xe7,0x23,0x40,0xb8,0x4a,0xee] + + wfminxb %v0, %v0, %v0, 0 + wfminxb %v0, %v0, %v0, 4 + wfminxb %v0, %v0, %v31, 0 + wfminxb %v0, %v31, %v0, 0 + wfminxb %v31, %v0, %v0, 0 + wfminxb %v18, %v3, %v20, 11 + #CHECK: wfmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8f] #CHECK: wfmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8f] #CHECK: wfmasb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x8f] @@ -2088,6 +2420,20 @@ wfmasb %v31, %v0, %v0, %v0 wfmasb %v13, %v17, %v21, %v25 +#CHECK: wfmaxb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x08,0x00,0x8f] +#CHECK: wfmaxb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x08,0xf1,0x8f] +#CHECK: wfmaxb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x08,0x02,0x8f] +#CHECK: wfmaxb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x08,0x04,0x8f] +#CHECK: wfmaxb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x08,0x08,0x8f] +#CHECK: wfmaxb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x08,0x97,0x8f] + + wfmaxb %v0, %v0, %v0, %v0 + wfmaxb %v0, %v0, %v0, %v31 + wfmaxb %v0, %v0, %v31, %v0 + wfmaxb %v0, %v31, %v0, %v0 + wfmaxb %v31, %v0, %v0, %v0 + wfmaxb %v13, %v17, %v21, %v25 + #CHECK: wfmsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe7] #CHECK: wfmsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe7] #CHECK: wfmsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe7] @@ -2102,6 +2448,18 @@ wfmsb %v31, %v0, %v0 wfmsb %v18, %v3, %v20 +#CHECK: wfmxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xe7] +#CHECK: wfmxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xe7] +#CHECK: wfmxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xe7] +#CHECK: wfmxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xe7] +#CHECK: wfmxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xe7] + + wfmxb %v0, %v0, %v0 + wfmxb %v0, %v0, %v31 + wfmxb %v0, %v31, %v0 + wfmxb %v31, %v0, %v0 + wfmxb %v18, %v3, %v20 + #CHECK: wfmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8e] #CHECK: wfmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8e] #CHECK: wfmssb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x8e] @@ -2118,6 +2476,20 @@ wfmssb %v31, %v0, %v0, %v0 wfmssb %v13, %v17, %v21, %v25 +#CHECK: wfmsxb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x08,0x00,0x8e] +#CHECK: wfmsxb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x08,0xf1,0x8e] +#CHECK: wfmsxb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x08,0x02,0x8e] +#CHECK: wfmsxb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x08,0x04,0x8e] +#CHECK: wfmsxb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x08,0x08,0x8e] +#CHECK: wfmsxb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x08,0x97,0x8e] + + wfmsxb %v0, %v0, %v0, %v0 + wfmsxb %v0, %v0, %v0, %v31 + wfmsxb %v0, %v0, %v31, %v0 + wfmsxb %v0, %v31, %v0, %v0 + wfmsxb %v31, %v0, %v0, %v0 + wfmsxb %v13, %v17, %v21, %v25 + #CHECK: wfnmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9f] #CHECK: wfnmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9f] #CHECK: wfnmadb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x9f] @@ -2150,6 +2522,20 @@ wfnmasb %v31, %v0, %v0, %v0 wfnmasb %v13, %v17, %v21, %v25 +#CHECK: wfnmaxb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x08,0x00,0x9f] +#CHECK: wfnmaxb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x08,0xf1,0x9f] +#CHECK: wfnmaxb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x08,0x02,0x9f] +#CHECK: wfnmaxb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x08,0x04,0x9f] +#CHECK: wfnmaxb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x08,0x08,0x9f] +#CHECK: wfnmaxb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x08,0x97,0x9f] + + wfnmaxb %v0, %v0, %v0, %v0 + wfnmaxb %v0, %v0, %v0, %v31 + wfnmaxb %v0, %v0, %v31, %v0 + wfnmaxb %v0, %v31, %v0, %v0 + wfnmaxb %v31, %v0, %v0, %v0 + wfnmaxb %v13, %v17, %v21, %v25 + #CHECK: wfnmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9e] #CHECK: wfnmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9e] #CHECK: wfnmsdb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x9e] @@ -2182,6 +2568,20 @@ wfnmssb %v31, %v0, %v0, %v0 wfnmssb %v13, %v17, %v21, %v25 +#CHECK: wfnmsxb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x08,0x00,0x9e] +#CHECK: wfnmsxb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x08,0xf1,0x9e] +#CHECK: wfnmsxb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x08,0x02,0x9e] +#CHECK: wfnmsxb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x08,0x04,0x9e] +#CHECK: wfnmsxb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x08,0x08,0x9e] +#CHECK: wfnmsxb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x08,0x97,0x9e] + + wfnmsxb %v0, %v0, %v0, %v0 + wfnmsxb %v0, %v0, %v0, %v31 + wfnmsxb %v0, %v0, %v31, %v0 + wfnmsxb %v0, %v31, %v0, %v0 + wfnmsxb %v31, %v0, %v0, %v0 + wfnmsxb %v13, %v17, %v21, %v25 + #CHECK: wfssb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe2] #CHECK: wfssb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe2] #CHECK: wfssb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe2] @@ -2196,6 +2596,18 @@ wfssb %v31, %v0, %v0 wfssb %v18, %v3, %v20 +#CHECK: wfsxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xe2] +#CHECK: wfsxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x42,0xe2] +#CHECK: wfsxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xe2] +#CHECK: wfsxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xe2] +#CHECK: wfsxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x4a,0xe2] + + wfsxb %v0, %v0, %v0 + wfsxb %v0, %v0, %v31 + wfsxb %v0, %v31, %v0 + wfsxb %v31, %v0, %v0 + wfsxb %v18, %v3, %v20 + #CHECK: wfsqsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xce] #CHECK: wfsqsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xce] #CHECK: wfsqsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xce] @@ -2212,6 +2624,20 @@ wfsqsb %v31, %v0 wfsqsb %v14, %v17 +#CHECK: wfsqxb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0xce] +#CHECK: wfsqxb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x08,0x40,0xce] +#CHECK: wfsqxb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0xce] +#CHECK: wfsqxb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x40,0xce] +#CHECK: wfsqxb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0xce] +#CHECK: wfsqxb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x44,0xce] + + wfsqxb %v0, %v0 + wfsqxb %v0, %v15 + wfsqxb %v0, %v31 + wfsqxb %v15, %v0 + wfsqxb %v31, %v0 + wfsqxb %v14, %v17 + #CHECK: wftcisb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0x4a] #CHECK: wftcisb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0x4a] #CHECK: wftcisb %f0, %f0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x20,0x4a] @@ -2230,3 +2656,19 @@ wftcisb %v31, %v0, 0 wftcisb %v4, %v21, 0x678 +#CHECK: wftcixb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x40,0x4a] +#CHECK: wftcixb %v0, %v0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x40,0x4a] +#CHECK: wftcixb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x40,0x4a] +#CHECK: wftcixb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x44,0x4a] +#CHECK: wftcixb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x40,0x4a] +#CHECK: wftcixb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x48,0x4a] +#CHECK: wftcixb %v4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x88,0x44,0x4a] + + wftcixb %v0, %v0, 0 + wftcixb %v0, %v0, 4095 + wftcixb %v0, %v15, 0 + wftcixb %v0, %v31, 0 + wftcixb %v15, %v0, 0 + wftcixb %v31, %v0, 0 + wftcixb %v4, %v21, 0x678 + |