diff options
Diffstat (limited to 'lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp')
| -rw-r--r-- | lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp | 567 |
1 files changed, 564 insertions, 3 deletions
diff --git a/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp b/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp index 86843fd97c0..4cdb3e265bc 100644 --- a/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp @@ -119,6 +119,17 @@ extern "C" bool CPUHasAVX() { return LogAVXAndReturn(g_has_avx, err, buffer); } +extern "C" bool CPUHasAVX512f() { + static AVXPresence g_has_avx512f = eAVXUnknown; + if (g_has_avx512f != eAVXUnknown) + return g_has_avx512f == eAVXPresent; + + g_has_avx512f = DetectHardwareFeature("hw.optional.avx512f") ? eAVXPresent + : eAVXNotPresent; + + return (g_has_avx512f == eAVXPresent); +} + uint64_t DNBArchImplX86_64::GetPC(uint64_t failValue) { // Get program counter if (GetGPRState(false) == KERN_SUCCESS) @@ -338,10 +349,82 @@ kern_return_t DNBArchImplX86_64::GetFPUState(bool force) { for (int i = 0; i < sizeof(m_state.context.fpu.avx.__avx_reserved1); ++i) m_state.context.fpu.avx.__avx_reserved1[i] = INT8_MIN; } + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (int i = 0; i < 8; ++i) { + m_state.context.fpu.avx512f.__fpu_k0.__opmask_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_k1.__opmask_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_k2.__opmask_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_k3.__opmask_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_k4.__opmask_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_k5.__opmask_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_k6.__opmask_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_k7.__opmask_reg[i] = '7'; + } + + for (int i = 0; i < 32; ++i) { + m_state.context.fpu.avx512f.__fpu_zmmh0.__ymm_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_zmmh1.__ymm_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_zmmh2.__ymm_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_zmmh3.__ymm_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_zmmh4.__ymm_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_zmmh5.__ymm_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_zmmh6.__ymm_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_zmmh7.__ymm_reg[i] = '7'; + m_state.context.fpu.avx512f.__fpu_zmmh8.__ymm_reg[i] = '8'; + m_state.context.fpu.avx512f.__fpu_zmmh9.__ymm_reg[i] = '9'; + m_state.context.fpu.avx512f.__fpu_zmmh10.__ymm_reg[i] = 'A'; + m_state.context.fpu.avx512f.__fpu_zmmh11.__ymm_reg[i] = 'B'; + m_state.context.fpu.avx512f.__fpu_zmmh12.__ymm_reg[i] = 'C'; + m_state.context.fpu.avx512f.__fpu_zmmh13.__ymm_reg[i] = 'D'; + m_state.context.fpu.avx512f.__fpu_zmmh14.__ymm_reg[i] = 'E'; + m_state.context.fpu.avx512f.__fpu_zmmh15.__ymm_reg[i] = 'F'; + } + for (int i = 0; i < 64; ++i) { + m_state.context.fpu.avx512f.__fpu_zmm16.__zmm_reg[i] = 'G'; + m_state.context.fpu.avx512f.__fpu_zmm17.__zmm_reg[i] = 'H'; + m_state.context.fpu.avx512f.__fpu_zmm18.__zmm_reg[i] = 'I'; + m_state.context.fpu.avx512f.__fpu_zmm19.__zmm_reg[i] = 'J'; + m_state.context.fpu.avx512f.__fpu_zmm20.__zmm_reg[i] = 'K'; + m_state.context.fpu.avx512f.__fpu_zmm21.__zmm_reg[i] = 'L'; + m_state.context.fpu.avx512f.__fpu_zmm22.__zmm_reg[i] = 'M'; + m_state.context.fpu.avx512f.__fpu_zmm23.__zmm_reg[i] = 'N'; + m_state.context.fpu.avx512f.__fpu_zmm24.__zmm_reg[i] = 'O'; + m_state.context.fpu.avx512f.__fpu_zmm25.__zmm_reg[i] = 'P'; + m_state.context.fpu.avx512f.__fpu_zmm26.__zmm_reg[i] = 'Q'; + m_state.context.fpu.avx512f.__fpu_zmm27.__zmm_reg[i] = 'R'; + m_state.context.fpu.avx512f.__fpu_zmm28.__zmm_reg[i] = 'S'; + m_state.context.fpu.avx512f.__fpu_zmm29.__zmm_reg[i] = 'T'; + m_state.context.fpu.avx512f.__fpu_zmm30.__zmm_reg[i] = 'U'; + m_state.context.fpu.avx512f.__fpu_zmm31.__zmm_reg[i] = 'V'; + } + } m_state.SetError(e_regSetFPU, Read, 0); } else { mach_msg_type_number_t count = e_regSetWordSizeFPU; int flavor = __x86_64_FLOAT_STATE; + // On a machine with the AVX512 register set, a process only gets a + // full AVX512 register context after it uses the AVX512 registers; + // if the process has not yet triggered this change, trying to fetch + // the AVX512 registers will fail. Fall through to fetching the AVX + // registers. + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + count = e_regSetWordSizeAVX512f; + flavor = __x86_64_AVX512F_STATE; + m_state.SetError(e_regSetFPU, Read, + ::thread_get_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, + &count)); + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state (0x%4.4x, %u, &fpu, %u => 0x%8.8x", + m_thread->MachPortNumber(), flavor, (uint32_t)count, + m_state.GetError(e_regSetFPU, Read)); + + if (m_state.GetError(e_regSetFPU, Read) == KERN_SUCCESS) + return m_state.GetError(e_regSetFPU, Read); + else + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state attempted fetch of avx512 fpu regctx failed, will try fetching avx"); + } if (CPUHasAVX() || FORCE_AVX_REGS) { count = e_regSetWordSizeAVX; flavor = __x86_64_AVX_STATE; @@ -413,6 +496,20 @@ kern_return_t DNBArchImplX86_64::SetFPUState() { } else { int flavor = __x86_64_FLOAT_STATE; mach_msg_type_number_t count = e_regSetWordSizeFPU; + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + count = e_regSetWordSizeAVX512f; + flavor = __x86_64_AVX512F_STATE; + m_state.SetError( + e_regSetFPU, Write, + ::thread_set_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, count)); + if (m_state.GetError(e_regSetFPU, Write) == KERN_SUCCESS) + return m_state.GetError(e_regSetFPU, Write); + else + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state attempted save of avx512 fpu regctx failed, will try saving avx regctx"); + } + if (CPUHasAVX() || FORCE_AVX_REGS) { flavor = __x86_64_AVX_STATE; count = e_regSetWordSizeAVX; @@ -1019,6 +1116,46 @@ enum { fpu_ymm13, fpu_ymm14, fpu_ymm15, + fpu_k0, + fpu_k1, + fpu_k2, + fpu_k3, + fpu_k4, + fpu_k5, + fpu_k6, + fpu_k7, + fpu_zmm0, + fpu_zmm1, + fpu_zmm2, + fpu_zmm3, + fpu_zmm4, + fpu_zmm5, + fpu_zmm6, + fpu_zmm7, + fpu_zmm8, + fpu_zmm9, + fpu_zmm10, + fpu_zmm11, + fpu_zmm12, + fpu_zmm13, + fpu_zmm14, + fpu_zmm15, + fpu_zmm16, + fpu_zmm17, + fpu_zmm18, + fpu_zmm19, + fpu_zmm20, + fpu_zmm21, + fpu_zmm22, + fpu_zmm23, + fpu_zmm24, + fpu_zmm25, + fpu_zmm26, + fpu_zmm27, + fpu_zmm28, + fpu_zmm29, + fpu_zmm30, + fpu_zmm31, k_num_fpu_regs, // Aliases @@ -1095,7 +1232,47 @@ enum ehframe_dwarf_regnums { ehframe_dwarf_ymm12 = ehframe_dwarf_xmm12, ehframe_dwarf_ymm13 = ehframe_dwarf_xmm13, ehframe_dwarf_ymm14 = ehframe_dwarf_xmm14, - ehframe_dwarf_ymm15 = ehframe_dwarf_xmm15 + ehframe_dwarf_ymm15 = ehframe_dwarf_xmm15, + ehframe_dwarf_zmm0 = ehframe_dwarf_xmm0, + ehframe_dwarf_zmm1 = ehframe_dwarf_xmm1, + ehframe_dwarf_zmm2 = ehframe_dwarf_xmm2, + ehframe_dwarf_zmm3 = ehframe_dwarf_xmm3, + ehframe_dwarf_zmm4 = ehframe_dwarf_xmm4, + ehframe_dwarf_zmm5 = ehframe_dwarf_xmm5, + ehframe_dwarf_zmm6 = ehframe_dwarf_xmm6, + ehframe_dwarf_zmm7 = ehframe_dwarf_xmm7, + ehframe_dwarf_zmm8 = ehframe_dwarf_xmm8, + ehframe_dwarf_zmm9 = ehframe_dwarf_xmm9, + ehframe_dwarf_zmm10 = ehframe_dwarf_xmm10, + ehframe_dwarf_zmm11 = ehframe_dwarf_xmm11, + ehframe_dwarf_zmm12 = ehframe_dwarf_xmm12, + ehframe_dwarf_zmm13 = ehframe_dwarf_xmm13, + ehframe_dwarf_zmm14 = ehframe_dwarf_xmm14, + ehframe_dwarf_zmm15 = ehframe_dwarf_xmm15, + ehframe_dwarf_zmm16 = 67, + ehframe_dwarf_zmm17, + ehframe_dwarf_zmm18, + ehframe_dwarf_zmm19, + ehframe_dwarf_zmm20, + ehframe_dwarf_zmm21, + ehframe_dwarf_zmm22, + ehframe_dwarf_zmm23, + ehframe_dwarf_zmm24, + ehframe_dwarf_zmm25, + ehframe_dwarf_zmm26, + ehframe_dwarf_zmm27, + ehframe_dwarf_zmm28, + ehframe_dwarf_zmm29, + ehframe_dwarf_zmm30, + ehframe_dwarf_zmm31, + ehframe_dwarf_k0 = 118, + ehframe_dwarf_k1, + ehframe_dwarf_k2, + ehframe_dwarf_k3, + ehframe_dwarf_k4, + ehframe_dwarf_k5, + ehframe_dwarf_k6, + ehframe_dwarf_k7, }; enum debugserver_regnums { @@ -1178,7 +1355,47 @@ enum debugserver_regnums { debugserver_ymm12 = debugserver_xmm12, debugserver_ymm13 = debugserver_xmm13, debugserver_ymm14 = debugserver_xmm14, - debugserver_ymm15 = debugserver_xmm15 + debugserver_ymm15 = debugserver_xmm15, + debugserver_zmm0 = debugserver_xmm0, + debugserver_zmm1 = debugserver_xmm1, + debugserver_zmm2 = debugserver_xmm2, + debugserver_zmm3 = debugserver_xmm3, + debugserver_zmm4 = debugserver_xmm4, + debugserver_zmm5 = debugserver_xmm5, + debugserver_zmm6 = debugserver_xmm6, + debugserver_zmm7 = debugserver_xmm7, + debugserver_zmm8 = debugserver_xmm8, + debugserver_zmm9 = debugserver_xmm9, + debugserver_zmm10 = debugserver_xmm10, + debugserver_zmm11 = debugserver_xmm11, + debugserver_zmm12 = debugserver_xmm12, + debugserver_zmm13 = debugserver_xmm13, + debugserver_zmm14 = debugserver_xmm14, + debugserver_zmm15 = debugserver_xmm15, + debugserver_zmm16 = 67, + debugserver_zmm17 = 68, + debugserver_zmm18 = 69, + debugserver_zmm19 = 70, + debugserver_zmm20 = 71, + debugserver_zmm21 = 72, + debugserver_zmm22 = 73, + debugserver_zmm23 = 74, + debugserver_zmm24 = 75, + debugserver_zmm25 = 76, + debugserver_zmm26 = 77, + debugserver_zmm27 = 78, + debugserver_zmm28 = 79, + debugserver_zmm29 = 80, + debugserver_zmm30 = 81, + debugserver_zmm31 = 82, + debugserver_k0 = 118, + debugserver_k1 = 119, + debugserver_k2 = 120, + debugserver_k3 = 121, + debugserver_k4 = 122, + debugserver_k5 = 123, + debugserver_k6 = 124, + debugserver_k7 = 125, }; #define GPR_OFFSET(reg) (offsetof(DNBArchImplX86_64::GPR, __##reg)) @@ -1188,10 +1405,14 @@ enum debugserver_regnums { #define AVX_OFFSET(reg) \ (offsetof(DNBArchImplX86_64::AVX, __fpu_##reg) + \ offsetof(DNBArchImplX86_64::Context, fpu.avx)) +#define AVX512F_OFFSET(reg) \ + (offsetof(DNBArchImplX86_64::AVX512F, __fpu_##reg) + \ + offsetof(DNBArchImplX86_64::Context, fpu.avx512f)) #define EXC_OFFSET(reg) \ (offsetof(DNBArchImplX86_64::EXC, __##reg) + \ offsetof(DNBArchImplX86_64::Context, exc)) #define AVX_OFFSET_YMM(n) (AVX_OFFSET(ymmh0) + (32 * n)) +#define AVX512F_OFFSET_ZMM(n) (AVX512F_OFFSET(zmmh0) + (64 * n)) #define GPR_SIZE(reg) (sizeof(((DNBArchImplX86_64::GPR *)NULL)->__##reg)) #define FPU_SIZE_UINT(reg) \ @@ -1201,6 +1422,7 @@ enum debugserver_regnums { #define FPU_SIZE_XMM(reg) \ (sizeof(((DNBArchImplX86_64::FPU *)NULL)->__fpu_##reg.__xmm_reg)) #define FPU_SIZE_YMM(reg) (32) +#define FPU_SIZE_ZMM(reg) (64) #define EXC_SIZE(reg) (sizeof(((DNBArchImplX86_64::EXC *)NULL)->__##reg)) // These macros will auto define the register name, alt name, register size, @@ -1638,6 +1860,183 @@ const DNBRegisterInfo DNBArchImplX86_64::g_fpu_registers_avx[] = { }; +static const char *g_contained_zmm0[] = {"zmm0", NULL}; +static const char *g_contained_zmm1[] = {"zmm1", NULL}; +static const char *g_contained_zmm2[] = {"zmm2", NULL}; +static const char *g_contained_zmm3[] = {"zmm3", NULL}; +static const char *g_contained_zmm4[] = {"zmm4", NULL}; +static const char *g_contained_zmm5[] = {"zmm5", NULL}; +static const char *g_contained_zmm6[] = {"zmm6", NULL}; +static const char *g_contained_zmm7[] = {"zmm7", NULL}; +static const char *g_contained_zmm8[] = {"zmm8", NULL}; +static const char *g_contained_zmm9[] = {"zmm9", NULL}; +static const char *g_contained_zmm10[] = {"zmm10", NULL}; +static const char *g_contained_zmm11[] = {"zmm11", NULL}; +static const char *g_contained_zmm12[] = {"zmm12", NULL}; +static const char *g_contained_zmm13[] = {"zmm13", NULL}; +static const char *g_contained_zmm14[] = {"zmm14", NULL}; +static const char *g_contained_zmm15[] = {"zmm15", NULL}; + +#define STR(s) #s + +#define ZMM_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_zmm##reg, STR(zmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_ZMM(zmm##reg), AVX512F_OFFSET_ZMM(reg), \ + ehframe_dwarf_zmm##reg, ehframe_dwarf_zmm##reg, -1U, \ + debugserver_zmm##reg, NULL, NULL \ + } + +#define YMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_ymm##reg, STR(ymm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_YMM(ymm##reg), 0, ehframe_dwarf_ymm##reg, \ + ehframe_dwarf_ymm##reg, -1U, debugserver_ymm##reg, \ + g_contained_zmm##reg, NULL \ + } + +#define XMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_xmm##reg, STR(xmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_XMM(xmm##reg), 0, ehframe_dwarf_xmm##reg, \ + ehframe_dwarf_xmm##reg, -1U, debugserver_xmm##reg, \ + g_contained_zmm##reg, NULL \ + } + +#define AVX512_K_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_k##reg, STR(k##reg), NULL, Vector, VectorOfUInt8, 8, \ + AVX512F_OFFSET(k##reg), ehframe_dwarf_k##reg, ehframe_dwarf_k##reg, \ + -1U, debugserver_k##reg, NULL, NULL \ + } + +const DNBRegisterInfo DNBArchImplX86_64::g_fpu_registers_avx512f[] = { + {e_regSetFPU, fpu_fcw, "fctrl", NULL, Uint, Hex, FPU_SIZE_UINT(fcw), + AVX_OFFSET(fcw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_fsw, "fstat", NULL, Uint, Hex, FPU_SIZE_UINT(fsw), + AVX_OFFSET(fsw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ftw, "ftag", NULL, Uint, Hex, 2 /* sizeof __fpu_ftw + sizeof __fpu_rsrv1 */, + AVX_OFFSET(ftw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_fop, "fop", NULL, Uint, Hex, FPU_SIZE_UINT(fop), + AVX_OFFSET(fop), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ip, "fioff", NULL, Uint, Hex, FPU_SIZE_UINT(ip), + AVX_OFFSET(ip), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_cs, "fiseg", NULL, Uint, Hex, FPU_SIZE_UINT(cs), + AVX_OFFSET(cs), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_dp, "fooff", NULL, Uint, Hex, FPU_SIZE_UINT(dp), + AVX_OFFSET(dp), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ds, "foseg", NULL, Uint, Hex, FPU_SIZE_UINT(ds), + AVX_OFFSET(ds), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_mxcsr, "mxcsr", NULL, Uint, Hex, FPU_SIZE_UINT(mxcsr), + AVX_OFFSET(mxcsr), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_mxcsrmask, "mxcsrmask", NULL, Uint, Hex, + FPU_SIZE_UINT(mxcsrmask), AVX_OFFSET(mxcsrmask), -1U, -1U, -1U, -1U, NULL, + NULL}, + + {e_regSetFPU, fpu_stmm0, "stmm0", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm0), AVX_OFFSET(stmm0), ehframe_dwarf_stmm0, + ehframe_dwarf_stmm0, -1U, debugserver_stmm0, NULL, NULL}, + {e_regSetFPU, fpu_stmm1, "stmm1", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm1), AVX_OFFSET(stmm1), ehframe_dwarf_stmm1, + ehframe_dwarf_stmm1, -1U, debugserver_stmm1, NULL, NULL}, + {e_regSetFPU, fpu_stmm2, "stmm2", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm2), AVX_OFFSET(stmm2), ehframe_dwarf_stmm2, + ehframe_dwarf_stmm2, -1U, debugserver_stmm2, NULL, NULL}, + {e_regSetFPU, fpu_stmm3, "stmm3", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm3), AVX_OFFSET(stmm3), ehframe_dwarf_stmm3, + ehframe_dwarf_stmm3, -1U, debugserver_stmm3, NULL, NULL}, + {e_regSetFPU, fpu_stmm4, "stmm4", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm4), AVX_OFFSET(stmm4), ehframe_dwarf_stmm4, + ehframe_dwarf_stmm4, -1U, debugserver_stmm4, NULL, NULL}, + {e_regSetFPU, fpu_stmm5, "stmm5", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm5), AVX_OFFSET(stmm5), ehframe_dwarf_stmm5, + ehframe_dwarf_stmm5, -1U, debugserver_stmm5, NULL, NULL}, + {e_regSetFPU, fpu_stmm6, "stmm6", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm6), AVX_OFFSET(stmm6), ehframe_dwarf_stmm6, + ehframe_dwarf_stmm6, -1U, debugserver_stmm6, NULL, NULL}, + {e_regSetFPU, fpu_stmm7, "stmm7", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm7), AVX_OFFSET(stmm7), ehframe_dwarf_stmm7, + ehframe_dwarf_stmm7, -1U, debugserver_stmm7, NULL, NULL}, + + AVX512_K_REG_DEF(0), + AVX512_K_REG_DEF(1), + AVX512_K_REG_DEF(2), + AVX512_K_REG_DEF(3), + AVX512_K_REG_DEF(4), + AVX512_K_REG_DEF(5), + AVX512_K_REG_DEF(6), + AVX512_K_REG_DEF(7), + + ZMM_REG_DEF(0), + ZMM_REG_DEF(1), + ZMM_REG_DEF(2), + ZMM_REG_DEF(3), + ZMM_REG_DEF(4), + ZMM_REG_DEF(5), + ZMM_REG_DEF(6), + ZMM_REG_DEF(7), + ZMM_REG_DEF(8), + ZMM_REG_DEF(9), + ZMM_REG_DEF(10), + ZMM_REG_DEF(11), + ZMM_REG_DEF(12), + ZMM_REG_DEF(13), + ZMM_REG_DEF(14), + ZMM_REG_DEF(15), + ZMM_REG_DEF(16), + ZMM_REG_DEF(17), + ZMM_REG_DEF(18), + ZMM_REG_DEF(19), + ZMM_REG_DEF(20), + ZMM_REG_DEF(21), + ZMM_REG_DEF(22), + ZMM_REG_DEF(23), + ZMM_REG_DEF(24), + ZMM_REG_DEF(25), + ZMM_REG_DEF(26), + ZMM_REG_DEF(27), + ZMM_REG_DEF(28), + ZMM_REG_DEF(29), + ZMM_REG_DEF(30), + ZMM_REG_DEF(31), + + YMM_REG_ALIAS(0), + YMM_REG_ALIAS(1), + YMM_REG_ALIAS(2), + YMM_REG_ALIAS(3), + YMM_REG_ALIAS(4), + YMM_REG_ALIAS(5), + YMM_REG_ALIAS(6), + YMM_REG_ALIAS(7), + YMM_REG_ALIAS(8), + YMM_REG_ALIAS(9), + YMM_REG_ALIAS(10), + YMM_REG_ALIAS(11), + YMM_REG_ALIAS(12), + YMM_REG_ALIAS(13), + YMM_REG_ALIAS(14), + YMM_REG_ALIAS(15), + + XMM_REG_ALIAS(0), + XMM_REG_ALIAS(1), + XMM_REG_ALIAS(2), + XMM_REG_ALIAS(3), + XMM_REG_ALIAS(4), + XMM_REG_ALIAS(5), + XMM_REG_ALIAS(6), + XMM_REG_ALIAS(7), + XMM_REG_ALIAS(8), + XMM_REG_ALIAS(9), + XMM_REG_ALIAS(10), + XMM_REG_ALIAS(11), + XMM_REG_ALIAS(12), + XMM_REG_ALIAS(13), + XMM_REG_ALIAS(14), + XMM_REG_ALIAS(15), + +}; + + // Exception registers const DNBRegisterInfo DNBArchImplX86_64::g_exc_registers[] = { @@ -1662,6 +2061,10 @@ const size_t DNBArchImplX86_64::k_num_all_registers_no_avx = k_num_gpr_registers + k_num_fpu_registers_no_avx + k_num_exc_registers; const size_t DNBArchImplX86_64::k_num_all_registers_avx = k_num_gpr_registers + k_num_fpu_registers_avx + k_num_exc_registers; +const size_t DNBArchImplX86_64::k_num_fpu_registers_avx512f = + sizeof(g_fpu_registers_avx512f) / sizeof(DNBRegisterInfo); +const size_t DNBArchImplX86_64::k_num_all_registers_avx512f = + k_num_gpr_registers + k_num_fpu_registers_avx512f + k_num_exc_registers; //---------------------------------------------------------------------- // Register set definitions. The first definitions at register set index @@ -1681,6 +2084,13 @@ const DNBRegisterSetInfo DNBArchImplX86_64::g_reg_sets_avx[] = { {"Floating Point Registers", g_fpu_registers_avx, k_num_fpu_registers_avx}, {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; +const DNBRegisterSetInfo DNBArchImplX86_64::g_reg_sets_avx512f[] = { + {"x86_64 Registers", NULL, k_num_all_registers_avx}, + {"General Purpose Registers", g_gpr_registers, k_num_gpr_registers}, + {"Floating Point Registers", g_fpu_registers_avx512f, + k_num_fpu_registers_avx512f}, + {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; + // Total number of register sets for this architecture const size_t DNBArchImplX86_64::k_num_register_sets = sizeof(g_reg_sets_avx) / sizeof(DNBRegisterSetInfo); @@ -1702,6 +2112,8 @@ const DNBRegisterSetInfo * DNBArchImplX86_64::GetRegisterSetInfo(nub_size_t *num_reg_sets) { *num_reg_sets = k_num_register_sets; + if (CPUHasAVX512f() || FORCE_AVX_REGS) + return g_reg_sets_avx512f; if (CPUHasAVX() || FORCE_AVX_REGS) return g_reg_sets_avx; else @@ -1765,6 +2177,8 @@ bool DNBArchImplX86_64::GetRegisterValue(uint32_t set, uint32_t reg, case e_regSetFPU: if (reg > fpu_xmm15 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm15 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; switch (reg) { case fpu_fcw: @@ -1853,6 +2267,59 @@ bool DNBArchImplX86_64::GetRegisterValue(uint32_t set, uint32_t reg, memcpy((&value->value.uint8) + 16, &m_state.context.fpu.avx.__fpu_ymmh0 + (reg - fpu_ymm0), 16); return true; + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy((&value->value.uint8), + &m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), 8); + return true; + case fpu_zmm0: + case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + case fpu_zmm8: + case fpu_zmm9: + case fpu_zmm10: + case fpu_zmm11: + case fpu_zmm12: + case fpu_zmm13: + case fpu_zmm14: + case fpu_zmm15: + memcpy(&value->value.uint8, + &m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), 16); + memcpy((&value->value.uint8) + 16, + &m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), 16); + memcpy((&value->value.uint8) + 32, + &m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), 32); + return true; + case fpu_zmm16: + case fpu_zmm17: + case fpu_zmm18: + case fpu_zmm19: + case fpu_zmm20: + case fpu_zmm21: + case fpu_zmm22: + case fpu_zmm23: + case fpu_zmm24: + case fpu_zmm25: + case fpu_zmm26: + case fpu_zmm27: + case fpu_zmm28: + case fpu_zmm29: + case fpu_zmm30: + case fpu_zmm31: + memcpy(&value->value.uint8, + &m_state.context.fpu.avx512f.__fpu_zmm16 + (reg - fpu_zmm16), 64); + return true; } break; @@ -1919,6 +2386,8 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, break; if (reg > fpu_xmm15 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm15 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; case e_regSetFPU: switch (reg) { case fpu_fcw: @@ -2019,6 +2488,59 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, memcpy(&m_state.context.fpu.avx.__fpu_ymmh0 + (reg - fpu_ymm0), (&value->value.uint8) + 16, 16); return true; + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), + &value->value.uint8, 8); + return true; + case fpu_zmm0: + case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + case fpu_zmm8: + case fpu_zmm9: + case fpu_zmm10: + case fpu_zmm11: + case fpu_zmm12: + case fpu_zmm13: + case fpu_zmm14: + case fpu_zmm15: + memcpy(&m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), + &value->value.uint8, 16); + memcpy(&m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 16, 16); + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 32, 32); + return true; + case fpu_zmm16: + case fpu_zmm17: + case fpu_zmm18: + case fpu_zmm19: + case fpu_zmm20: + case fpu_zmm21: + case fpu_zmm22: + case fpu_zmm23: + case fpu_zmm24: + case fpu_zmm25: + case fpu_zmm26: + case fpu_zmm27: + case fpu_zmm28: + case fpu_zmm29: + case fpu_zmm30: + case fpu_zmm31: + memcpy(&m_state.context.fpu.avx512f.__fpu_zmm16 + (reg - fpu_zmm16), + &value->value.uint8, 64); + return true; } break; @@ -2049,7 +2571,12 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, uint32_t DNBArchImplX86_64::GetRegisterContextSize() { static uint32_t g_cached_size = 0; if (g_cached_size == 0) { - if (CPUHasAVX() || FORCE_AVX_REGS) { + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < k_num_fpu_registers_avx512f; ++i) { + if (g_fpu_registers_avx512f[i].value_regs == NULL) + g_cached_size += g_fpu_registers_avx512f[i].size; + } + } else if (CPUHasAVX() || FORCE_AVX_REGS) { for (size_t i = 0; i < k_num_fpu_registers_avx; ++i) { if (g_fpu_registers_avx[i].value_regs == NULL) g_cached_size += g_fpu_registers_avx[i].size; @@ -2121,6 +2648,13 @@ nub_size_t DNBArchImplX86_64::GetRegisterContext(void *buf, p += 10; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_k0 + i, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 16; ++i) { @@ -2129,6 +2663,16 @@ nub_size_t DNBArchImplX86_64::GetRegisterContext(void *buf, memcpy(p, &m_state.context.fpu.avx.__fpu_ymmh0 + i, 16); p += 16; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 16; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_zmmh0 + i, 32); + p += 32; + } + for (size_t i = 0; i < 16; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_zmm16 + i, 64); + p += 64; + } + } } else { // Copy the XMM registers in a single block memcpy(p, &m_state.context.fpu.no_avx.__fpu_xmm0, 16 * 16); @@ -2186,6 +2730,13 @@ nub_size_t DNBArchImplX86_64::SetRegisterContext(const void *buf, p += 10; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + i, p, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 16; ++i) { @@ -2194,6 +2745,16 @@ nub_size_t DNBArchImplX86_64::SetRegisterContext(const void *buf, memcpy(&m_state.context.fpu.avx.__fpu_ymmh0 + i, p, 16); p += 16; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 16; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + i, p, 32); + p += 32; + } + for (size_t i = 0; i < 16; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_zmm16 + i, p, 64); + p += 64; + } + } } else { // Copy the XMM registers in a single block memcpy(&m_state.context.fpu.no_avx.__fpu_xmm0, p, 16 * 16); |

