From 2900cb906ec96134dba7c57c23b18e98cec215a7 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Fri, 2 Sep 2011 23:23:23 -0500 Subject: Floating point support. Change-Id: I859cac1c01bf631d12223702d68813b45339b65f Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/295 Tested-by: Jenkins Server Reviewed-by: A. Patrick Williams III --- config.mk | 2 +- src/include/kernel/ppcconsts.S | 85 ++++++++++++++++++++++++++++---- src/include/kernel/task.H | 14 ++++++ src/include/math.h | 2 + src/kernel/exception.C | 20 ++++++++ src/kernel/start.S | 109 ++++++++++++++++++++++++++++++++++++++++- src/kernel/syscall.C | 5 ++ src/kernel/taskmgr.C | 3 ++ src/lib/makefile | 2 +- src/lib/math.C | 31 ++++++++++++ src/makefile | 2 +- src/usr/testcore/lib/fptest.H | 49 ++++++++++++++++++ src/usr/trace/trace.C | 23 +++++++++ 13 files changed, 334 insertions(+), 13 deletions(-) create mode 100644 src/lib/math.C create mode 100644 src/usr/testcore/lib/fptest.H diff --git a/config.mk b/config.mk index d354f80de..a8d270cc4 100644 --- a/config.mk +++ b/config.mk @@ -74,7 +74,7 @@ BEAMFLAGS = \ -o /dev/null COMMONFLAGS = -O3 -nostdlib ${EXTRACOMMONFLAGS} -CFLAGS = ${COMMONFLAGS} -mcpu=power7 -nostdinc -g -msoft-float -mno-altivec \ +CFLAGS = ${COMMONFLAGS} -mcpu=power7 -nostdinc -g -mno-vsx -mno-altivec\ -Wall -Werror -fshort-enums ${CUSTOMFLAGS} ASMFLAGS = ${COMMONFLAGS} -mcpu=power7 CXXFLAGS = ${CFLAGS} -nostdinc++ -fno-rtti -fno-exceptions -Wall diff --git a/src/include/kernel/ppcconsts.S b/src/include/kernel/ppcconsts.S index 42fc17563..b83678907 100644 --- a/src/include/kernel/ppcconsts.S +++ b/src/include/kernel/ppcconsts.S @@ -1,24 +1,24 @@ # IBM_PROLOG_BEGIN_TAG # This is an automatically generated prolog. -# +# # $Source: src/include/kernel/ppcconsts.S $ -# +# # IBM CONFIDENTIAL -# +# # COPYRIGHT International Business Machines Corp. 
2010 - 2011 -# +# # p1 -# +# # Object Code Only (OCO) source materials # Licensed Internal Code Source Materials # IBM HostBoot Licensed Internal Code -# +# # The source code for this program is not published or other- # wise divested of its trade secrets, irrespective of what has # been deposited with the U.S. Copyright Office. -# +# # Origin: 30 -# +# # IBM_PROLOG_END #*--------------------------------------------------------------------*# #* Register Constants *# @@ -50,13 +50,46 @@ .set r23,23 .set r24,24 .set r25,25 - .set r26,26 + .set r26,26 .set r27,27 .set r28,28 .set r29,29 .set r30,30 .set r31,31 + .set f0,0 + .set f1,1 + .set f2,2 + .set f3,3 + .set f4,4 + .set f5,5 + .set f6,6 + .set f7,7 + .set f8,8 + .set f9,9 + .set f10,10 + .set f11,11 + .set f12,12 + .set f13,13 + .set f14,14 + .set f15,15 + .set f16,16 + .set f17,17 + .set f18,18 + .set f19,19 + .set f20,20 + .set f21,21 + .set f22,22 + .set f23,23 + .set f24,24 + .set f25,25 + .set f26,26 + .set f27,27 + .set f28,28 + .set f29,29 + .set f30,30 + .set f31,31 + #*--------------------------------------------------------------------*# #* CR Register Constants *# #*--------------------------------------------------------------------*# @@ -172,4 +205,38 @@ .set TASK_CTR, TASK_CR+8 .set TASK_XER, TASK_CTR+8 +.set TASK_FP_CONTEXT, TASK_XER+8 +.set TASK_FPR_0, 0 +.set TASK_FPR_1, TASK_FPR_0+8 +.set TASK_FPR_2, TASK_FPR_1+8 +.set TASK_FPR_3, TASK_FPR_2+8 +.set TASK_FPR_4, TASK_FPR_3+8 +.set TASK_FPR_5, TASK_FPR_4+8 +.set TASK_FPR_6, TASK_FPR_5+8 +.set TASK_FPR_7, TASK_FPR_6+8 +.set TASK_FPR_8, TASK_FPR_7+8 +.set TASK_FPR_9, TASK_FPR_8+8 +.set TASK_FPR_10, TASK_FPR_9+8 +.set TASK_FPR_11, TASK_FPR_10+8 +.set TASK_FPR_12, TASK_FPR_11+8 +.set TASK_FPR_13, TASK_FPR_12+8 +.set TASK_FPR_14, TASK_FPR_13+8 +.set TASK_FPR_15, TASK_FPR_14+8 +.set TASK_FPR_16, TASK_FPR_15+8 +.set TASK_FPR_17, TASK_FPR_16+8 +.set TASK_FPR_18, TASK_FPR_17+8 +.set TASK_FPR_19, TASK_FPR_18+8 +.set TASK_FPR_20, TASK_FPR_19+8 +.set 
TASK_FPR_21, TASK_FPR_20+8 +.set TASK_FPR_22, TASK_FPR_21+8 +.set TASK_FPR_23, TASK_FPR_22+8 +.set TASK_FPR_24, TASK_FPR_23+8 +.set TASK_FPR_25, TASK_FPR_24+8 +.set TASK_FPR_26, TASK_FPR_25+8 +.set TASK_FPR_27, TASK_FPR_26+8 +.set TASK_FPR_28, TASK_FPR_27+8 +.set TASK_FPR_29, TASK_FPR_28+8 +.set TASK_FPR_30, TASK_FPR_29+8 +.set TASK_FPR_31, TASK_FPR_30+8 +.set TASK_FPSCR, TASK_FPR_31+8 diff --git a/src/include/kernel/task.H b/src/include/kernel/task.H index d3dcc000a..cab925977 100644 --- a/src/include/kernel/task.H +++ b/src/include/kernel/task.H @@ -44,6 +44,18 @@ struct context_t uint64_t xer; }; +/** @struct context_fp_t + * @brief Defines the save-restore context for the floating point registers + * associated with a task. + * + * See PowerISA for information on registers listed. + */ +struct context_fp_t +{ + uint64_t fprs[32]; + uint64_t fpscr; +}; + /** @struct task_t * @brief The kernel-level task structure. */ @@ -54,6 +66,8 @@ struct task_t /** Context information. This MUST stay here due to * save-restore asm code. */ context_t context; + /** Pointer to optional floating point context. 
*/ + context_fp_t* fp_context; /** Task ID */ tid_t tid; diff --git a/src/include/math.h b/src/include/math.h index bd6821190..63584a26e 100644 --- a/src/include/math.h +++ b/src/include/math.h @@ -38,6 +38,8 @@ static inline int64_t log2(uint64_t s) return 63-n; } +double sqrt(double); + #ifdef __cplusplus }; #endif diff --git a/src/kernel/exception.C b/src/kernel/exception.C index 128708032..47c235fe8 100644 --- a/src/kernel/exception.C +++ b/src/kernel/exception.C @@ -20,6 +20,7 @@ // Origin: 30 // // IBM_PROLOG_END +#include #include #include #include @@ -160,3 +161,22 @@ namespace ExceptionHandles } } + +extern "C" +void kernel_execute_fp_unavail() +{ + task_t* t = TaskManager::getCurrentTask(); + + if (t->fp_context) + { + printk("Error: FP unavailable while task has FP-context.\n"); + kassert(t->fp_context == NULL); + } + else + { + // Enable FP by creating a FP context. + // Context switch code will handle the rest. + t->fp_context = new context_fp_t; + memset(t->fp_context, '\0', sizeof(context_fp_t)); + } +} diff --git a/src/kernel/start.S b/src/kernel/start.S index 332fe7796..b7702745a 100644 --- a/src/kernel/start.S +++ b/src/kernel/start.S @@ -148,7 +148,7 @@ STD_INTERRUPT(inst_segment, 0x480) UNIMPL_INTERRUPT(external, 0x500) STD_INTERRUPT(alignment, 0x600) STD_INTERRUPT(prog_ex, 0x700) -UNIMPL_INTERRUPT(fp_unavail, 0x800) +STD_INTERRUPT(fp_unavail, 0x800) STD_INTERRUPT(decrementer, 0x900) UNIMPL_INTERRUPT(hype_decrementer, 0x980) @@ -303,12 +303,62 @@ kernel_save_task: std r30, TASK_GPR_30(r1) ;// Save GPR30 std r31, TASK_GPR_31(r1) ;// Save GPR31 + ld r2, TASK_FP_CONTEXT(r1) ;// Load FP Context pointer. + cmpi cr0, r2, 0 + bne- cr0, 1f ;// Jump to FP-save if != NULL. +2: + ld r1, 0(r1) ;// Get CPU pointer ld r1, 0(r1) ;// Get kernel stack pointer. mfsprg0 r0 ;// Retrieve return address from SPRG0 mtlr r0 ;// Call blr + ;// Save FP context. +1: + ;// Enable FP. + mfmsr r3 + ori r3,r3,0x2000 + mtmsrd r3 + ;// Save FPRs. 
+ stfd f0, TASK_FPR_0(r2) + stfd f1, TASK_FPR_1(r2) + stfd f2, TASK_FPR_2(r2) + stfd f3, TASK_FPR_3(r2) + stfd f4, TASK_FPR_4(r2) + stfd f5, TASK_FPR_5(r2) + stfd f6, TASK_FPR_6(r2) + stfd f7, TASK_FPR_7(r2) + stfd f8, TASK_FPR_8(r2) + stfd f9, TASK_FPR_9(r2) + stfd f10, TASK_FPR_10(r2) + stfd f11, TASK_FPR_11(r2) + stfd f12, TASK_FPR_12(r2) + stfd f13, TASK_FPR_13(r2) + stfd f14, TASK_FPR_14(r2) + stfd f15, TASK_FPR_15(r2) + stfd f16, TASK_FPR_16(r2) + stfd f17, TASK_FPR_17(r2) + stfd f18, TASK_FPR_18(r2) + stfd f19, TASK_FPR_19(r2) + stfd f20, TASK_FPR_20(r2) + stfd f21, TASK_FPR_21(r2) + stfd f22, TASK_FPR_22(r2) + stfd f23, TASK_FPR_23(r2) + stfd f24, TASK_FPR_24(r2) + stfd f25, TASK_FPR_25(r2) + stfd f26, TASK_FPR_26(r2) + stfd f27, TASK_FPR_27(r2) + stfd f28, TASK_FPR_28(r2) + stfd f29, TASK_FPR_29(r2) + stfd f30, TASK_FPR_30(r2) + stfd f31, TASK_FPR_31(r2) + ;// Save FPSCR + mffs f0 + stfd f0, TASK_FPSCR(r2) + + b 2b + ;// @fn dispatch_task ;// Loads context from task structure and performs rfi. @@ -327,11 +377,18 @@ kernel_dispatch_task: ori r2,r2, 0xC030 ;// Enable MSR[EE,PR,IR,DR]. rldicl r2,r2,1,1 ;// Clear ... rotldi r2,r2,63 ;// MSR[TA] + rldicl r2,r2,50,1 ;// Clear ... + rotldi r2,r2,14 ;// MSR[FP] mtsrr1 r2 ;// Set task MSR (SRR1) ld r2, TASK_NIP(r1) ;// Load NIP from context. mtsrr0 r2 ;// Set task NIP (SRR0) + ;// Check if FP enabled, load context. + ld r2, TASK_FP_CONTEXT(r1) + cmpi cr0, r2, 0 + bne- 1f +2: ;// Restore GPRs from context. ld r0, TASK_GPR_0(r1) ;// GPR0 ld r2, TASK_GPR_2(r1) ;// GPR2 @@ -378,6 +435,56 @@ kernel_dispatch_task: rfid ;// Execute task. + ;// Load FP context. +1: + ;// Set MSR[FP] and also in SRR1. 
+ mfmsr r3 + ori r3,r3,0x2000 + mtmsrd r3 + mfsrr1 r3 + ori r3,r3,0x2000 + mtsrr1 r3 + ;// Restore FPSCR + lfd f0, TASK_FPSCR(r2) + mtfsf f0,f0,1,1 + ;// Restore FPRs + lfd f0, TASK_FPR_0(r2) + lfd f1, TASK_FPR_1(r2) + lfd f2, TASK_FPR_2(r2) + lfd f3, TASK_FPR_3(r2) + lfd f4, TASK_FPR_4(r2) + lfd f5, TASK_FPR_5(r2) + lfd f6, TASK_FPR_6(r2) + lfd f7, TASK_FPR_7(r2) + lfd f8, TASK_FPR_8(r2) + lfd f9, TASK_FPR_9(r2) + lfd f10, TASK_FPR_10(r2) + lfd f11, TASK_FPR_11(r2) + lfd f12, TASK_FPR_12(r2) + lfd f13, TASK_FPR_13(r2) + lfd f14, TASK_FPR_14(r2) + lfd f15, TASK_FPR_15(r2) + lfd f16, TASK_FPR_16(r2) + lfd f17, TASK_FPR_17(r2) + lfd f18, TASK_FPR_18(r2) + lfd f19, TASK_FPR_19(r2) + lfd f20, TASK_FPR_20(r2) + lfd f21, TASK_FPR_21(r2) + lfd f22, TASK_FPR_22(r2) + lfd f23, TASK_FPR_23(r2) + lfd f24, TASK_FPR_24(r2) + lfd f25, TASK_FPR_25(r2) + lfd f26, TASK_FPR_26(r2) + lfd f27, TASK_FPR_27(r2) + lfd f28, TASK_FPR_28(r2) + lfd f29, TASK_FPR_29(r2) + lfd f30, TASK_FPR_30(r2) + lfd f31, TASK_FPR_31(r2) + + b 2b + + + ;// @fn system_call_fast_path ;// Handle fast path system calls. ;// 0x800 = HMER read (HMER -> r3). diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C index 6faf47681..622c970a7 100644 --- a/src/kernel/syscall.C +++ b/src/kernel/syscall.C @@ -165,7 +165,12 @@ namespace Systemcalls // TODO: Deal with join. // Clean up task memory. + // Delete FP context. + if (t->fp_context) + delete t->fp_context; + // Delete stack. StackSegment::deleteStack(t->tid); + // Delete task struct. delete t; } diff --git a/src/kernel/taskmgr.C b/src/kernel/taskmgr.C index 3fe2cff6c..04d91c4f8 100644 --- a/src/kernel/taskmgr.C +++ b/src/kernel/taskmgr.C @@ -110,6 +110,9 @@ task_t* TaskManager::_createTask(TaskManager::task_fn_t t, task->context.gprs[1] = NULL; } + // Clear FP context (start with FP disabled on all tasks). 
+ task->fp_context = NULL; + return task; } diff --git a/src/lib/makefile b/src/lib/makefile index 5ad3dd0de..59a12b350 100644 --- a/src/lib/makefile +++ b/src/lib/makefile @@ -22,7 +22,7 @@ # IBM_PROLOG_END ROOTPATH = ../.. -OBJS = string.o stdlib.o assert.o stdio.o +OBJS = string.o stdlib.o assert.o stdio.o math.o OBJS += syscall_stub.o syscall_task.o syscall_msg.o OBJS += syscall_mmio.o syscall_time.o sync.o syscall_misc.o OBJS += syscall_mm.o cxxtest_data.o diff --git a/src/lib/math.C b/src/lib/math.C new file mode 100644 index 000000000..8958eebf5 --- /dev/null +++ b/src/lib/math.C @@ -0,0 +1,31 @@ +// IBM_PROLOG_BEGIN_TAG +// This is an automatically generated prolog. +// +// $Source: src/lib/math.C $ +// +// IBM CONFIDENTIAL +// +// COPYRIGHT International Business Machines Corp. 2011 +// +// p1 +// +// Object Code Only (OCO) source materials +// Licensed Internal Code Source Materials +// IBM HostBoot Licensed Internal Code +// +// The source code for this program is not published or other- +// wise divested of its trade secrets, irrespective of what has +// been deposited with the U.S. Copyright Office. 
+// +// Origin: 30 +// +// IBM_PROLOG_END +#include <math.h> + +double sqrt(double x) +{ + register double _x = x; + asm volatile("fsqrt %0, %1" : "=f" (_x) : "f" (_x)); + return _x; +} + diff --git a/src/makefile b/src/makefile index 4ec0b90a3..fad0678e5 100644 --- a/src/makefile +++ b/src/makefile @@ -28,7 +28,7 @@ IMGS = hbicore hbicore_test EXTRA_LIDS = dslid BASE_OBJECTS = console.o spinlock.o string.o stdlib.o assert.o stdio.o \ - builtins.o vfs_init.o heapmgr.o pagemgr.o + builtins.o vfs_init.o heapmgr.o pagemgr.o math.o DIRECT_BOOT_OBJECTS = start.o kernel.o taskmgr.o cpumgr.o syscall.o \ scheduler.o exception.o vmmmgr.o timemgr.o \ diff --git a/src/usr/testcore/lib/fptest.H b/src/usr/testcore/lib/fptest.H new file mode 100644 index 000000000..71d097cc5 --- /dev/null +++ b/src/usr/testcore/lib/fptest.H @@ -0,0 +1,49 @@ +// IBM_PROLOG_BEGIN_TAG +// This is an automatically generated prolog. +// +// $Source: src/usr/testcore/lib/fptest.H $ +// +// IBM CONFIDENTIAL +// +// COPYRIGHT International Business Machines Corp. 2011 +// +// p1 +// +// Object Code Only (OCO) source materials +// Licensed Internal Code Source Materials +// IBM HostBoot Licensed Internal Code +// +// The source code for this program is not published or other- +// wise divested of its trade secrets, irrespective of what has +// been deposited with the U.S. Copyright Office. +// +// Origin: 30 +// +// IBM_PROLOG_END +#ifndef __TESTCORE_LIB_FPTEST_H +#define __TESTCORE_LIB_FPTEST_H + +/** @file fptest.H + * @brief Test cases for floating point support. 
+ */ +#include + +class FloatingPointTest : public CxxTest::TestSuite +{ + public: + + void testSqrt() + { + if (2.0 != sqrt(4.0)) + { + TS_FAIL("SQRT(4) != 2"); + } + + if (3.0 != sqrt(9.0)) + { + TS_FAIL("SQRT(9) != 3"); + } + } +}; + +#endif diff --git a/src/usr/trace/trace.C b/src/usr/trace/trace.C index b37940300..8a8456bb7 100644 --- a/src/usr/trace/trace.C +++ b/src/usr/trace/trace.C @@ -329,6 +329,7 @@ void Trace::_trace_adal_write_all(trace_desc_t *io_td, trace_bin_entry_t l_entry; uint64_t l_str_map = 0; uint64_t l_char_map = 0; + uint64_t l_double_map = 0; /*------------------------------------------------------------------------*/ @@ -390,6 +391,21 @@ void Trace::_trace_adal_write_all(trace_desc_t *io_td, // Add to total size; data is word aligned l_data_size += sizeof(l_tmpData); } + else if (('e' == _fmt[i]) || ('f' == _fmt[i]) || ('g' == _fmt[i])) + { + // Set flag to indicate argument is a double + l_double_map = l_double_map | (1 << num_args); + + // Numbers count as two 4-byte arg + num_args++; + num_4byte_args += 2; + + // Retrieve the argument to increment to next one + double l_tmpData = va_arg(i_args,double); + + // Add to total size; data is word aligned + l_data_size += sizeof(l_tmpData); + } else { // Numbers count as two 4-byte arg @@ -470,6 +486,13 @@ void Trace::_trace_adal_write_all(trace_desc_t *io_td, l_ptr += sizeof(uint32_t); } + else if (l_double_map & (1 << i)) + { + // Save number to buffer & increment pointer (no need to align) + *(reinterpret_cast<double *>(l_ptr)) = va_arg(l_args, double); + + l_ptr += sizeof(double); + } else { // Save number to buffer & increment pointer (no need to align) -- cgit v1.2.1