//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// This file defines the standard OpenCL implementation of the Acxxel API. /// //===----------------------------------------------------------------------===// #include "acxxel.h" #include "CL/cl.h" #include #include #include #include namespace acxxel { namespace { /// An ID containing the platform ID and the device ID within the platform. struct FullDeviceID { cl_platform_id PlatformID; cl_device_id DeviceID; FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID) : PlatformID(PlatformID), DeviceID(DeviceID) {} }; static std::string getOpenCLErrorMessage(cl_int Result) { if (!Result) return "success"; std::ostringstream OutStream; OutStream << "OpenCL error: code = " << Result; return OutStream.str(); } static Status getOpenCLError(cl_int Result, const std::string &Message) { if (!Result) return Status(); std::ostringstream OutStream; OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message; return Status(OutStream.str()); } static void logOpenCLWarning(cl_int Result, const std::string &Message) { if (Result) { std::ostringstream OutStream; OutStream << Message << ": " << getOpenCLErrorMessage(Result); logWarning(OutStream.str()); } } class OpenCLPlatform : public Platform { public: ~OpenCLPlatform() override = default; static Expected create(); Expected getDeviceCount() override; Expected createStream(int DeviceIndex) override; Expected createEvent(int DeviceIndex) override; Expected createProgramFromSource(Span Source, int DeviceIndex) override; protected: Status streamSync(void *Stream) override; Status streamWaitOnEvent(void *Stream, void *Event) override; Expected rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override; HandleDestructor getDeviceMemoryHandleDestructor() override; void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize, size_t ByteOffset) override; void rawDestroyDeviceMemorySpanHandle(void *Handle) override; Expected rawGetDeviceSymbolAddress(const void *Symbol, int DeviceIndex) override; Expected rawGetDeviceSymbolSize(const void *Symbol, int DeviceIndex) override; Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override; HandleDestructor getUnregisterHostMemoryHandleDestructor() override; Expected rawMallocRegisteredH(ptrdiff_t ByteCount) override; HandleDestructor getFreeHostMemoryHandleDestructor() override; Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, void *DeviceDst, ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, void *Stream) override; Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, void *HostDst, ptrdiff_t ByteCount, void *Stream) override; Status asyncCopyHToD(const void *HostSrc, void *DeviceDst, ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, void *Stream) override; Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount, char ByteValue, void *Stream) override; Status addStreamCallback(Stream &Stream, StreamCallback Callback) override; Status enqueueEvent(void *Event, void *Stream) override; bool eventIsDone(void *Event) override; Status eventSync(void *Event) override; Expected getSecondsBetweenEvents(void *StartEvent, void *EndEvent) override; Expected rawCreateKernel(void *Program, const std::string &Name) override; HandleDestructor getKernelHandleDestructor() override; Status rawEnqueueKernelLaunch(void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions, Span Arguments, Span ArgumentSizes, size_t SharedMemoryBytes) override; private: OpenCLPlatform(std::vector &&FullDeviceIDs, std::vector &&Contexts, std::vector &&CommandQueues) : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)), CommandQueues(std::move(CommandQueues)) {} std::vector FullDeviceIDs; std::vector Contexts; std::vector CommandQueues; }; Expected OpenCLPlatform::create() { constexpr cl_uint MaxNumEntries = 100; cl_platform_id Platforms[MaxNumEntries]; cl_uint NumPlatforms; if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms)) return getOpenCLError(Result, "clGetPlatformIDs"); std::vector FullDeviceIDs; for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms; ++PlatformIndex) { cl_uint NumDevices; cl_device_id Devices[MaxNumEntries]; if (cl_int Result = clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL, MaxNumEntries, Devices, &NumDevices)) return getOpenCLError(Result, "clGetDeviceIDs"); for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex) FullDeviceIDs.emplace_back(Platforms[PlatformIndex], Devices[DeviceIndex]); } if (FullDeviceIDs.empty()) return Status("No OpenCL device available on this system."); std::vector Contexts(FullDeviceIDs.size()); std::vector CommandQueues(FullDeviceIDs.size()); for (size_t I = 0; I < FullDeviceIDs.size(); ++I) { cl_int CreateContextResult; Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID, nullptr, nullptr, &CreateContextResult); if (CreateContextResult) return getOpenCLError(CreateContextResult, "clCreateContext"); cl_int CreateCommandQueueResult; CommandQueues[I] = clCreateCommandQueue( Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE, &CreateCommandQueueResult); if (CreateCommandQueueResult) return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue"); } return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts), std::move(CommandQueues)); } Expected OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); } static void openCLDestroyStream(void *H) { logOpenCLWarning(clReleaseCommandQueue(static_cast(H)), "clReleaseCommandQueue"); } Expected OpenCLPlatform::createStream(int DeviceIndex) { cl_int Result; cl_command_queue Queue = clCreateCommandQueue( Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID, CL_QUEUE_PROFILING_ENABLE, &Result); if (Result) return getOpenCLError(Result, "clCreateCommandQueue"); return constructStream(this, DeviceIndex, Queue, openCLDestroyStream); } static void openCLEventDestroy(void *H) { cl_event *CLEvent = static_cast(H); logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent"); delete CLEvent; } Status OpenCLPlatform::streamSync(void *Stream) { return getOpenCLError(clFinish(static_cast(Stream)), "clFinish"); } Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) { cl_event *CLEvent = static_cast(Event); return getOpenCLError( clEnqueueBarrierWithWaitList(static_cast(Stream), 1, CLEvent, nullptr), "clEnqueueMarkerWithWaitList"); } Expected OpenCLPlatform::createEvent(int DeviceIndex) { cl_int Result; cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result); if (Result) return getOpenCLError(Result, "clCreateUserEvent"); if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE)) return getOpenCLError(Result, "clSetUserEventStatus"); return constructEvent(this, DeviceIndex, new cl_event(Event), openCLEventDestroy); } static void openCLDestroyProgram(void *H) { logOpenCLWarning(clReleaseProgram(static_cast(H)), "clReleaseProgram"); } Expected OpenCLPlatform::createProgramFromSource(Span Source, int DeviceIndex) { cl_int Error; const char *CSource = Source.data(); size_t SourceSize = Source.size(); cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1, &CSource, &SourceSize, &Error); if (Error) return getOpenCLError(Error, "clCreateProgramWithSource"); cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID; if (cl_int Error = clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr)) return getOpenCLError(Error, "clBuildProgram"); return constructProgram(this, Program, openCLDestroyProgram); } Expected OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) { cl_int Result; cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE, ByteCount, nullptr, &Result); if (Result) return getOpenCLError(Result, "clCreateBuffer"); return reinterpret_cast(Memory); } static void openCLDestroyDeviceMemory(void *H) { logOpenCLWarning(clReleaseMemObject(static_cast(H)), "clReleaseMemObject"); } HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() { return openCLDestroyDeviceMemory; } void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize, size_t ByteOffset) { cl_int Error; cl_buffer_region Region; Region.origin = ByteOffset; Region.size = ByteSize; cl_mem SubBuffer = clCreateSubBuffer(static_cast(BaseHandle), 0, CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error); logOpenCLWarning(Error, "clCreateSubBuffer"); if (Error) return nullptr; return SubBuffer; } void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) { openCLDestroyDeviceMemory(Handle); } Expected OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/, int /*DeviceIndex*/) { // This doesn't seem to have any equivalent in OpenCL. return Status("not implemented"); } Expected OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/, int /*DeviceIndex*/) { // This doesn't seem to have any equivalent in OpenCL. return Status("not implemented"); } static void noOpHandleDestructor(void *) {} Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/, ptrdiff_t /*ByteCount*/) { // TODO(jhen): Do we want to do something to pin the memory here? return Status(); } HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() { // TODO(jhen): Do we want to unpin the memory here? return noOpHandleDestructor; } Expected OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) { // TODO(jhen): Do we want to do something to pin the memory here? return std::malloc(ByteCount); } static void freeMemoryHandleDestructor(void *Memory) { // TODO(jhen): Do we want to unpin the memory here? std::free(Memory); } HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() { return freeMemoryHandleDestructor; } Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, void *DeviceDst, ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, void *Stream) { return getOpenCLError( clEnqueueCopyBuffer(static_cast(Stream), static_cast(const_cast(DeviceSrc)), static_cast(DeviceDst), DeviceSrcByteOffset, DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr), "clEnqueueCopyBuffer"); } Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, void *HostDst, ptrdiff_t ByteCount, void *Stream) { return getOpenCLError( clEnqueueReadBuffer(static_cast(Stream), static_cast(const_cast(DeviceSrc)), CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0, nullptr, nullptr), "clEnqueueReadBuffer"); } Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst, ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, void *Stream) { return getOpenCLError( clEnqueueWriteBuffer(static_cast(Stream), static_cast(DeviceDst), CL_TRUE, DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr, nullptr), "clEnqueueWriteBuffer"); } Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount, char ByteValue, void *Stream) { return getOpenCLError( clEnqueueFillBuffer(static_cast(Stream), static_cast(DeviceDst), &ByteValue, 1, ByteOffset, ByteCount, 0, nullptr, nullptr), "clEnqueueFillBuffer"); } struct StreamCallbackUserData { StreamCallbackUserData(Stream &TheStream, StreamCallback Function, cl_event EndEvent) : TheStream(TheStream), TheFunction(std::move(Function)), EndEvent(EndEvent) {} Stream &TheStream; StreamCallback TheFunction; cl_event EndEvent; }; // A function with the right signature to pass to clSetEventCallback. void CL_CALLBACK openCLStreamCallbackShim(cl_event, cl_int EventCommandExecStatus, void *UserData) { std::unique_ptr Data( static_cast(UserData)); Data->TheFunction( Data->TheStream, getOpenCLError(EventCommandExecStatus, "stream callback error state")); if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE)) logOpenCLWarning(Result, "clSetUserEventStatus"); if (cl_int Result = clReleaseEvent(Data->EndEvent)) logOpenCLWarning(Result, "clReleaseEvent"); } Status OpenCLPlatform::addStreamCallback(Stream &TheStream, StreamCallback Callback) { cl_int Result; cl_event StartEvent = clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result); if (Result) return getOpenCLError(Result, "clCreateUserEvent"); cl_event EndEvent = clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result); if (Result) return getOpenCLError(Result, "clCreateUserEvent"); cl_event StartBarrierEvent; if (cl_int Result = clEnqueueBarrierWithWaitList( static_cast(getStreamHandle(TheStream)), 1, &StartEvent, &StartBarrierEvent)) return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); if (cl_int Result = clEnqueueBarrierWithWaitList( static_cast(getStreamHandle(TheStream)), 1, &EndEvent, nullptr)) return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); std::unique_ptr UserData( new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent)); if (cl_int Result = clSetEventCallback(StartBarrierEvent, CL_RUNNING, openCLStreamCallbackShim, UserData.release())) return getOpenCLError(Result, "clSetEventCallback"); if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE)) return getOpenCLError(Result, "clSetUserEventStatus"); if (cl_int Result = clReleaseEvent(StartBarrierEvent)) return getOpenCLError(Result, "clReleaseEvent"); return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent"); } Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) { cl_event *CLEvent = static_cast(Event); cl_event OldEvent = *CLEvent; cl_event NewEvent; if (cl_int Result = clEnqueueMarkerWithWaitList( static_cast(Stream), 0, nullptr, &NewEvent)) return getOpenCLError(Result, "clEnqueueMarkerWithWaitList"); *CLEvent = NewEvent; return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent"); } bool OpenCLPlatform::eventIsDone(void *Event) { cl_event *CLEvent = static_cast(Event); cl_int EventStatus; logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(EventStatus), &EventStatus, nullptr), "clGetEventInfo"); return EventStatus == CL_COMPLETE || EventStatus < 0; } Status OpenCLPlatform::eventSync(void *Event) { cl_event *CLEvent = static_cast(Event); return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents"); } Expected OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent, void *EndEvent) { cl_event *CLStartEvent = static_cast(StartEvent); cl_event *CLEndEvent = static_cast(EndEvent); cl_profiling_info ParamName = CL_PROFILING_COMMAND_END; cl_ulong StartNanoseconds; cl_ulong EndNanoseconds; if (cl_int Result = clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong), &StartNanoseconds, nullptr)) return getOpenCLError(Result, "clGetEventProfilingInfo"); if (cl_int Result = clGetEventProfilingInfo( *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr)) return getOpenCLError(Result, "clGetEventProfilingInfo"); return (EndNanoseconds - StartNanoseconds) * 1e-12; } Expected OpenCLPlatform::rawCreateKernel(void *Program, const std::string &Name) { cl_int Error; cl_kernel Kernel = clCreateKernel(static_cast(Program), Name.c_str(), &Error); if (Error) return getOpenCLError(Error, "clCreateKernel"); return Kernel; } static void openCLDestroyKernel(void *H) { logOpenCLWarning(clReleaseKernel(static_cast(H)), "clReleaseKernel"); } HandleDestructor OpenCLPlatform::getKernelHandleDestructor() { return openCLDestroyKernel; } Status OpenCLPlatform::rawEnqueueKernelLaunch( void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions, Span Arguments, Span ArgumentSizes, size_t SharedMemoryBytes) { if (SharedMemoryBytes != 0) return Status("OpenCL kernel launches only accept zero for the shared " "memory byte size"); cl_kernel TheKernel = static_cast(Kernel); for (int I = 0; I < Arguments.size(); ++I) if (cl_int Error = clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I])) return getOpenCLError(Error, "clSetKernelArg"); size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY, LaunchDimensions.BlockZ}; size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX, LaunchDimensions.BlockY * LaunchDimensions.GridY, LaunchDimensions.BlockZ * LaunchDimensions.GridZ}; return getOpenCLError( clEnqueueNDRangeKernel(static_cast(Stream), TheKernel, 3, nullptr, GlobalWorkSize, LocalWorkSize, 0, nullptr, nullptr), "clEnqueueNDRangeKernel"); } } // namespace namespace opencl { /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a /// Platform. Expected getPlatform() { static auto MaybePlatform = []() -> Expected { Expected CreationResult = OpenCLPlatform::create(); if (CreationResult.isError()) return CreationResult.getError(); else return new OpenCLPlatform(CreationResult.takeValue()); }(); return MaybePlatform; } } // namespace opencl } // namespace acxxel