diff --git a/examples_cuda/mandelbrot_tasks3d/drvapi_error_string.h b/examples_cuda/mandelbrot_tasks3d/drvapi_error_string.h deleted file mode 100644 index ce85f152..00000000 --- a/examples_cuda/mandelbrot_tasks3d/drvapi_error_string.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright 1993-2012 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -#ifndef _DRVAPI_ERROR_STRING_H_ -#define _DRVAPI_ERROR_STRING_H_ - -#include -#include -#include - -// Error Code string definitions here -typedef struct -{ - char const *error_string; - int error_id; -} s_CudaErrorStr; - -/** - * Error codes - */ -static s_CudaErrorStr sCudaDrvErrorString[] = -{ - /** - * The API call returned with no errors. In the case of query calls, this - * can also mean that the operation being queried is complete (see - * ::cuEventQuery() and ::cuStreamQuery()). - */ - { "CUDA_SUCCESS", 0 }, - - /** - * This indicates that one or more of the parameters passed to the API call - * is not within an acceptable range of values. - */ - { "CUDA_ERROR_INVALID_VALUE", 1 }, - - /** - * The API call failed because it was unable to allocate enough memory to - * perform the requested operation. - */ - { "CUDA_ERROR_OUT_OF_MEMORY", 2 }, - - /** - * This indicates that the CUDA driver has not been initialized with - * ::cuInit() or that initialization has failed. - */ - { "CUDA_ERROR_NOT_INITIALIZED", 3 }, - - /** - * This indicates that the CUDA driver is in the process of shutting down. - */ - { "CUDA_ERROR_DEINITIALIZED", 4 }, - - /** - * This indicates profiling APIs are called while application is running - * in visual profiler mode. - */ - { "CUDA_ERROR_PROFILER_DISABLED", 5 }, - /** - * This indicates profiling has not been initialized for this context. - * Call cuProfilerInitialize() to resolve this. - */ - { "CUDA_ERROR_PROFILER_NOT_INITIALIZED", 6 }, - /** - * This indicates profiler has already been started and probably - * cuProfilerStart() is incorrectly called. - */ - { "CUDA_ERROR_PROFILER_ALREADY_STARTED", 7 }, - /** - * This indicates profiler has already been stopped and probably - * cuProfilerStop() is incorrectly called. - */ - { "CUDA_ERROR_PROFILER_ALREADY_STOPPED", 8 }, - /** - * This indicates that no CUDA-capable devices were detected by the installed - * CUDA driver. - */ - { "CUDA_ERROR_NO_DEVICE (no CUDA-capable devices were detected)", 100 }, - - /** - * This indicates that the device ordinal supplied by the user does not - * correspond to a valid CUDA device. - */ - { "CUDA_ERROR_INVALID_DEVICE (device specified is not a valid CUDA device)", 101 }, - - - /** - * This indicates that the device kernel image is invalid. This can also - * indicate an invalid CUDA module. - */ - { "CUDA_ERROR_INVALID_IMAGE", 200 }, - - /** - * This most frequently indicates that there is no context bound to the - * current thread. This can also be returned if the context passed to an - * API call is not a valid handle (such as a context that has had - * ::cuCtxDestroy() invoked on it). This can also be returned if a user - * mixes different API versions (i.e. 3010 context with 3020 API calls). - * See ::cuCtxGetApiVersion() for more details. - */ - { "CUDA_ERROR_INVALID_CONTEXT", 201 }, - - /** - * This indicated that the context being supplied as a parameter to the - * API call was already the active context. - * \deprecated - * This error return is deprecated as of CUDA 3.2. It is no longer an - * error to attempt to push the active context via ::cuCtxPushCurrent(). - */ - { "CUDA_ERROR_CONTEXT_ALREADY_CURRENT", 202 }, - - /** - * This indicates that a map or register operation has failed. - */ - { "CUDA_ERROR_MAP_FAILED", 205 }, - - /** - * This indicates that an unmap or unregister operation has failed. - */ - { "CUDA_ERROR_UNMAP_FAILED", 206 }, - - /** - * This indicates that the specified array is currently mapped and thus - * cannot be destroyed. - */ - { "CUDA_ERROR_ARRAY_IS_MAPPED", 207 }, - - /** - * This indicates that the resource is already mapped. - */ - { "CUDA_ERROR_ALREADY_MAPPED", 208 }, - - /** - * This indicates that there is no kernel image available that is suitable - * for the device. This can occur when a user specifies code generation - * options for a particular CUDA source file that do not include the - * corresponding device configuration. - */ - { "CUDA_ERROR_NO_BINARY_FOR_GPU", 209 }, - - /** - * This indicates that a resource has already been acquired. - */ - { "CUDA_ERROR_ALREADY_ACQUIRED", 210 }, - - /** - * This indicates that a resource is not mapped. - */ - { "CUDA_ERROR_NOT_MAPPED", 211 }, - - /** - * This indicates that a mapped resource is not available for access as an - * array. - */ - { "CUDA_ERROR_NOT_MAPPED_AS_ARRAY", 212 }, - - /** - * This indicates that a mapped resource is not available for access as a - * pointer. - */ - { "CUDA_ERROR_NOT_MAPPED_AS_POINTER", 213 }, - - /** - * This indicates that an uncorrectable ECC error was detected during - * execution. - */ - { "CUDA_ERROR_ECC_UNCORRECTABLE", 214 }, - - /** - * This indicates that the ::CUlimit passed to the API call is not - * supported by the active device. - */ - { "CUDA_ERROR_UNSUPPORTED_LIMIT", 215 }, - - /** - * This indicates that the ::CUcontext passed to the API call can - * only be bound to a single CPU thread at a time but is already - * bound to a CPU thread. - */ - { "CUDA_ERROR_CONTEXT_ALREADY_IN_USE", 216 }, - - /** - * This indicates that peer access is not supported across the given - * devices. - */ - { "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED", 217}, - - /** - * This indicates that the device kernel source is invalid. - */ - { "CUDA_ERROR_INVALID_SOURCE", 300 }, - - /** - * This indicates that the file specified was not found. - */ - { "CUDA_ERROR_FILE_NOT_FOUND", 301 }, - - /** - * This indicates that a link to a shared object failed to resolve. - */ - { "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", 302 }, - - /** - * This indicates that initialization of a shared object failed. - */ - { "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED", 303 }, - - /** - * This indicates that an OS call failed. - */ - { "CUDA_ERROR_OPERATING_SYSTEM", 304 }, - - - /** - * This indicates that a resource handle passed to the API call was not - * valid. Resource handles are opaque types like ::CUstream and ::CUevent. - */ - { "CUDA_ERROR_INVALID_HANDLE", 400 }, - - - /** - * This indicates that a named symbol was not found. Examples of symbols - * are global/constant variable names, texture names }, and surface names. - */ - { "CUDA_ERROR_NOT_FOUND", 500 }, - - - /** - * This indicates that asynchronous operations issued previously have not - * completed yet. This result is not actually an error, but must be indicated - * differently than ::CUDA_SUCCESS (which indicates completion). Calls that - * may return this value include ::cuEventQuery() and ::cuStreamQuery(). - */ - { "CUDA_ERROR_NOT_READY", 600 }, - - - /** - * An exception occurred on the device while executing a kernel. Common - * causes include dereferencing an invalid device pointer and accessing - * out of bounds shared memory. The context cannot be used }, so it must - * be destroyed (and a new one should be created). All existing device - * memory allocations from this context are invalid and must be - * reconstructed if the program is to continue using CUDA. - */ - { "CUDA_ERROR_LAUNCH_FAILED", 700 }, - - /** - * This indicates that a launch did not occur because it did not have - * appropriate resources. This error usually indicates that the user has - * attempted to pass too many arguments to the device kernel, or the - * kernel launch specifies too many threads for the kernel's register - * count. Passing arguments of the wrong size (i.e. a 64-bit pointer - * when a 32-bit int is expected) is equivalent to passing too many - * arguments and can also result in this error. - */ - { "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", 701 }, - - /** - * This indicates that the device kernel took too long to execute. This can - * only occur if timeouts are enabled - see the device attribute - * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The - * context cannot be used (and must be destroyed similar to - * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from - * this context are invalid and must be reconstructed if the program is to - * continue using CUDA. - */ - { "CUDA_ERROR_LAUNCH_TIMEOUT", 702 }, - - /** - * This error indicates a kernel launch that uses an incompatible texturing - * mode. - */ - { "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", 703 }, - - /** - * This error indicates that a call to ::cuCtxEnablePeerAccess() is - * trying to re-enable peer access to a context which has already - * had peer access to it enabled. - */ - { "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED", 704 }, - - /** - * This error indicates that ::cuCtxDisablePeerAccess() is - * trying to disable peer access which has not been enabled yet - * via ::cuCtxEnablePeerAccess(). - */ - { "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED", 705 }, - - /** - * This error indicates that the primary context for the specified device - * has already been initialized. - */ - { "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE", 708 }, - - /** - * This error indicates that the context current to the calling thread - * has been destroyed using ::cuCtxDestroy }, or is a primary context which - * has not yet been initialized. - */ - { "CUDA_ERROR_CONTEXT_IS_DESTROYED", 709 }, - - /** - * A device-side assert triggered during kernel execution. The context - * cannot be used anymore, and must be destroyed. All existing device - * memory allocations from this context are invalid and must be - * reconstructed if the program is to continue using CUDA. - */ - { "CUDA_ERROR_ASSERT", 710 }, - - /** - * This error indicates that the hardware resources required to enable - * peer access have been exhausted for one or more of the devices - * passed to ::cuCtxEnablePeerAccess(). - */ - { "CUDA_ERROR_TOO_MANY_PEERS", 711 }, - - /** - * This error indicates that the memory range passed to ::cuMemHostRegister() - * has already been registered. - */ - { "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED", 712 }, - - /** - * This error indicates that the pointer passed to ::cuMemHostUnregister() - * does not correspond to any currently registered memory region. - */ - { "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED", 713 }, - - /** - * This error indicates that the attempted operation is not permitted. - */ - { "CUDA_ERROR_NOT_PERMITTED", 800 }, - - /** - * This error indicates that the attempted operation is not supported - * on the current system or device. - */ - { "CUDA_ERROR_NOT_SUPPORTED", 801 }, - - /** - * This indicates that an unknown internal error has occurred. - */ - { "CUDA_ERROR_UNKNOWN", 999 }, - { NULL, -1 } -}; - -// This is just a linear search through the array, since the error_id's are not -// always ocurring consecutively -const char * getCudaDrvErrorString(CUresult error_id) -{ - int index = 0; - while (sCudaDrvErrorString[index].error_id != error_id && - sCudaDrvErrorString[index].error_id != -1) - { - index++; - } - if (sCudaDrvErrorString[index].error_id == error_id) - return (const char *)sCudaDrvErrorString[index].error_string; - else - return (const char *)"CUDA_ERROR not found!"; -} - -#endif diff --git a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp index 98ed0bb7..d16c5b3c 100644 --- a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp +++ b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp @@ -43,288 +43,7 @@ #include #include "../timing.h" -#include -double rtc(void) -{ - struct timeval Tvalue; - double etime; - struct timezone dummy; - - gettimeofday(&Tvalue,&dummy); - etime = (double) Tvalue.tv_sec + - 1.e-6*((double) Tvalue.tv_usec); - return etime; -} - -#include -#include -#include -#include -#include "drvapi_error_string.h" - -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) -// These are the inline versions for all of the SDK helper functions -void __checkCudaErrors(CUresult err, const char *file, const int line) { - if(CUDA_SUCCESS != err) { - std::cerr << "checkCudeErrors() Driver API error = " << err << "\"" - << getCudaDrvErrorString(err) << "\" from file <" << file - << ", line " << line << "\n"; - exit(-1); - } -} - - -/**********************/ -/* Basic CUDriver API */ -CUcontext context; - -void createContext(const int deviceId = 0) -{ - CUdevice device; - int devCount; - checkCudaErrors(cuInit(0)); - checkCudaErrors(cuDeviceGetCount(&devCount)); - assert(devCount > 0); - checkCudaErrors(cuDeviceGet(&device, deviceId < devCount ? deviceId : 0)); - - char name[128]; - checkCudaErrors(cuDeviceGetName(name, 128, device)); - std::cout << "Using CUDA Device [0]: " << name << "\n"; - - int devMajor, devMinor; - checkCudaErrors(cuDeviceComputeCapability(&devMajor, &devMinor, device)); - std::cout << "Device Compute Capability: " - << devMajor << "." << devMinor << "\n"; - if (devMajor < 2) { - std::cerr << "ERROR: Device 0 is not SM 2.0 or greater\n"; - exit(1); - } - - // Create driver context - checkCudaErrors(cuCtxCreate(&context, 0, device)); -} -void destroyContext() -{ - checkCudaErrors(cuCtxDestroy(context)); -} - -CUmodule loadModule(const char * module) -{ - const double t0 = rtc(); - CUmodule cudaModule; - // in this branch we use compilation with parameters - -#if 0 - unsigned int jitNumOptions = 1; - CUjit_option *jitOptions = new CUjit_option[jitNumOptions]; - void **jitOptVals = new void*[jitNumOptions]; - // set up pointer to set the Maximum # of registers for a particular kernel - jitOptions[0] = CU_JIT_MAX_REGISTERS; - int jitRegCount = 64; - jitOptVals[0] = (void *)(size_t)jitRegCount; -#if 0 - - { - jitNumOptions = 3; - // set up size of compilation log buffer - jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - int jitLogBufferSize = 1024; - jitOptVals[0] = (void *)(size_t)jitLogBufferSize; - - // set up pointer to the compilation log buffer - jitOptions[1] = CU_JIT_INFO_LOG_BUFFER; - char *jitLogBuffer = new char[jitLogBufferSize]; - jitOptVals[1] = jitLogBuffer; - - // set up pointer to set the Maximum # of registers for a particular kernel - jitOptions[2] = CU_JIT_MAX_REGISTERS; - int jitRegCount = 32; - jitOptVals[2] = (void *)(size_t)jitRegCount; - } -#endif - - checkCudaErrors(cuModuleLoadDataEx(&cudaModule, module,jitNumOptions, jitOptions, (void **)jitOptVals)); -#else - CUlinkState CUState; - CUlinkState *lState = &CUState; - const int nOptions = 7; - CUjit_option options[nOptions]; - void* optionVals[nOptions]; - float walltime; - const unsigned int logSize = 32768; - char error_log[logSize], - info_log[logSize]; - void *cuOut; - size_t outSize; - int myErr = 0; - - // Setup linker options - // Return walltime from JIT compilation - options[0] = CU_JIT_WALL_TIME; - optionVals[0] = (void*) &walltime; - // Pass a buffer for info messages - options[1] = CU_JIT_INFO_LOG_BUFFER; - optionVals[1] = (void*) info_log; - // Pass the size of the info buffer - options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - optionVals[2] = (void*) logSize; - // Pass a buffer for error message - options[3] = CU_JIT_ERROR_LOG_BUFFER; - optionVals[3] = (void*) error_log; - // Pass the size of the error buffer - options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - optionVals[4] = (void*) logSize; - // Make the linker verbose - options[5] = CU_JIT_LOG_VERBOSE; - optionVals[5] = (void*) 1; - // Max # of registers/pthread - options[6] = CU_JIT_MAX_REGISTERS; - int jitRegCount = 48; - optionVals[6] = (void *)(size_t)jitRegCount; - - // Create a pending linker invocation - checkCudaErrors(cuLinkCreate(nOptions,options, optionVals, lState)); - -#if 0 - if (sizeof(void *)==4) - { - // Load the PTX from the string myPtx32 - printf("Loading myPtx32[] program\n"); - // PTX May also be loaded from file, as per below. - myErr = cuLinkAddData(*lState, CU_JIT_INPUT_PTX, (void*)myPtx32, strlen(myPtx32)+1, 0, 0, 0, 0); - } - else -#endif - { - // Load the PTX from the string myPtx (64-bit) - fprintf(stderr, "Loading ptx..\n"); - myErr = cuLinkAddData(*lState, CU_JIT_INPUT_PTX, (void*)module, strlen(module)+1, 0, 0, 0, 0); - myErr = cuLinkAddFile(*lState, CU_JIT_INPUT_LIBRARY, "libcudadevrt.a", 0,0,0); - // PTX May also be loaded from file, as per below. - // myErr = cuLinkAddFile(*lState, CU_JIT_INPUT_PTX, "myPtx64.ptx",0,0,0); - } - - // Complete the linker step - myErr = cuLinkComplete(*lState, &cuOut, &outSize); - - if ( myErr != CUDA_SUCCESS ) - { - // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above. - fprintf(stderr,"PTX Linker Error:\n%s\n",error_log); - assert(0); - } - - // Linker walltime and info_log were requested in options above. - fprintf(stderr, "CUDA Link Completed in %fms [ %g ms]. Linker Output:\n%s\n",walltime,info_log,1e3*(rtc() - t0)); - - // Load resulting cuBin into module - checkCudaErrors(cuModuleLoadData(&cudaModule, cuOut)); - - // Destroy the linker invocation - checkCudaErrors(cuLinkDestroy(*lState)); -#endif - fprintf(stderr, " loadModule took %g ms \n", 1e3*(rtc() - t0)); - return cudaModule; -} -void unloadModule(CUmodule &cudaModule) -{ - checkCudaErrors(cuModuleUnload(cudaModule)); -} - -CUfunction getFunction(CUmodule &cudaModule, const char * function) -{ - CUfunction cudaFunction; - checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function)); - return cudaFunction; -} - -CUdeviceptr deviceMalloc(const size_t size) -{ - CUdeviceptr d_buf; - checkCudaErrors(cuMemAlloc(&d_buf, size)); - return d_buf; -} -void deviceFree(CUdeviceptr d_buf) -{ - checkCudaErrors(cuMemFree(d_buf)); -} -void memcpyD2H(void * h_buf, CUdeviceptr d_buf, const size_t size) -{ - checkCudaErrors(cuMemcpyDtoH(h_buf, d_buf, size)); -} -void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size) -{ - checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size)); -} -#define deviceLaunch(func,params) \ - checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \ - checkCudaErrors( \ - cuLaunchKernel( \ - (func), \ - 1,1,1, \ - 32, 1, 1, \ - 0, NULL, (params), NULL \ - )); - -typedef CUdeviceptr devicePtr; - - -/**************/ -#include -std::vector readBinary(const char * filename) -{ - std::vector buffer; - FILE *fp = fopen(filename, "rb"); - if (!fp ) - { - fprintf(stderr, "file %s not found\n", filename); - assert(0); - } -#if 0 - char c; - while ((c = fgetc(fp)) != EOF) - buffer.push_back(c); -#else - fseek(fp, 0, SEEK_END); - const unsigned long long size = ftell(fp); /*calc the size needed*/ - fseek(fp, 0, SEEK_SET); - buffer.resize(size); - - if (fp == NULL){ /*ERROR detection if file == empty*/ - fprintf(stderr, "Error: There was an Error reading the file %s \n",filename); - exit(1); - } - else if (fread(&buffer[0], sizeof(char), size, fp) != size){ /* if count of read bytes != calculated size of .bin file -> ERROR*/ - fprintf(stderr, "Error: There was an Error reading the file %s \n", filename); - exit(1); - } -#endif - fprintf(stderr, " read buffer of size= %d bytes \n", (int)buffer.size()); - return buffer; -} - -extern "C" -{ - double CUDALaunch( - void **handlePtr, - const char * func_name, - void **func_args) - { - const std::vector module_str = readBinary("__kernels.ptx"); - const char * module = &module_str[0]; - CUmodule cudaModule = loadModule(module); - CUfunction cudaFunction = getFunction(cudaModule, func_name); - const double t0 = rtc(); - deviceLaunch(cudaFunction, func_args); - checkCudaErrors(cuStreamSynchronize(0)); - const double dt = rtc() - t0; - unloadModule(cudaModule); - return dt; - } - -} - -/********************/ +#include "../cuda_ispc.h" extern void mandelbrot_serial(float x0, float y0, float x1, float y1,