diff --git a/examples_cuda/mandelbrot_tasks3d/Makefile b/examples_cuda/mandelbrot_tasks3d/Makefile index 3dd44d65..ad1a9b3a 100644 --- a/examples_cuda/mandelbrot_tasks3d/Makefile +++ b/examples_cuda/mandelbrot_tasks3d/Makefile @@ -2,7 +2,7 @@ EXAMPLE=mandelbrot_tasks3d CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp ISPC_SRC=mandelbrot_tasks3d.ispc -ISPC_IA_TARGETS=avx,sse2,sse4 +ISPC_IA_TARGETS=avx ISPC_ARM_TARGETS=neon include ../common.mk diff --git a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp index 0f7f0884..57ce9ac7 100644 --- a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp +++ b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp @@ -43,6 +43,21 @@ #include #include "../timing.h" +#include + + +double rtc(void) +{ + struct timeval Tvalue; + double etime; + struct timezone dummy; + + gettimeofday(&Tvalue,&dummy); + etime = (double) Tvalue.tv_sec + + 1.e-6*((double) Tvalue.tv_usec); + return etime; +} + #include #include #include @@ -54,17 +69,17 @@ void __checkCudaErrors(CUresult err, const char *file, const int line) { if(CUDA_SUCCESS != err) { std::cerr << "checkCudeErrors() Driver API error = " << err << "\"" - << getCudaDrvErrorString(err) << "\" from file <" << file - << ", line " << line << "\n"; + << getCudaDrvErrorString(err) << "\" from file <" << file + << ", line " << line << "\n"; exit(-1); } } extern "C" void mandelbrot_ispc( - float x0, float y0, - float x1, float y1, - int width, int height, - int maxIterations, int output[]) ; + float x0, float y0, + float x1, float y1, + int width, int height, + int maxIterations, int output[]) ; /**********************/ @@ -118,7 +133,7 @@ CUfunction getFunction(CUmodule &cudaModule, const char * function) checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function)); return cudaFunction; } - + CUdeviceptr deviceMalloc(const size_t size) { CUdeviceptr d_buf; @@ -203,19 +218,19 @@ extern "C" CUmodule loadModule(const char * module_name, const char * module_data) { const ModuleMap::iterator it = findModule(module_name) - if (it != ModuleMap::end) - { - CUmodule cudaModule = loadModule(module); - module_list.insert(std::make_pair(std::string(module_name), cudaModule)); - return cudaModule - } + if (it != ModuleMap::end) + { + CUmodule cudaModule = loadModule(module); + module_list.insert(std::make_pair(std::string(module_name), cudaModule)); + return cudaModule + } return it->second; } void unloadModule(const char * module_name) { ModuleMap::iterator it = findModule(module_name) - if (it != ModuleMap::end) - module_list.erase(it); + if (it != ModuleMap::end) + module_list.erase(it); } }; #endif @@ -259,7 +274,7 @@ extern "C" fprintf(stderr, " module_name= %s \n", module_name); fprintf(stderr, " func_name= %s \n", func_name); -// fprintf(stderr, " ptx= %s \n", module); + // fprintf(stderr, " ptx= %s \n", module); fprintf(stderr, " x0= %g \n", *((float*)(func_args[0]))); fprintf(stderr, " dx= %g \n", *((float*)(func_args[1]))); fprintf(stderr, " y0= %g \n", *((float*)(func_args[2]))); @@ -289,114 +304,118 @@ extern "C" extern void mandelbrot_serial(float x0, float y0, float x1, float y1, - int width, int height, int maxIterations, - int output[]); + int width, int height, int maxIterations, + int output[]); /* Write a PPM image file with the image of the Mandelbrot set */ static void writePPM(int *buf, int width, int height, const char *fn) { - FILE *fp = fopen(fn, "wb"); - fprintf(fp, "P6\n"); - fprintf(fp, "%d %d\n", width, height); - fprintf(fp, "255\n"); - for (int i = 0; i < width*height; ++i) { - // Map the iteration count to colors by just alternating between - // two greys. - char c = (buf[i] & 0x1) ? 240 : 20; - for (int j = 0; j < 3; ++j) - fputc(c, fp); - } - fclose(fp); - printf("Wrote image file %s\n", fn); + FILE *fp = fopen(fn, "wb"); + fprintf(fp, "P6\n"); + fprintf(fp, "%d %d\n", width, height); + fprintf(fp, "255\n"); + for (int i = 0; i < width*height; ++i) { + // Map the iteration count to colors by just alternating between + // two greys. + char c = (buf[i] & 0x1) ? 240 : 20; + for (int j = 0; j < 3; ++j) + fputc(c, fp); + } + fclose(fp); + printf("Wrote image file %s\n", fn); } static void usage() { - fprintf(stderr, "usage: mandelbrot [--scale=]\n"); - exit(1); + fprintf(stderr, "usage: mandelbrot [--scale=]\n"); + exit(1); } int main(int argc, char *argv[]) { - unsigned int width = 1536; - unsigned int height = 1024; - float x0 = -2; - float x1 = 1; - float y0 = -1; - float y1 = 1; + unsigned int width = 1536; + unsigned int height = 1024; + float x0 = -2; + float x1 = 1; + float y0 = -1; + float y1 = 1; - if (argc == 1) - ; - else if (argc == 2) { - if (strncmp(argv[1], "--scale=", 8) == 0) { - float scale = atof(argv[1] + 8); - if (scale == 0.f) - usage(); - width *= scale; - height *= scale; - // round up to multiples of 16 - width = (width + 0xf) & ~0xf; - height = (height + 0xf) & ~0xf; - } - else - usage(); - } - else + if (argc == 1) + ; + else if (argc == 2) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + if (scale == 0.f) usage(); + width *= scale; + height *= scale; + // round up to multiples of 16 + width = (width + 0xf) & ~0xf; + height = (height + 0xf) & ~0xf; + } + else + usage(); + } + else + usage(); - /*******************/ - createContext(); - /*******************/ + /*******************/ + createContext(); + /*******************/ - int maxIterations = 512; - int *buf = new int[width*height]; + int maxIterations = 512; + int *buf = new int[width*height]; - for (unsigned int i = 0; i < width*height; i++) + for (unsigned int i = 0; i < width*height; i++) + buf[i] = 0; + const size_t bufsize = sizeof(int)*width*height; + devicePtr d_buf = deviceMalloc(bufsize); + memcpyH2D(d_buf, buf, bufsize); + + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; +#if 1 + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) buf[i] = 0; - const size_t bufsize = sizeof(int)*width*height; - devicePtr d_buf = deviceMalloc(bufsize); - memcpyH2D(d_buf, buf, bufsize); + reset_and_start_timer(); + const double t0 = rtc(); + mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf); + double dt = rtc() - t0; //get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } +#endif - // - // Compute the image using the ispc implementation; report the minimum - // time of three runs. - // - double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf); - double dt = get_elapsed_mcycles(); - minISPC = std::min(minISPC, dt); - } + memcpyD2H(buf, d_buf, bufsize); + deviceFree(d_buf); - memcpyD2H(buf, d_buf, bufsize); - deviceFree(d_buf); - - printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); - writePPM(buf, width, height, "mandelbrot-ispc.ppm"); + printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); + writePPM(buf, width, height, "mandelbrot-ispc.ppm"); - // - // And run the serial implementation 3 times, again reporting the - // minimum time. - // - double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); - minSerial = std::min(minSerial, dt); - } + // + // And run the serial implementation 3 times, again reporting the + // minimum time. + // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + const double t0 = rtc(); + mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = rtc() - t0; //get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } - printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); - writePPM(buf, width, height, "mandelbrot-serial.ppm"); + printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); + writePPM(buf, width, height, "mandelbrot-serial.ppm"); - printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); + printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); - return 0; + return 0; } diff --git a/examples_cuda/mandelbrot_tasks3d/mandelbrot_tasks3d b/examples_cuda/mandelbrot_tasks3d/mandelbrot_tasks3d deleted file mode 100755 index c3052fec..00000000 Binary files a/examples_cuda/mandelbrot_tasks3d/mandelbrot_tasks3d and /dev/null differ diff --git a/examples_cuda/stencil/stencil_cu b/examples_cuda/stencil/stencil_cu index 40e4a9ba..1667a10b 100755 Binary files a/examples_cuda/stencil/stencil_cu and b/examples_cuda/stencil/stencil_cu differ diff --git a/examples_cuda/stencil/stencil_cu.cpp b/examples_cuda/stencil/stencil_cu.cpp index 9cdd1050..f23809a1 100644 --- a/examples_cuda/stencil/stencil_cu.cpp +++ b/examples_cuda/stencil/stencil_cu.cpp @@ -49,14 +49,28 @@ using namespace ispc; #include #include #include "drvapi_error_string.h" +#include + + +double rtc(void) +{ + struct timeval Tvalue; + double etime; + struct timezone dummy; + + gettimeofday(&Tvalue,&dummy); + etime = (double) Tvalue.tv_sec + + 1.e-6*((double) Tvalue.tv_usec); + return etime; +} #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) // These are the inline versions for all of the SDK helper functions void __checkCudaErrors(CUresult err, const char *file, const int line) { if(CUDA_SUCCESS != err) { std::cerr << "checkCudeErrors() Driver API error = " << err << "\"" - << getCudaDrvErrorString(err) << "\" from file <" << file - << ", line " << line << "\n"; + << getCudaDrvErrorString(err) << "\" from file <" << file + << ", line " << line << "\n"; exit(-1); } } @@ -112,7 +126,7 @@ CUfunction getFunction(CUmodule &cudaModule, const char * function) checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function)); return cudaFunction; } - + CUdeviceptr deviceMalloc(const size_t size) { CUdeviceptr d_buf; @@ -133,13 +147,13 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size) } #define deviceLaunch(func,nbx,nby,nbz,params) \ checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \ - checkCudaErrors( \ - cuLaunchKernel( \ - (func), \ - ((nbx-1)/(128/32)+1), (nby), (nbz), \ - 128, 1, 1, \ - 0, NULL, (params), NULL \ - )); +checkCudaErrors( \ + cuLaunchKernel( \ + (func), \ + ((nbx-1)/(128/32)+1), (nby), (nbz), \ + 128, 1, 1, \ + 0, NULL, (params), NULL \ + )); typedef CUdeviceptr devicePtr; @@ -272,6 +286,7 @@ int main() { // the minimum time of three runs. // double minTimeISPC = 1e30; +#if 0 for (int i = 0; i < 3; ++i) { reset_and_start_timer(); loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width, @@ -280,6 +295,7 @@ int main() { double dt = get_elapsed_mcycles(); minTimeISPC = std::min(minTimeISPC, dt); } +#endif printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC); @@ -296,10 +312,11 @@ int main() { double minTimeISPCTasks = 1e30; for (int i = 0; i < 3; ++i) { reset_and_start_timer(); + const double t0 = rtc(); loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width, width, Nz - width, Nx, Ny, Nz, (double*)d_coeff, (double*)d_vsq, (double*)d_Aispc0, (double*)d_Aispc1); - double dt = get_elapsed_mcycles(); + double dt = rtc() - t0; //get_elapsed_mcycles(); minTimeISPCTasks = std::min(minTimeISPCTasks, dt); } memcpyD2H(Aispc[1], d_Aispc1, bufsize); diff --git a/examples_cuda/volume_rendering/Makefile b/examples_cuda/volume_rendering/Makefile index ca2d0958..032f3cbc 100644 --- a/examples_cuda/volume_rendering/Makefile +++ b/examples_cuda/volume_rendering/Makefile @@ -1,7 +1,7 @@ EXAMPLE=volume CPP_SRC=volume.cpp volume_serial.cpp -ISPC_SRC=volume.ispc +ISPC_SRC=volume1.ispc ISPC_IA_TARGETS=avx include ../common.mk diff --git a/examples_cuda/volume_rendering/camera.dat b/examples_cuda/volume_rendering/camera.dat index 555ac769..557c0443 100644 --- a/examples_cuda/volume_rendering/camera.dat +++ b/examples_cuda/volume_rendering/camera.dat @@ -1,4 +1,4 @@ -896 1184 +1792 2368 0.000155 0.000000 0.000000 -0.069927 0.000000 -0.000155 0.000000 0.093236 diff --git a/examples_cuda/volume_rendering/volume.cpp b/examples_cuda/volume_rendering/volume.cpp index 458cd407..17f0fe7b 100644 --- a/examples_cuda/volume_rendering/volume.cpp +++ b/examples_cuda/volume_rendering/volume.cpp @@ -44,64 +44,79 @@ #include "volume_ispc.h" using namespace ispc; +#include + + +static inline double rtc(void) +{ + struct timeval Tvalue; + double etime; + struct timezone dummy; + + gettimeofday(&Tvalue,&dummy); + etime = (double) Tvalue.tv_sec + + 1.e-6*((double) Tvalue.tv_usec); + return etime; +} + extern void volume_serial(float density[], int nVoxels[3], - const float raster2camera[4][4], - const float camera2world[4][4], - int width, int height, float image[]); + const float raster2camera[4][4], + const float camera2world[4][4], + int width, int height, float image[]); /* Write a PPM image file with the image */ static void writePPM(float *buf, int width, int height, const char *fn) { - FILE *fp = fopen(fn, "wb"); - fprintf(fp, "P6\n"); - fprintf(fp, "%d %d\n", width, height); - fprintf(fp, "255\n"); - for (int i = 0; i < width*height; ++i) { - float v = buf[i] * 255.f; - if (v < 0.f) v = 0.f; - else if (v > 255.f) v = 255.f; - unsigned char c = (unsigned char)v; - for (int j = 0; j < 3; ++j) - fputc(c, fp); - } - fclose(fp); - printf("Wrote image file %s\n", fn); + FILE *fp = fopen(fn, "wb"); + fprintf(fp, "P6\n"); + fprintf(fp, "%d %d\n", width, height); + fprintf(fp, "255\n"); + for (int i = 0; i < width*height; ++i) { + float v = buf[i] * 255.f; + if (v < 0.f) v = 0.f; + else if (v > 255.f) v = 255.f; + unsigned char c = (unsigned char)v; + for (int j = 0; j < 3; ++j) + fputc(c, fp); + } + fclose(fp); + printf("Wrote image file %s\n", fn); } /* Load image and viewing parameters from a camera data file. - FIXME: we should add support to be able to specify viewing parameters - in the program here directly. */ +FIXME: we should add support to be able to specify viewing parameters +in the program here directly. */ static void loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4], - float camera2world[4][4]) { - FILE *f = fopen(fn, "r"); - if (!f) { - perror(fn); - exit(1); - } - if (fscanf(f, "%d %d", width, height) != 2) { + float camera2world[4][4]) { + FILE *f = fopen(fn, "r"); + if (!f) { + perror(fn); + exit(1); + } + if (fscanf(f, "%d %d", width, height) != 2) { + fprintf(stderr, "Unexpected end of file in camera file\n"); + exit(1); + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + if (fscanf(f, "%f", &raster2camera[i][j]) != 1) { fprintf(stderr, "Unexpected end of file in camera file\n"); exit(1); + } } - - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 4; ++j) { - if (fscanf(f, "%f", &raster2camera[i][j]) != 1) { - fprintf(stderr, "Unexpected end of file in camera file\n"); - exit(1); - } - } + } + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + if (fscanf(f, "%f", &camera2world[i][j]) != 1) { + fprintf(stderr, "Unexpected end of file in camera file\n"); + exit(1); + } } - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 4; ++j) { - if (fscanf(f, "%f", &camera2world[i][j]) != 1) { - fprintf(stderr, "Unexpected end of file in camera file\n"); - exit(1); - } - } - } - fclose(f); + } + fclose(f); } @@ -110,105 +125,108 @@ loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4], floating-point values (also as strings) to give the densities. */ static float * loadVolume(const char *fn, int n[3]) { - FILE *f = fopen(fn, "r"); - if (!f) { - perror(fn); - exit(1); - } + FILE *f = fopen(fn, "r"); + if (!f) { + perror(fn); + exit(1); + } - if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) { - fprintf(stderr, "Couldn't find resolution at start of density file\n"); - exit(1); - } + if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) { + fprintf(stderr, "Couldn't find resolution at start of density file\n"); + exit(1); + } - int count = n[0] * n[1] * n[2]; - float *v = new float[count]; - for (int i = 0; i < count; ++i) { - if (fscanf(f, "%f", &v[i]) != 1) { - fprintf(stderr, "Unexpected end of file at %d'th density value\n", i); - exit(1); - } + int count = n[0] * n[1] * n[2]; + float *v = new float[count]; + for (int i = 0; i < count; ++i) { + if (fscanf(f, "%f", &v[i]) != 1) { + fprintf(stderr, "Unexpected end of file at %d'th density value\n", i); + exit(1); } + } - return v; + return v; } int main(int argc, char *argv[]) { - if (argc != 3) { - fprintf(stderr, "usage: volume \n"); - return 1; - } + if (argc != 3) { + fprintf(stderr, "usage: volume \n"); + return 1; + } - // - // Load viewing data and the volume density data - // - int width, height; - float raster2camera[4][4], camera2world[4][4]; - loadCamera(argv[1], &width, &height, raster2camera, camera2world); - float *image = new float[width*height]; + // + // Load viewing data and the volume density data + // + int width, height; + float raster2camera[4][4], camera2world[4][4]; + loadCamera(argv[1], &width, &height, raster2camera, camera2world); + float *image = new float[width*height]; - int n[3]; - float *density = loadVolume(argv[2], n); + int n[3]; + float *density = loadVolume(argv[2], n); - // - // Compute the image using the ispc implementation; report the minimum - // time of three runs. - // - double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { - reset_and_start_timer(); - volume_ispc(density, n, raster2camera, camera2world, - width, height, image); - double dt = get_elapsed_mcycles(); - minISPC = std::min(minISPC, dt); - } + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; +#if 0 + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + volume_ispc(density, n, raster2camera, camera2world, + width, height, image); + double dt = get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } +#endif - printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC); - writePPM(image, width, height, "volume-ispc-1core.ppm"); + printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC); + writePPM(image, width, height, "volume-ispc-1core.ppm"); - // Clear out the buffer - for (int i = 0; i < width * height; ++i) - image[i] = 0.; + // Clear out the buffer + for (int i = 0; i < width * height; ++i) + image[i] = 0.; - // - // Compute the image using the ispc implementation that also uses - // tasks; report the minimum time of three runs. - // - double minISPCtasks = 1e30; - for (int i = 0; i < 3; ++i) { - reset_and_start_timer(); - volume_ispc_tasks(density, n, raster2camera, camera2world, - width, height, image); - double dt = get_elapsed_mcycles(); - minISPCtasks = std::min(minISPCtasks, dt); - } + // + // Compute the image using the ispc implementation that also uses + // tasks; report the minimum time of three runs. + // + double minISPCtasks = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + const double t0 = rtc(); + volume_ispc_tasks(density, n, raster2camera, camera2world, + width, height, image); + double dt = rtc() - t0; //get_elapsed_mcycles(); + minISPCtasks = std::min(minISPCtasks, dt); + } - printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks); - writePPM(image, width, height, "volume-ispc-tasks.ppm"); + printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks); + writePPM(image, width, height, "volume-ispc-tasks.ppm"); - // Clear out the buffer - for (int i = 0; i < width * height; ++i) - image[i] = 0.; + // Clear out the buffer + for (int i = 0; i < width * height; ++i) + image[i] = 0.; - // - // And run the serial implementation 3 times, again reporting the - // minimum time. - // - double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { - reset_and_start_timer(); - volume_serial(density, n, raster2camera, camera2world, - width, height, image); - double dt = get_elapsed_mcycles(); - minSerial = std::min(minSerial, dt); - } + // + // And run the serial implementation 3 times, again reporting the + // minimum time. + // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + volume_serial(density, n, raster2camera, camera2world, + width, height, image); + double dt = get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } - printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial); - writePPM(image, width, height, "volume-serial.ppm"); + printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial); + writePPM(image, width, height, "volume-serial.ppm"); - printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", - minSerial/minISPC, minSerial / minISPCtasks); + printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", + minSerial/minISPC, minSerial / minISPCtasks); - return 0; + return 0; } diff --git a/examples_cuda/volume_rendering/volume_cu.cpp b/examples_cuda/volume_rendering/volume_cu.cpp index b020d9a7..b0d52374 100644 --- a/examples_cuda/volume_rendering/volume_cu.cpp +++ b/examples_cuda/volume_rendering/volume_cu.cpp @@ -44,6 +44,19 @@ #include "volume_ispc.h" using namespace ispc; +#include +static inline double rtc(void) +{ + struct timeval Tvalue; + double etime; + struct timezone dummy; + + gettimeofday(&Tvalue,&dummy); + etime = (double) Tvalue.tv_sec + + 1.e-6*((double) Tvalue.tv_usec); + return etime; +} + #include #include #include @@ -414,6 +427,7 @@ int main(int argc, char *argv[]) { double minISPCtasks = 1e30; for (int i = 0; i < 3; ++i) { reset_and_start_timer(); + const double t0 = rtc(); volume_ispc_tasks( (float*)d_density, (int*)d_n, @@ -421,7 +435,7 @@ int main(int argc, char *argv[]) { (float(*)[4])d_camera2world, width, height, (float*)d_image); - double dt = get_elapsed_mcycles(); + double dt = rtc() - t0; //get_elapsed_mcycles(); minISPCtasks = std::min(minISPCtasks, dt); }