+added wc-timer

This commit is contained in:
Evghenii
2013-11-08 15:27:51 +01:00
parent ce5f8cd46f
commit eb8e1a2160
9 changed files with 309 additions and 241 deletions

View File

@@ -2,7 +2,7 @@
EXAMPLE=mandelbrot_tasks3d EXAMPLE=mandelbrot_tasks3d
CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
ISPC_SRC=mandelbrot_tasks3d.ispc ISPC_SRC=mandelbrot_tasks3d.ispc
ISPC_IA_TARGETS=avx,sse2,sse4 ISPC_IA_TARGETS=avx
ISPC_ARM_TARGETS=neon ISPC_ARM_TARGETS=neon
include ../common.mk include ../common.mk

View File

@@ -43,6 +43,21 @@
#include <string.h> #include <string.h>
#include "../timing.h" #include "../timing.h"
#include <sys/time.h>
/* Wall-clock timestamp in seconds (fractional part has microsecond
 * resolution), as reported by gettimeofday().  Callers take one reading
 * before and one after the timed call and subtract them, so the result
 * of that subtraction is elapsed seconds.
 *
 * Returns 0.0 if gettimeofday() reports failure, instead of a value
 * assembled from an uninitialised timeval. */
double rtc(void)
{
  struct timeval now;
  /* The timezone argument is obsolete and its use is unspecified by
   * POSIX when non-null, so pass NULL rather than a dummy struct. */
  if (gettimeofday(&now, NULL) != 0)
    return 0.0;
  return (double) now.tv_sec + 1.e-6 * (double) now.tv_usec;
}
#include <iostream> #include <iostream>
#include <cuda.h> #include <cuda.h>
#include <vector> #include <vector>
@@ -54,17 +69,17 @@
void __checkCudaErrors(CUresult err, const char *file, const int line) { void __checkCudaErrors(CUresult err, const char *file, const int line) {
if(CUDA_SUCCESS != err) { if(CUDA_SUCCESS != err) {
std::cerr << "checkCudeErrors() Driver API error = " << err << "\"" std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
<< getCudaDrvErrorString(err) << "\" from file <" << file << getCudaDrvErrorString(err) << "\" from file <" << file
<< ", line " << line << "\n"; << ", line " << line << "\n";
exit(-1); exit(-1);
} }
} }
extern "C" extern "C"
void mandelbrot_ispc( void mandelbrot_ispc(
float x0, float y0, float x0, float y0,
float x1, float y1, float x1, float y1,
int width, int height, int width, int height,
int maxIterations, int output[]) ; int maxIterations, int output[]) ;
/**********************/ /**********************/
@@ -118,7 +133,7 @@ CUfunction getFunction(CUmodule &cudaModule, const char * function)
checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function)); checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function));
return cudaFunction; return cudaFunction;
} }
CUdeviceptr deviceMalloc(const size_t size) CUdeviceptr deviceMalloc(const size_t size)
{ {
CUdeviceptr d_buf; CUdeviceptr d_buf;
@@ -203,19 +218,19 @@ extern "C"
CUmodule loadModule(const char * module_name, const char * module_data) CUmodule loadModule(const char * module_name, const char * module_data)
{ {
const ModuleMap::iterator it = findModule(module_name) const ModuleMap::iterator it = findModule(module_name)
if (it != ModuleMap::end) if (it != ModuleMap::end)
{ {
CUmodule cudaModule = loadModule(module); CUmodule cudaModule = loadModule(module);
module_list.insert(std::make_pair(std::string(module_name), cudaModule)); module_list.insert(std::make_pair(std::string(module_name), cudaModule));
return cudaModule return cudaModule
} }
return it->second; return it->second;
} }
void unloadModule(const char * module_name) void unloadModule(const char * module_name)
{ {
ModuleMap::iterator it = findModule(module_name) ModuleMap::iterator it = findModule(module_name)
if (it != ModuleMap::end) if (it != ModuleMap::end)
module_list.erase(it); module_list.erase(it);
} }
}; };
#endif #endif
@@ -259,7 +274,7 @@ extern "C"
fprintf(stderr, " module_name= %s \n", module_name); fprintf(stderr, " module_name= %s \n", module_name);
fprintf(stderr, " func_name= %s \n", func_name); fprintf(stderr, " func_name= %s \n", func_name);
// fprintf(stderr, " ptx= %s \n", module); // fprintf(stderr, " ptx= %s \n", module);
fprintf(stderr, " x0= %g \n", *((float*)(func_args[0]))); fprintf(stderr, " x0= %g \n", *((float*)(func_args[0])));
fprintf(stderr, " dx= %g \n", *((float*)(func_args[1]))); fprintf(stderr, " dx= %g \n", *((float*)(func_args[1])));
fprintf(stderr, " y0= %g \n", *((float*)(func_args[2]))); fprintf(stderr, " y0= %g \n", *((float*)(func_args[2])));
@@ -289,114 +304,118 @@ extern "C"
extern void mandelbrot_serial(float x0, float y0, float x1, float y1, extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
int width, int height, int maxIterations, int width, int height, int maxIterations,
int output[]); int output[]);
/* Write a PPM image file with the image of the Mandelbrot set */ /* Write a PPM image file with the image of the Mandelbrot set */
static void static void
writePPM(int *buf, int width, int height, const char *fn) { writePPM(int *buf, int width, int height, const char *fn) {
FILE *fp = fopen(fn, "wb"); FILE *fp = fopen(fn, "wb");
fprintf(fp, "P6\n"); fprintf(fp, "P6\n");
fprintf(fp, "%d %d\n", width, height); fprintf(fp, "%d %d\n", width, height);
fprintf(fp, "255\n"); fprintf(fp, "255\n");
for (int i = 0; i < width*height; ++i) { for (int i = 0; i < width*height; ++i) {
// Map the iteration count to colors by just alternating between // Map the iteration count to colors by just alternating between
// two greys. // two greys.
char c = (buf[i] & 0x1) ? 240 : 20; char c = (buf[i] & 0x1) ? 240 : 20;
for (int j = 0; j < 3; ++j) for (int j = 0; j < 3; ++j)
fputc(c, fp); fputc(c, fp);
} }
fclose(fp); fclose(fp);
printf("Wrote image file %s\n", fn); printf("Wrote image file %s\n", fn);
} }
static void usage() { static void usage() {
fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n"); fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
exit(1); exit(1);
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
unsigned int width = 1536; unsigned int width = 1536;
unsigned int height = 1024; unsigned int height = 1024;
float x0 = -2; float x0 = -2;
float x1 = 1; float x1 = 1;
float y0 = -1; float y0 = -1;
float y1 = 1; float y1 = 1;
if (argc == 1) if (argc == 1)
; ;
else if (argc == 2) { else if (argc == 2) {
if (strncmp(argv[1], "--scale=", 8) == 0) { if (strncmp(argv[1], "--scale=", 8) == 0) {
float scale = atof(argv[1] + 8); float scale = atof(argv[1] + 8);
if (scale == 0.f) if (scale == 0.f)
usage();
width *= scale;
height *= scale;
// round up to multiples of 16
width = (width + 0xf) & ~0xf;
height = (height + 0xf) & ~0xf;
}
else
usage();
}
else
usage(); usage();
width *= scale;
height *= scale;
// round up to multiples of 16
width = (width + 0xf) & ~0xf;
height = (height + 0xf) & ~0xf;
}
else
usage();
}
else
usage();
/*******************/ /*******************/
createContext(); createContext();
/*******************/ /*******************/
int maxIterations = 512; int maxIterations = 512;
int *buf = new int[width*height]; int *buf = new int[width*height];
for (unsigned int i = 0; i < width*height; i++) for (unsigned int i = 0; i < width*height; i++)
buf[i] = 0;
const size_t bufsize = sizeof(int)*width*height;
devicePtr d_buf = deviceMalloc(bufsize);
memcpyH2D(d_buf, buf, bufsize);
//
// Compute the image using the ispc implementation; report the minimum
// time of three runs.
//
double minISPC = 1e30;
#if 1
for (int i = 0; i < 3; ++i) {
// Clear out the buffer
for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0; buf[i] = 0;
const size_t bufsize = sizeof(int)*width*height; reset_and_start_timer();
devicePtr d_buf = deviceMalloc(bufsize); const double t0 = rtc();
memcpyH2D(d_buf, buf, bufsize); mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
double dt = rtc() - t0; //get_elapsed_mcycles();
minISPC = std::min(minISPC, dt);
}
#endif
// memcpyD2H(buf, d_buf, bufsize);
// Compute the image using the ispc implementation; report the minimum deviceFree(d_buf);
// time of three runs.
//
double minISPC = 1e30;
for (int i = 0; i < 3; ++i) {
// Clear out the buffer
for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0;
reset_and_start_timer();
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
double dt = get_elapsed_mcycles();
minISPC = std::min(minISPC, dt);
}
memcpyD2H(buf, d_buf, bufsize); printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
deviceFree(d_buf); writePPM(buf, width, height, "mandelbrot-ispc.ppm");
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
// //
// And run the serial implementation 3 times, again reporting the // And run the serial implementation 3 times, again reporting the
// minimum time. // minimum time.
// //
double minSerial = 1e30; double minSerial = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
// Clear out the buffer // Clear out the buffer
for (unsigned int i = 0; i < width * height; ++i) for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0; buf[i] = 0;
reset_and_start_timer(); reset_and_start_timer();
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); const double t0 = rtc();
double dt = get_elapsed_mcycles(); mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
minSerial = std::min(minSerial, dt); double dt = rtc() - t0; //get_elapsed_mcycles();
} minSerial = std::min(minSerial, dt);
}
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
writePPM(buf, width, height, "mandelbrot-serial.ppm"); writePPM(buf, width, height, "mandelbrot-serial.ppm");
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
return 0; return 0;
} }

Binary file not shown.

View File

@@ -49,14 +49,28 @@ using namespace ispc;
#include <iostream> #include <iostream>
#include <cuda.h> #include <cuda.h>
#include "drvapi_error_string.h" #include "drvapi_error_string.h"
#include <sys/time.h>
/* Returns the current wall-clock time in seconds, with the sub-second
 * part taken from the microsecond field of gettimeofday().  Two readings
 * are subtracted by the caller to obtain an elapsed time in seconds.
 *
 * If gettimeofday() fails, 0.0 is returned so the caller never sees a
 * value computed from an uninitialised timeval. */
double rtc(void)
{
  struct timeval tv;
  /* Pass NULL for the timezone: that parameter is obsolete and POSIX
   * leaves the behaviour unspecified when it is non-null. */
  const int rc = gettimeofday(&tv, NULL);
  if (rc != 0)
    return 0.0;
  const double seconds      = (double) tv.tv_sec;
  const double microseconds = (double) tv.tv_usec;
  return seconds + 1.e-6 * microseconds;
}
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
// These are the inline versions for all of the SDK helper functions // These are the inline versions for all of the SDK helper functions
void __checkCudaErrors(CUresult err, const char *file, const int line) { void __checkCudaErrors(CUresult err, const char *file, const int line) {
if(CUDA_SUCCESS != err) { if(CUDA_SUCCESS != err) {
std::cerr << "checkCudeErrors() Driver API error = " << err << "\"" std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
<< getCudaDrvErrorString(err) << "\" from file <" << file << getCudaDrvErrorString(err) << "\" from file <" << file
<< ", line " << line << "\n"; << ", line " << line << "\n";
exit(-1); exit(-1);
} }
} }
@@ -112,7 +126,7 @@ CUfunction getFunction(CUmodule &cudaModule, const char * function)
checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function)); checkCudaErrors(cuModuleGetFunction(&cudaFunction, cudaModule, function));
return cudaFunction; return cudaFunction;
} }
CUdeviceptr deviceMalloc(const size_t size) CUdeviceptr deviceMalloc(const size_t size)
{ {
CUdeviceptr d_buf; CUdeviceptr d_buf;
@@ -133,13 +147,13 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
} }
#define deviceLaunch(func,nbx,nby,nbz,params) \ #define deviceLaunch(func,nbx,nby,nbz,params) \
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \ checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
checkCudaErrors( \ checkCudaErrors( \
cuLaunchKernel( \ cuLaunchKernel( \
(func), \ (func), \
((nbx-1)/(128/32)+1), (nby), (nbz), \ ((nbx-1)/(128/32)+1), (nby), (nbz), \
128, 1, 1, \ 128, 1, 1, \
0, NULL, (params), NULL \ 0, NULL, (params), NULL \
)); ));
typedef CUdeviceptr devicePtr; typedef CUdeviceptr devicePtr;
@@ -272,6 +286,7 @@ int main() {
// the minimum time of three runs. // the minimum time of three runs.
// //
double minTimeISPC = 1e30; double minTimeISPC = 1e30;
#if 0
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width, loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
@@ -280,6 +295,7 @@ int main() {
double dt = get_elapsed_mcycles(); double dt = get_elapsed_mcycles();
minTimeISPC = std::min(minTimeISPC, dt); minTimeISPC = std::min(minTimeISPC, dt);
} }
#endif
printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC); printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC);
@@ -296,10 +312,11 @@ int main() {
double minTimeISPCTasks = 1e30; double minTimeISPCTasks = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
const double t0 = rtc();
loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width, loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
width, Nz - width, Nx, Ny, Nz, (double*)d_coeff, (double*)d_vsq, width, Nz - width, Nx, Ny, Nz, (double*)d_coeff, (double*)d_vsq,
(double*)d_Aispc0, (double*)d_Aispc1); (double*)d_Aispc0, (double*)d_Aispc1);
double dt = get_elapsed_mcycles(); double dt = rtc() - t0; //get_elapsed_mcycles();
minTimeISPCTasks = std::min(minTimeISPCTasks, dt); minTimeISPCTasks = std::min(minTimeISPCTasks, dt);
} }
memcpyD2H(Aispc[1], d_Aispc1, bufsize); memcpyD2H(Aispc[1], d_Aispc1, bufsize);

View File

@@ -1,7 +1,7 @@
EXAMPLE=volume EXAMPLE=volume
CPP_SRC=volume.cpp volume_serial.cpp CPP_SRC=volume.cpp volume_serial.cpp
ISPC_SRC=volume.ispc ISPC_SRC=volume1.ispc
ISPC_IA_TARGETS=avx ISPC_IA_TARGETS=avx
include ../common.mk include ../common.mk

View File

@@ -1,4 +1,4 @@
896 1184 1792 2368
0.000155 0.000000 0.000000 -0.069927 0.000155 0.000000 0.000000 -0.069927
0.000000 -0.000155 0.000000 0.093236 0.000000 -0.000155 0.000000 0.093236

View File

@@ -44,64 +44,79 @@
#include "volume_ispc.h" #include "volume_ispc.h"
using namespace ispc; using namespace ispc;
#include <sys/time.h>
/* Wall-clock timestamp in seconds, built from the tv_sec and tv_usec
 * fields filled in by gettimeofday().  Used as a start/stop pair around
 * a timed call; the difference of two readings is elapsed seconds.
 *
 * Returns 0.0 when gettimeofday() fails, rather than reading an
 * uninitialised timeval. */
static inline double rtc(void)
{
  struct timeval stamp;
  /* NULL timezone: the second parameter is obsolete, and POSIX does not
   * specify what happens when it is non-null. */
  if (gettimeofday(&stamp, NULL) != 0)
    return 0.0;
  return (double) stamp.tv_sec + 1.e-6 * (double) stamp.tv_usec;
}
extern void volume_serial(float density[], int nVoxels[3], extern void volume_serial(float density[], int nVoxels[3],
const float raster2camera[4][4], const float raster2camera[4][4],
const float camera2world[4][4], const float camera2world[4][4],
int width, int height, float image[]); int width, int height, float image[]);
/* Write a PPM image file with the image */ /* Write a PPM image file with the image */
static void static void
writePPM(float *buf, int width, int height, const char *fn) { writePPM(float *buf, int width, int height, const char *fn) {
FILE *fp = fopen(fn, "wb"); FILE *fp = fopen(fn, "wb");
fprintf(fp, "P6\n"); fprintf(fp, "P6\n");
fprintf(fp, "%d %d\n", width, height); fprintf(fp, "%d %d\n", width, height);
fprintf(fp, "255\n"); fprintf(fp, "255\n");
for (int i = 0; i < width*height; ++i) { for (int i = 0; i < width*height; ++i) {
float v = buf[i] * 255.f; float v = buf[i] * 255.f;
if (v < 0.f) v = 0.f; if (v < 0.f) v = 0.f;
else if (v > 255.f) v = 255.f; else if (v > 255.f) v = 255.f;
unsigned char c = (unsigned char)v; unsigned char c = (unsigned char)v;
for (int j = 0; j < 3; ++j) for (int j = 0; j < 3; ++j)
fputc(c, fp); fputc(c, fp);
} }
fclose(fp); fclose(fp);
printf("Wrote image file %s\n", fn); printf("Wrote image file %s\n", fn);
} }
/* Load image and viewing parameters from a camera data file. /* Load image and viewing parameters from a camera data file.
FIXME: we should add support to be able to specify viewing parameters FIXME: we should add support to be able to specify viewing parameters
in the program here directly. */ in the program here directly. */
static void static void
loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4], loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
float camera2world[4][4]) { float camera2world[4][4]) {
FILE *f = fopen(fn, "r"); FILE *f = fopen(fn, "r");
if (!f) { if (!f) {
perror(fn); perror(fn);
exit(1); exit(1);
} }
if (fscanf(f, "%d %d", width, height) != 2) { if (fscanf(f, "%d %d", width, height) != 2) {
fprintf(stderr, "Unexpected end of file in camera file\n");
exit(1);
}
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
if (fscanf(f, "%f", &raster2camera[i][j]) != 1) {
fprintf(stderr, "Unexpected end of file in camera file\n"); fprintf(stderr, "Unexpected end of file in camera file\n");
exit(1); exit(1);
}
} }
}
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) { for (int j = 0; j < 4; ++j) {
if (fscanf(f, "%f", &raster2camera[i][j]) != 1) { if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
fprintf(stderr, "Unexpected end of file in camera file\n"); fprintf(stderr, "Unexpected end of file in camera file\n");
exit(1); exit(1);
} }
}
} }
for (int i = 0; i < 4; ++i) { }
for (int j = 0; j < 4; ++j) { fclose(f);
if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
fprintf(stderr, "Unexpected end of file in camera file\n");
exit(1);
}
}
}
fclose(f);
} }
@@ -110,105 +125,108 @@ loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
floating-point values (also as strings) to give the densities. */ floating-point values (also as strings) to give the densities. */
static float * static float *
loadVolume(const char *fn, int n[3]) { loadVolume(const char *fn, int n[3]) {
FILE *f = fopen(fn, "r"); FILE *f = fopen(fn, "r");
if (!f) { if (!f) {
perror(fn); perror(fn);
exit(1); exit(1);
} }
if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) { if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) {
fprintf(stderr, "Couldn't find resolution at start of density file\n"); fprintf(stderr, "Couldn't find resolution at start of density file\n");
exit(1); exit(1);
} }
int count = n[0] * n[1] * n[2]; int count = n[0] * n[1] * n[2];
float *v = new float[count]; float *v = new float[count];
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
if (fscanf(f, "%f", &v[i]) != 1) { if (fscanf(f, "%f", &v[i]) != 1) {
fprintf(stderr, "Unexpected end of file at %d'th density value\n", i); fprintf(stderr, "Unexpected end of file at %d'th density value\n", i);
exit(1); exit(1);
}
} }
}
return v; return v;
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc != 3) { if (argc != 3) {
fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol>\n"); fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol>\n");
return 1; return 1;
} }
// //
// Load viewing data and the volume density data // Load viewing data and the volume density data
// //
int width, height; int width, height;
float raster2camera[4][4], camera2world[4][4]; float raster2camera[4][4], camera2world[4][4];
loadCamera(argv[1], &width, &height, raster2camera, camera2world); loadCamera(argv[1], &width, &height, raster2camera, camera2world);
float *image = new float[width*height]; float *image = new float[width*height];
int n[3]; int n[3];
float *density = loadVolume(argv[2], n); float *density = loadVolume(argv[2], n);
// //
// Compute the image using the ispc implementation; report the minimum // Compute the image using the ispc implementation; report the minimum
// time of three runs. // time of three runs.
// //
double minISPC = 1e30; double minISPC = 1e30;
for (int i = 0; i < 3; ++i) { #if 0
reset_and_start_timer(); for (int i = 0; i < 3; ++i) {
volume_ispc(density, n, raster2camera, camera2world, reset_and_start_timer();
width, height, image); volume_ispc(density, n, raster2camera, camera2world,
double dt = get_elapsed_mcycles(); width, height, image);
minISPC = std::min(minISPC, dt); double dt = get_elapsed_mcycles();
} minISPC = std::min(minISPC, dt);
}
#endif
printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC); printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC);
writePPM(image, width, height, "volume-ispc-1core.ppm"); writePPM(image, width, height, "volume-ispc-1core.ppm");
// Clear out the buffer // Clear out the buffer
for (int i = 0; i < width * height; ++i) for (int i = 0; i < width * height; ++i)
image[i] = 0.; image[i] = 0.;
// //
// Compute the image using the ispc implementation that also uses // Compute the image using the ispc implementation that also uses
// tasks; report the minimum time of three runs. // tasks; report the minimum time of three runs.
// //
double minISPCtasks = 1e30; double minISPCtasks = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
volume_ispc_tasks(density, n, raster2camera, camera2world, const double t0 = rtc();
width, height, image); volume_ispc_tasks(density, n, raster2camera, camera2world,
double dt = get_elapsed_mcycles(); width, height, image);
minISPCtasks = std::min(minISPCtasks, dt); double dt = rtc() - t0; //get_elapsed_mcycles();
} minISPCtasks = std::min(minISPCtasks, dt);
}
printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks); printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks);
writePPM(image, width, height, "volume-ispc-tasks.ppm"); writePPM(image, width, height, "volume-ispc-tasks.ppm");
// Clear out the buffer // Clear out the buffer
for (int i = 0; i < width * height; ++i) for (int i = 0; i < width * height; ++i)
image[i] = 0.; image[i] = 0.;
// //
// And run the serial implementation 3 times, again reporting the // And run the serial implementation 3 times, again reporting the
// minimum time. // minimum time.
// //
double minSerial = 1e30; double minSerial = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
volume_serial(density, n, raster2camera, camera2world, volume_serial(density, n, raster2camera, camera2world,
width, height, image); width, height, image);
double dt = get_elapsed_mcycles(); double dt = get_elapsed_mcycles();
minSerial = std::min(minSerial, dt); minSerial = std::min(minSerial, dt);
} }
printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial); printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial);
writePPM(image, width, height, "volume-serial.ppm"); writePPM(image, width, height, "volume-serial.ppm");
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
minSerial/minISPC, minSerial / minISPCtasks); minSerial/minISPC, minSerial / minISPCtasks);
return 0; return 0;
} }

View File

@@ -44,6 +44,19 @@
#include "volume_ispc.h" #include "volume_ispc.h"
using namespace ispc; using namespace ispc;
#include <sys/time.h>
/* Reads the wall clock via gettimeofday() and returns it as seconds in a
 * double (whole seconds plus microseconds scaled by 1e-6).  The caller
 * subtracts a reading taken before the timed call from one taken after
 * it to get elapsed seconds.
 *
 * A failing gettimeofday() yields 0.0 instead of a value derived from an
 * uninitialised timeval. */
static inline double rtc(void)
{
  struct timeval t;
  /* The timezone out-parameter is obsolete and unspecified by POSIX
   * when non-null; NULL is the portable argument. */
  if (gettimeofday(&t, NULL) != 0)
    return 0.0;
  double result = (double) t.tv_sec;
  result += 1.e-6 * (double) t.tv_usec;
  return result;
}
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
#include <cuda.h> #include <cuda.h>
@@ -414,6 +427,7 @@ int main(int argc, char *argv[]) {
double minISPCtasks = 1e30; double minISPCtasks = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
const double t0 = rtc();
volume_ispc_tasks( volume_ispc_tasks(
(float*)d_density, (float*)d_density,
(int*)d_n, (int*)d_n,
@@ -421,7 +435,7 @@ int main(int argc, char *argv[]) {
(float(*)[4])d_camera2world, (float(*)[4])d_camera2world,
width, height, width, height,
(float*)d_image); (float*)d_image);
double dt = get_elapsed_mcycles(); double dt = rtc() - t0; //get_elapsed_mcycles();
minISPCtasks = std::min(minISPCtasks, dt); minISPCtasks = std::min(minISPCtasks, dt);
} }