+added wc-timer
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
EXAMPLE=mandelbrot_tasks3d
|
EXAMPLE=mandelbrot_tasks3d
|
||||||
CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
|
CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
|
||||||
ISPC_SRC=mandelbrot_tasks3d.ispc
|
ISPC_SRC=mandelbrot_tasks3d.ispc
|
||||||
ISPC_IA_TARGETS=avx,sse2,sse4
|
ISPC_IA_TARGETS=avx
|
||||||
ISPC_ARM_TARGETS=neon
|
ISPC_ARM_TARGETS=neon
|
||||||
|
|
||||||
include ../common.mk
|
include ../common.mk
|
||||||
|
|||||||
@@ -43,6 +43,21 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
|
||||||
|
double rtc(void)
|
||||||
|
{
|
||||||
|
struct timeval Tvalue;
|
||||||
|
double etime;
|
||||||
|
struct timezone dummy;
|
||||||
|
|
||||||
|
gettimeofday(&Tvalue,&dummy);
|
||||||
|
etime = (double) Tvalue.tv_sec +
|
||||||
|
1.e-6*((double) Tvalue.tv_usec);
|
||||||
|
return etime;
|
||||||
|
}
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -54,17 +69,17 @@
|
|||||||
void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
||||||
if(CUDA_SUCCESS != err) {
|
if(CUDA_SUCCESS != err) {
|
||||||
std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
|
std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
|
||||||
<< getCudaDrvErrorString(err) << "\" from file <" << file
|
<< getCudaDrvErrorString(err) << "\" from file <" << file
|
||||||
<< ", line " << line << "\n";
|
<< ", line " << line << "\n";
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
extern "C"
|
extern "C"
|
||||||
void mandelbrot_ispc(
|
void mandelbrot_ispc(
|
||||||
float x0, float y0,
|
float x0, float y0,
|
||||||
float x1, float y1,
|
float x1, float y1,
|
||||||
int width, int height,
|
int width, int height,
|
||||||
int maxIterations, int output[]) ;
|
int maxIterations, int output[]) ;
|
||||||
|
|
||||||
|
|
||||||
/**********************/
|
/**********************/
|
||||||
@@ -203,19 +218,19 @@ extern "C"
|
|||||||
CUmodule loadModule(const char * module_name, const char * module_data)
|
CUmodule loadModule(const char * module_name, const char * module_data)
|
||||||
{
|
{
|
||||||
const ModuleMap::iterator it = findModule(module_name)
|
const ModuleMap::iterator it = findModule(module_name)
|
||||||
if (it != ModuleMap::end)
|
if (it != ModuleMap::end)
|
||||||
{
|
{
|
||||||
CUmodule cudaModule = loadModule(module);
|
CUmodule cudaModule = loadModule(module);
|
||||||
module_list.insert(std::make_pair(std::string(module_name), cudaModule));
|
module_list.insert(std::make_pair(std::string(module_name), cudaModule));
|
||||||
return cudaModule
|
return cudaModule
|
||||||
}
|
}
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
void unloadModule(const char * module_name)
|
void unloadModule(const char * module_name)
|
||||||
{
|
{
|
||||||
ModuleMap::iterator it = findModule(module_name)
|
ModuleMap::iterator it = findModule(module_name)
|
||||||
if (it != ModuleMap::end)
|
if (it != ModuleMap::end)
|
||||||
module_list.erase(it);
|
module_list.erase(it);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
@@ -259,7 +274,7 @@ extern "C"
|
|||||||
|
|
||||||
fprintf(stderr, " module_name= %s \n", module_name);
|
fprintf(stderr, " module_name= %s \n", module_name);
|
||||||
fprintf(stderr, " func_name= %s \n", func_name);
|
fprintf(stderr, " func_name= %s \n", func_name);
|
||||||
// fprintf(stderr, " ptx= %s \n", module);
|
// fprintf(stderr, " ptx= %s \n", module);
|
||||||
fprintf(stderr, " x0= %g \n", *((float*)(func_args[0])));
|
fprintf(stderr, " x0= %g \n", *((float*)(func_args[0])));
|
||||||
fprintf(stderr, " dx= %g \n", *((float*)(func_args[1])));
|
fprintf(stderr, " dx= %g \n", *((float*)(func_args[1])));
|
||||||
fprintf(stderr, " y0= %g \n", *((float*)(func_args[2])));
|
fprintf(stderr, " y0= %g \n", *((float*)(func_args[2])));
|
||||||
@@ -289,114 +304,118 @@ extern "C"
|
|||||||
|
|
||||||
|
|
||||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||||
int width, int height, int maxIterations,
|
int width, int height, int maxIterations,
|
||||||
int output[]);
|
int output[]);
|
||||||
|
|
||||||
/* Write a PPM image file with the image of the Mandelbrot set */
|
/* Write a PPM image file with the image of the Mandelbrot set */
|
||||||
static void
|
static void
|
||||||
writePPM(int *buf, int width, int height, const char *fn) {
|
writePPM(int *buf, int width, int height, const char *fn) {
|
||||||
FILE *fp = fopen(fn, "wb");
|
FILE *fp = fopen(fn, "wb");
|
||||||
fprintf(fp, "P6\n");
|
fprintf(fp, "P6\n");
|
||||||
fprintf(fp, "%d %d\n", width, height);
|
fprintf(fp, "%d %d\n", width, height);
|
||||||
fprintf(fp, "255\n");
|
fprintf(fp, "255\n");
|
||||||
for (int i = 0; i < width*height; ++i) {
|
for (int i = 0; i < width*height; ++i) {
|
||||||
// Map the iteration count to colors by just alternating between
|
// Map the iteration count to colors by just alternating between
|
||||||
// two greys.
|
// two greys.
|
||||||
char c = (buf[i] & 0x1) ? 240 : 20;
|
char c = (buf[i] & 0x1) ? 240 : 20;
|
||||||
for (int j = 0; j < 3; ++j)
|
for (int j = 0; j < 3; ++j)
|
||||||
fputc(c, fp);
|
fputc(c, fp);
|
||||||
}
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
printf("Wrote image file %s\n", fn);
|
printf("Wrote image file %s\n", fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void usage() {
|
static void usage() {
|
||||||
fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
|
fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
unsigned int width = 1536;
|
unsigned int width = 1536;
|
||||||
unsigned int height = 1024;
|
unsigned int height = 1024;
|
||||||
float x0 = -2;
|
float x0 = -2;
|
||||||
float x1 = 1;
|
float x1 = 1;
|
||||||
float y0 = -1;
|
float y0 = -1;
|
||||||
float y1 = 1;
|
float y1 = 1;
|
||||||
|
|
||||||
if (argc == 1)
|
if (argc == 1)
|
||||||
;
|
;
|
||||||
else if (argc == 2) {
|
else if (argc == 2) {
|
||||||
if (strncmp(argv[1], "--scale=", 8) == 0) {
|
if (strncmp(argv[1], "--scale=", 8) == 0) {
|
||||||
float scale = atof(argv[1] + 8);
|
float scale = atof(argv[1] + 8);
|
||||||
if (scale == 0.f)
|
if (scale == 0.f)
|
||||||
usage();
|
usage();
|
||||||
width *= scale;
|
width *= scale;
|
||||||
height *= scale;
|
height *= scale;
|
||||||
// round up to multiples of 16
|
// round up to multiples of 16
|
||||||
width = (width + 0xf) & ~0xf;
|
width = (width + 0xf) & ~0xf;
|
||||||
height = (height + 0xf) & ~0xf;
|
height = (height + 0xf) & ~0xf;
|
||||||
}
|
|
||||||
else
|
|
||||||
usage();
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
usage();
|
usage();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
usage();
|
||||||
|
|
||||||
/*******************/
|
/*******************/
|
||||||
createContext();
|
createContext();
|
||||||
/*******************/
|
/*******************/
|
||||||
|
|
||||||
int maxIterations = 512;
|
int maxIterations = 512;
|
||||||
int *buf = new int[width*height];
|
int *buf = new int[width*height];
|
||||||
|
|
||||||
for (unsigned int i = 0; i < width*height; i++)
|
for (unsigned int i = 0; i < width*height; i++)
|
||||||
|
buf[i] = 0;
|
||||||
|
const size_t bufsize = sizeof(int)*width*height;
|
||||||
|
devicePtr d_buf = deviceMalloc(bufsize);
|
||||||
|
memcpyH2D(d_buf, buf, bufsize);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Compute the image using the ispc implementation; report the minimum
|
||||||
|
// time of three runs.
|
||||||
|
//
|
||||||
|
double minISPC = 1e30;
|
||||||
|
#if 1
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
// Clear out the buffer
|
||||||
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
const size_t bufsize = sizeof(int)*width*height;
|
reset_and_start_timer();
|
||||||
devicePtr d_buf = deviceMalloc(bufsize);
|
const double t0 = rtc();
|
||||||
memcpyH2D(d_buf, buf, bufsize);
|
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
|
||||||
|
double dt = rtc() - t0; //get_elapsed_mcycles();
|
||||||
|
minISPC = std::min(minISPC, dt);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//
|
memcpyD2H(buf, d_buf, bufsize);
|
||||||
// Compute the image using the ispc implementation; report the minimum
|
deviceFree(d_buf);
|
||||||
// time of three runs.
|
|
||||||
//
|
|
||||||
double minISPC = 1e30;
|
|
||||||
for (int i = 0; i < 3; ++i) {
|
|
||||||
// Clear out the buffer
|
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
|
||||||
buf[i] = 0;
|
|
||||||
reset_and_start_timer();
|
|
||||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
|
|
||||||
double dt = get_elapsed_mcycles();
|
|
||||||
minISPC = std::min(minISPC, dt);
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpyD2H(buf, d_buf, bufsize);
|
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
||||||
deviceFree(d_buf);
|
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||||
|
|
||||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
|
||||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// And run the serial implementation 3 times, again reporting the
|
// And run the serial implementation 3 times, again reporting the
|
||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
const double t0 = rtc();
|
||||||
double dt = get_elapsed_mcycles();
|
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||||
minSerial = std::min(minSerial, dt);
|
double dt = rtc() - t0; //get_elapsed_mcycles();
|
||||||
}
|
minSerial = std::min(minSerial, dt);
|
||||||
|
}
|
||||||
|
|
||||||
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
|
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
|
||||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||||
|
|
||||||
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -49,14 +49,28 @@ using namespace ispc;
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include "drvapi_error_string.h"
|
#include "drvapi_error_string.h"
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
|
||||||
|
double rtc(void)
|
||||||
|
{
|
||||||
|
struct timeval Tvalue;
|
||||||
|
double etime;
|
||||||
|
struct timezone dummy;
|
||||||
|
|
||||||
|
gettimeofday(&Tvalue,&dummy);
|
||||||
|
etime = (double) Tvalue.tv_sec +
|
||||||
|
1.e-6*((double) Tvalue.tv_usec);
|
||||||
|
return etime;
|
||||||
|
}
|
||||||
|
|
||||||
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
|
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
|
||||||
// These are the inline versions for all of the SDK helper functions
|
// These are the inline versions for all of the SDK helper functions
|
||||||
void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
||||||
if(CUDA_SUCCESS != err) {
|
if(CUDA_SUCCESS != err) {
|
||||||
std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
|
std::cerr << "checkCudeErrors() Driver API error = " << err << "\""
|
||||||
<< getCudaDrvErrorString(err) << "\" from file <" << file
|
<< getCudaDrvErrorString(err) << "\" from file <" << file
|
||||||
<< ", line " << line << "\n";
|
<< ", line " << line << "\n";
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -133,13 +147,13 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
|
|||||||
}
|
}
|
||||||
#define deviceLaunch(func,nbx,nby,nbz,params) \
|
#define deviceLaunch(func,nbx,nby,nbz,params) \
|
||||||
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
|
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
|
||||||
checkCudaErrors( \
|
checkCudaErrors( \
|
||||||
cuLaunchKernel( \
|
cuLaunchKernel( \
|
||||||
(func), \
|
(func), \
|
||||||
((nbx-1)/(128/32)+1), (nby), (nbz), \
|
((nbx-1)/(128/32)+1), (nby), (nbz), \
|
||||||
128, 1, 1, \
|
128, 1, 1, \
|
||||||
0, NULL, (params), NULL \
|
0, NULL, (params), NULL \
|
||||||
));
|
));
|
||||||
|
|
||||||
typedef CUdeviceptr devicePtr;
|
typedef CUdeviceptr devicePtr;
|
||||||
|
|
||||||
@@ -272,6 +286,7 @@ int main() {
|
|||||||
// the minimum time of three runs.
|
// the minimum time of three runs.
|
||||||
//
|
//
|
||||||
double minTimeISPC = 1e30;
|
double minTimeISPC = 1e30;
|
||||||
|
#if 0
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
|
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
|
||||||
@@ -280,6 +295,7 @@ int main() {
|
|||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
minTimeISPC = std::min(minTimeISPC, dt);
|
minTimeISPC = std::min(minTimeISPC, dt);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC);
|
printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC);
|
||||||
|
|
||||||
@@ -296,10 +312,11 @@ int main() {
|
|||||||
double minTimeISPCTasks = 1e30;
|
double minTimeISPCTasks = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
|
const double t0 = rtc();
|
||||||
loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
|
loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
|
||||||
width, Nz - width, Nx, Ny, Nz, (double*)d_coeff, (double*)d_vsq,
|
width, Nz - width, Nx, Ny, Nz, (double*)d_coeff, (double*)d_vsq,
|
||||||
(double*)d_Aispc0, (double*)d_Aispc1);
|
(double*)d_Aispc0, (double*)d_Aispc1);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = rtc() - t0; //get_elapsed_mcycles();
|
||||||
minTimeISPCTasks = std::min(minTimeISPCTasks, dt);
|
minTimeISPCTasks = std::min(minTimeISPCTasks, dt);
|
||||||
}
|
}
|
||||||
memcpyD2H(Aispc[1], d_Aispc1, bufsize);
|
memcpyD2H(Aispc[1], d_Aispc1, bufsize);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
EXAMPLE=volume
|
EXAMPLE=volume
|
||||||
CPP_SRC=volume.cpp volume_serial.cpp
|
CPP_SRC=volume.cpp volume_serial.cpp
|
||||||
ISPC_SRC=volume.ispc
|
ISPC_SRC=volume1.ispc
|
||||||
ISPC_IA_TARGETS=avx
|
ISPC_IA_TARGETS=avx
|
||||||
|
|
||||||
include ../common.mk
|
include ../common.mk
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
896 1184
|
1792 2368
|
||||||
|
|
||||||
0.000155 0.000000 0.000000 -0.069927
|
0.000155 0.000000 0.000000 -0.069927
|
||||||
0.000000 -0.000155 0.000000 0.093236
|
0.000000 -0.000155 0.000000 0.093236
|
||||||
|
|||||||
@@ -44,64 +44,79 @@
|
|||||||
#include "volume_ispc.h"
|
#include "volume_ispc.h"
|
||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
|
||||||
|
static inline double rtc(void)
|
||||||
|
{
|
||||||
|
struct timeval Tvalue;
|
||||||
|
double etime;
|
||||||
|
struct timezone dummy;
|
||||||
|
|
||||||
|
gettimeofday(&Tvalue,&dummy);
|
||||||
|
etime = (double) Tvalue.tv_sec +
|
||||||
|
1.e-6*((double) Tvalue.tv_usec);
|
||||||
|
return etime;
|
||||||
|
}
|
||||||
|
|
||||||
extern void volume_serial(float density[], int nVoxels[3],
|
extern void volume_serial(float density[], int nVoxels[3],
|
||||||
const float raster2camera[4][4],
|
const float raster2camera[4][4],
|
||||||
const float camera2world[4][4],
|
const float camera2world[4][4],
|
||||||
int width, int height, float image[]);
|
int width, int height, float image[]);
|
||||||
|
|
||||||
/* Write a PPM image file with the image */
|
/* Write a PPM image file with the image */
|
||||||
static void
|
static void
|
||||||
writePPM(float *buf, int width, int height, const char *fn) {
|
writePPM(float *buf, int width, int height, const char *fn) {
|
||||||
FILE *fp = fopen(fn, "wb");
|
FILE *fp = fopen(fn, "wb");
|
||||||
fprintf(fp, "P6\n");
|
fprintf(fp, "P6\n");
|
||||||
fprintf(fp, "%d %d\n", width, height);
|
fprintf(fp, "%d %d\n", width, height);
|
||||||
fprintf(fp, "255\n");
|
fprintf(fp, "255\n");
|
||||||
for (int i = 0; i < width*height; ++i) {
|
for (int i = 0; i < width*height; ++i) {
|
||||||
float v = buf[i] * 255.f;
|
float v = buf[i] * 255.f;
|
||||||
if (v < 0.f) v = 0.f;
|
if (v < 0.f) v = 0.f;
|
||||||
else if (v > 255.f) v = 255.f;
|
else if (v > 255.f) v = 255.f;
|
||||||
unsigned char c = (unsigned char)v;
|
unsigned char c = (unsigned char)v;
|
||||||
for (int j = 0; j < 3; ++j)
|
for (int j = 0; j < 3; ++j)
|
||||||
fputc(c, fp);
|
fputc(c, fp);
|
||||||
}
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
printf("Wrote image file %s\n", fn);
|
printf("Wrote image file %s\n", fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Load image and viewing parameters from a camera data file.
|
/* Load image and viewing parameters from a camera data file.
|
||||||
FIXME: we should add support to be able to specify viewing parameters
|
FIXME: we should add support to be able to specify viewing parameters
|
||||||
in the program here directly. */
|
in the program here directly. */
|
||||||
static void
|
static void
|
||||||
loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
|
loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
|
||||||
float camera2world[4][4]) {
|
float camera2world[4][4]) {
|
||||||
FILE *f = fopen(fn, "r");
|
FILE *f = fopen(fn, "r");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
perror(fn);
|
perror(fn);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (fscanf(f, "%d %d", width, height) != 2) {
|
if (fscanf(f, "%d %d", width, height) != 2) {
|
||||||
|
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
for (int j = 0; j < 4; ++j) {
|
||||||
|
if (fscanf(f, "%f", &raster2camera[i][j]) != 1) {
|
||||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
for (int j = 0; j < 4; ++j) {
|
for (int j = 0; j < 4; ++j) {
|
||||||
if (fscanf(f, "%f", &raster2camera[i][j]) != 1) {
|
if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
|
||||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
fprintf(stderr, "Unexpected end of file in camera file\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 4; ++i) {
|
}
|
||||||
for (int j = 0; j < 4; ++j) {
|
fclose(f);
|
||||||
if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
|
|
||||||
fprintf(stderr, "Unexpected end of file in camera file\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -110,105 +125,108 @@ loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4],
|
|||||||
floating-point values (also as strings) to give the densities. */
|
floating-point values (also as strings) to give the densities. */
|
||||||
static float *
|
static float *
|
||||||
loadVolume(const char *fn, int n[3]) {
|
loadVolume(const char *fn, int n[3]) {
|
||||||
FILE *f = fopen(fn, "r");
|
FILE *f = fopen(fn, "r");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
perror(fn);
|
perror(fn);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) {
|
if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) {
|
||||||
fprintf(stderr, "Couldn't find resolution at start of density file\n");
|
fprintf(stderr, "Couldn't find resolution at start of density file\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int count = n[0] * n[1] * n[2];
|
int count = n[0] * n[1] * n[2];
|
||||||
float *v = new float[count];
|
float *v = new float[count];
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
if (fscanf(f, "%f", &v[i]) != 1) {
|
if (fscanf(f, "%f", &v[i]) != 1) {
|
||||||
fprintf(stderr, "Unexpected end of file at %d'th density value\n", i);
|
fprintf(stderr, "Unexpected end of file at %d'th density value\n", i);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
if (argc != 3) {
|
if (argc != 3) {
|
||||||
fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol>\n");
|
fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol>\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Load viewing data and the volume density data
|
// Load viewing data and the volume density data
|
||||||
//
|
//
|
||||||
int width, height;
|
int width, height;
|
||||||
float raster2camera[4][4], camera2world[4][4];
|
float raster2camera[4][4], camera2world[4][4];
|
||||||
loadCamera(argv[1], &width, &height, raster2camera, camera2world);
|
loadCamera(argv[1], &width, &height, raster2camera, camera2world);
|
||||||
float *image = new float[width*height];
|
float *image = new float[width*height];
|
||||||
|
|
||||||
int n[3];
|
int n[3];
|
||||||
float *density = loadVolume(argv[2], n);
|
float *density = loadVolume(argv[2], n);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Compute the image using the ispc implementation; report the minimum
|
// Compute the image using the ispc implementation; report the minimum
|
||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
#if 0
|
||||||
reset_and_start_timer();
|
for (int i = 0; i < 3; ++i) {
|
||||||
volume_ispc(density, n, raster2camera, camera2world,
|
reset_and_start_timer();
|
||||||
width, height, image);
|
volume_ispc(density, n, raster2camera, camera2world,
|
||||||
double dt = get_elapsed_mcycles();
|
width, height, image);
|
||||||
minISPC = std::min(minISPC, dt);
|
double dt = get_elapsed_mcycles();
|
||||||
}
|
minISPC = std::min(minISPC, dt);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC);
|
printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC);
|
||||||
writePPM(image, width, height, "volume-ispc-1core.ppm");
|
writePPM(image, width, height, "volume-ispc-1core.ppm");
|
||||||
|
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (int i = 0; i < width * height; ++i)
|
for (int i = 0; i < width * height; ++i)
|
||||||
image[i] = 0.;
|
image[i] = 0.;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Compute the image using the ispc implementation that also uses
|
// Compute the image using the ispc implementation that also uses
|
||||||
// tasks; report the minimum time of three runs.
|
// tasks; report the minimum time of three runs.
|
||||||
//
|
//
|
||||||
double minISPCtasks = 1e30;
|
double minISPCtasks = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
volume_ispc_tasks(density, n, raster2camera, camera2world,
|
const double t0 = rtc();
|
||||||
width, height, image);
|
volume_ispc_tasks(density, n, raster2camera, camera2world,
|
||||||
double dt = get_elapsed_mcycles();
|
width, height, image);
|
||||||
minISPCtasks = std::min(minISPCtasks, dt);
|
double dt = rtc() - t0; //get_elapsed_mcycles();
|
||||||
}
|
minISPCtasks = std::min(minISPCtasks, dt);
|
||||||
|
}
|
||||||
|
|
||||||
printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks);
|
printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks);
|
||||||
writePPM(image, width, height, "volume-ispc-tasks.ppm");
|
writePPM(image, width, height, "volume-ispc-tasks.ppm");
|
||||||
|
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (int i = 0; i < width * height; ++i)
|
for (int i = 0; i < width * height; ++i)
|
||||||
image[i] = 0.;
|
image[i] = 0.;
|
||||||
|
|
||||||
//
|
//
|
||||||
// And run the serial implementation 3 times, again reporting the
|
// And run the serial implementation 3 times, again reporting the
|
||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
volume_serial(density, n, raster2camera, camera2world,
|
volume_serial(density, n, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
minSerial = std::min(minSerial, dt);
|
minSerial = std::min(minSerial, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial);
|
printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial);
|
||||||
writePPM(image, width, height, "volume-serial.ppm");
|
writePPM(image, width, height, "volume-serial.ppm");
|
||||||
|
|
||||||
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
|
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
|
||||||
minSerial/minISPC, minSerial / minISPCtasks);
|
minSerial/minISPC, minSerial / minISPCtasks);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,19 @@
|
|||||||
#include "volume_ispc.h"
|
#include "volume_ispc.h"
|
||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
static inline double rtc(void)
|
||||||
|
{
|
||||||
|
struct timeval Tvalue;
|
||||||
|
double etime;
|
||||||
|
struct timezone dummy;
|
||||||
|
|
||||||
|
gettimeofday(&Tvalue,&dummy);
|
||||||
|
etime = (double) Tvalue.tv_sec +
|
||||||
|
1.e-6*((double) Tvalue.tv_usec);
|
||||||
|
return etime;
|
||||||
|
}
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
@@ -414,6 +427,7 @@ int main(int argc, char *argv[]) {
|
|||||||
double minISPCtasks = 1e30;
|
double minISPCtasks = 1e30;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
|
const double t0 = rtc();
|
||||||
volume_ispc_tasks(
|
volume_ispc_tasks(
|
||||||
(float*)d_density,
|
(float*)d_density,
|
||||||
(int*)d_n,
|
(int*)d_n,
|
||||||
@@ -421,7 +435,7 @@ int main(int argc, char *argv[]) {
|
|||||||
(float(*)[4])d_camera2world,
|
(float(*)[4])d_camera2world,
|
||||||
width, height,
|
width, height,
|
||||||
(float*)d_image);
|
(float*)d_image);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = rtc() - t0; //get_elapsed_mcycles();
|
||||||
minISPCtasks = std::min(minISPCtasks, dt);
|
minISPCtasks = std::min(minISPCtasks, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user