+fixed rt.cpp to compile with nvvm

This commit is contained in:
Evghenii
2013-11-09 19:02:14 +01:00
parent d0ddec469a
commit 356e9c6810
4 changed files with 318 additions and 230 deletions

View File

@@ -3,6 +3,5 @@ EXAMPLE=rt
CPP_SRC=rt.cpp rt_serial.cpp CPP_SRC=rt.cpp rt_serial.cpp
ISPC_SRC=rt.ispc ISPC_SRC=rt.ispc
ISPC_IA_TARGETS=avx ISPC_IA_TARGETS=avx
ISPC_ARM_TARGETS=neon
include ../common.mk include ../common.mk

View File

@@ -47,222 +47,240 @@
#include "../timing.h" #include "../timing.h"
#include "rt_ispc.h" #include "rt_ispc.h"
#include <sys/time.h>
/*
 * Return the current wall-clock time in seconds, with microsecond
 * resolution, as reported by gettimeofday().
 *
 * Only the difference between two calls is meaningful for timing.
 * Returns 0.0 if gettimeofday() reports failure.
 */
double rtc(void)
{
    struct timeval now;

    /* The timezone argument is obsolete; POSIX leaves the behaviour
     * unspecified when it is non-null, so pass NULL. */
    if (gettimeofday(&now, NULL) != 0) {
        return 0.0;
    }

    return (double) now.tv_sec + 1.e-6 * (double) now.tv_usec;
}
using namespace ispc; using namespace ispc;
typedef unsigned int uint; typedef unsigned int uint;
extern void raytrace_serial(int width, int height, int baseWidth, int baseHeight, extern void raytrace_serial(int width, int height, int baseWidth, int baseHeight,
const float raster2camera[4][4], const float raster2camera[4][4],
const float camera2world[4][4], float image[], const float camera2world[4][4], float image[],
int id[], const LinearBVHNode nodes[], int id[], const LinearBVHNode nodes[],
const Triangle triangles[]); const Triangle triangles[]);
static void writeImage(int *idImage, float *depthImage, int width, int height, static void writeImage(int *idImage, float *depthImage, int width, int height,
const char *filename) { const char *filename) {
FILE *f = fopen(filename, "wb"); FILE *f = fopen(filename, "wb");
if (!f) { if (!f) {
perror(filename); perror(filename);
exit(1); exit(1);
} }
fprintf(f, "P6\n%d %d\n255\n", width, height); fprintf(f, "P6\n%d %d\n255\n", width, height);
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) { for (int x = 0; x < width; ++x) {
// use the bits from the object id of the hit object to make a // use the bits from the object id of the hit object to make a
// random color // random color
int id = idImage[y * width + x]; int id = idImage[y * width + x];
unsigned char r = 0, g = 0, b = 0; unsigned char r = 0, g = 0, b = 0;
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
// extract bit 3*i for red, 3*i+1 for green, 3*i+2 for blue // extract bit 3*i for red, 3*i+1 for green, 3*i+2 for blue
int rbit = (id & (1 << (3*i))) >> (3*i); int rbit = (id & (1 << (3*i))) >> (3*i);
int gbit = (id & (1 << (3*i+1))) >> (3*i+1); int gbit = (id & (1 << (3*i+1))) >> (3*i+1);
int bbit = (id & (1 << (3*i+2))) >> (3*i+2); int bbit = (id & (1 << (3*i+2))) >> (3*i+2);
// and then set the bits of the colors starting from the // and then set the bits of the colors starting from the
// high bits... // high bits...
r |= rbit << (7-i); r |= rbit << (7-i);
g |= gbit << (7-i); g |= gbit << (7-i);
b |= bbit << (7-i); b |= bbit << (7-i);
} }
fputc(r, f); fputc(r, f);
fputc(g, f); fputc(g, f);
fputc(b, f); fputc(b, f);
}
} }
fclose(f); }
printf("Wrote image file %s\n", filename); fclose(f);
printf("Wrote image file %s\n", filename);
} }
static void usage() { static void usage() {
fprintf(stderr, "rt [--scale=<factor>] <scene name base>\n"); fprintf(stderr, "rt [--scale=<factor>] <scene name base>\n");
exit(1); exit(1);
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
float scale = 1.f; float scale = 1.f;
const char *filename = NULL; const char *filename = NULL;
for (int i = 1; i < argc; ++i) { for (int i = 1; i < argc; ++i) {
if (strncmp(argv[i], "--scale=", 8) == 0) { if (strncmp(argv[i], "--scale=", 8) == 0) {
scale = atof(argv[i] + 8); scale = atof(argv[i] + 8);
if (scale == 0.f) if (scale == 0.f)
usage();
}
else if (filename != NULL)
usage();
else
filename = argv[i];
}
if (filename == NULL)
usage(); usage();
}
else if (filename != NULL)
usage();
else
filename = argv[i];
}
if (filename == NULL)
usage();
#define READ(var, n) \ #define READ(var, n) \
if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \ if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \
fprintf(stderr, "Unexpected EOF reading scene file\n"); \ fprintf(stderr, "Unexpected EOF reading scene file\n"); \
return 1; \ return 1; \
} else /* eat ; */ } else /* eat ; */
// //
// Read the camera specification information from the camera file // Read the camera specification information from the camera file
// //
char fnbuf[1024]; char fnbuf[1024];
sprintf(fnbuf, "%s.camera", filename); sprintf(fnbuf, "%s.camera", filename);
FILE *f = fopen(fnbuf, "rb"); FILE *f = fopen(fnbuf, "rb");
if (!f) { if (!f) {
perror(fnbuf); perror(fnbuf);
return 1; return 1;
}
//
// Nothing fancy, and trouble if we run on a big-endian system, just
// fread in the bits
//
int baseWidth, baseHeight;
float camera2world[4][4], raster2camera[4][4];
READ(baseWidth, 1);
READ(baseHeight, 1);
READ(camera2world[0][0], 16);
READ(raster2camera[0][0], 16);
//
// Read in the serialized BVH
//
sprintf(fnbuf, "%s.bvh", filename);
f = fopen(fnbuf, "rb");
if (!f) {
perror(fnbuf);
return 1;
}
// The BVH file starts with an int that gives the total number of BVH
// nodes
uint nNodes;
READ(nNodes, 1);
LinearBVHNode *nodes = new LinearBVHNode[nNodes];
for (unsigned int i = 0; i < nNodes; ++i) {
// Each node is 6x floats for a box, then an integer for an offset
// to the second child node, then an integer that encodes the type
// of node, the total number of int it if a leaf node, etc.
float b[6];
READ(b[0], 6);
nodes[i].bounds[0][0] = b[0];
nodes[i].bounds[0][1] = b[1];
nodes[i].bounds[0][2] = b[2];
nodes[i].bounds[1][0] = b[3];
nodes[i].bounds[1][1] = b[4];
nodes[i].bounds[1][2] = b[5];
READ(nodes[i].offset, 1);
READ(nodes[i].nPrimitives, 1);
READ(nodes[i].splitAxis, 1);
READ(nodes[i].pad, 1);
}
// And then read the triangles
uint nTris;
READ(nTris, 1);
Triangle *triangles = new Triangle[nTris];
for (uint i = 0; i < nTris; ++i) {
// 9x floats for the 3 vertices
float v[9];
READ(v[0], 9);
float *vp = v;
for (int j = 0; j < 3; ++j) {
triangles[i].p[j][0] = *vp++;
triangles[i].p[j][1] = *vp++;
triangles[i].p[j][2] = *vp++;
} }
// And create an object id
triangles[i].id = i+1;
}
fclose(f);
// int height = int(baseHeight * scale);
// Nothing fancy, and trouble if we run on a big-endian system, just int width = int(baseWidth * scale);
// fread in the bits
//
int baseWidth, baseHeight;
float camera2world[4][4], raster2camera[4][4];
READ(baseWidth, 1);
READ(baseHeight, 1);
READ(camera2world[0][0], 16);
READ(raster2camera[0][0], 16);
// // allocate images; one to hold hit object ids, one to hold depth to
// Read in the serialized BVH // the first interseciton
// int *id = new int[width*height];
sprintf(fnbuf, "%s.bvh", filename); float *image = new float[width*height];
f = fopen(fnbuf, "rb");
if (!f) {
perror(fnbuf);
return 1;
}
// The BVH file starts with an int that gives the total number of BVH //
// nodes // Run 3 iterations with ispc + 1 core, record the minimum time
uint nNodes; //
READ(nNodes, 1); double minTimeISPC = 1e30;
#if 0
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles();
minTimeISPC = std::min(dt, minTimeISPC);
}
printf("[rt ispc, 1 core]:\t\t[%.3f] million cycles for %d x %d image\n",
minTimeISPC, width, height);
LinearBVHNode *nodes = new LinearBVHNode[nNodes]; writeImage(id, image, width, height, "rt-ispc-1core.ppm");
for (unsigned int i = 0; i < nNodes; ++i) { #endif
// Each node is 6x floats for a box, then an integer for an offset
// to the second child node, then an integer that encodes the type
// of node, the total number of int it if a leaf node, etc.
float b[6];
READ(b[0], 6);
nodes[i].bounds[0][0] = b[0];
nodes[i].bounds[0][1] = b[1];
nodes[i].bounds[0][2] = b[2];
nodes[i].bounds[1][0] = b[3];
nodes[i].bounds[1][1] = b[4];
nodes[i].bounds[1][2] = b[5];
READ(nodes[i].offset, 1);
READ(nodes[i].nPrimitives, 1);
READ(nodes[i].splitAxis, 1);
READ(nodes[i].pad, 1);
}
// And then read the triangles memset(id, 0, width*height*sizeof(int));
uint nTris; memset(image, 0, width*height*sizeof(float));
READ(nTris, 1);
Triangle *triangles = new Triangle[nTris];
for (uint i = 0; i < nTris; ++i) {
// 9x floats for the 3 vertices
float v[9];
READ(v[0], 9);
float *vp = v;
for (int j = 0; j < 3; ++j) {
triangles[i].p[j][0] = *vp++;
triangles[i].p[j][1] = *vp++;
triangles[i].p[j][2] = *vp++;
}
// And create an object id
triangles[i].id = i+1;
}
fclose(f);
int height = int(baseHeight * scale); //
int width = int(baseWidth * scale); // Run 3 iterations with ispc + 1 core, record the minimum time
//
double minTimeISPCtasks = 1e30;
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
const double t0 = rtc();
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = rtc() - t0; //get_elapsed_mcycles();
minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
}
printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n",
minTimeISPCtasks, width, height);
// allocate images; one to hold hit object ids, one to hold depth to writeImage(id, image, width, height, "rt-ispc-tasks.ppm");
// the first intersection
int *id = new int[width*height];
float *image = new float[width*height];
// memset(id, 0, width*height*sizeof(int));
// Run 3 iterations with ispc + 1 core, record the minimum time memset(image, 0, width*height*sizeof(float));
//
double minTimeISPC = 1e30;
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles();
minTimeISPC = std::min(dt, minTimeISPC);
}
printf("[rt ispc, 1 core]:\t\t[%.3f] million cycles for %d x %d image\n",
minTimeISPC, width, height);
writeImage(id, image, width, height, "rt-ispc-1core.ppm"); //
// And 3 iterations with the serial implementation, reporting the
// minimum time.
//
double minTimeSerial = 1e30;
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
raytrace_serial(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles();
minTimeSerial = std::min(dt, minTimeSerial);
}
printf("[rt serial]:\t\t\t[%.3f] million cycles for %d x %d image\n",
minTimeSerial, width, height);
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCtasks);
memset(id, 0, width*height*sizeof(int)); writeImage(id, image, width, height, "rt-serial.ppm");
memset(image, 0, width*height*sizeof(float));
// return 0;
// Run 3 iterations with ispc + 1 core, record the minimum time
//
double minTimeISPCtasks = 1e30;
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles();
minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
}
printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n",
minTimeISPCtasks, width, height);
writeImage(id, image, width, height, "rt-ispc-tasks.ppm");
memset(id, 0, width*height*sizeof(int));
memset(image, 0, width*height*sizeof(float));
//
// And 3 iterations with the serial implementation, reporting the
// minimum time.
//
double minTimeSerial = 1e30;
for (int i = 0; i < 3; ++i) {
reset_and_start_timer();
raytrace_serial(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles();
minTimeSerial = std::min(dt, minTimeSerial);
}
printf("[rt serial]:\t\t\t[%.3f] million cycles for %d x %d image\n",
minTimeSerial, width, height);
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCtasks);
writeImage(id, image, width, height, "rt-serial.ppm");
return 0;
} }

View File

@@ -31,22 +31,32 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#define bool int
#ifdef __NVPTX__ #ifdef __NVPTX__
#warning "emitting DEVICE code" #warning "emitting DEVICE code"
#define programCount warpSize() #define programCount warpSize()
#define programIndex laneIndex() #define programIndex laneIndex()
#define taskIndex blockIndex0() #define taskIndex blockIndex0()
#define taskCount blockCount0()
#else #else
#warning "emitting HOST code" #warning "emitting HOST code"
#endif #endif
#define bool int
typedef float<3> float3; typedef float<3> float3;
#if 0
#define DIRISNEG
#endif
struct Ray { struct Ray {
float3 origin, dir, invDir; float3 origin, dir, invDir;
#ifdef DIRISNEG /* this fails to compile with nvvm */
uniform unsigned int dirIsNeg[3]; uniform unsigned int dirIsNeg[3];
#else
unsigned int dirIsNeg0, dirIsNeg1, dirIsNeg2;
#endif
float mint, maxt; float mint, maxt;
int hitId; int hitId;
}; };
@@ -80,7 +90,7 @@ static inline float Dot(const float3 a, const float3 b) {
} }
static void generateRay(uniform const float raster2camera[4][4], static inline void generateRay(uniform const float raster2camera[4][4],
uniform const float camera2world[4][4], uniform const float camera2world[4][4],
float x, float y, Ray &ray) { float x, float y, Ray &ray) {
ray.mint = 0.f; ray.mint = 0.f;
@@ -110,13 +120,19 @@ static void generateRay(uniform const float raster2camera[4][4],
ray.invDir = 1.f / ray.dir; ray.invDir = 1.f / ray.dir;
#ifdef DIRISNEG
ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0; ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0; ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0; ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;
#else
ray.dirIsNeg0 = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg1 = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg2 = any(ray.invDir.z < 0) ? 1 : 0;
#endif
} }
static bool BBoxIntersect(const uniform float bounds[2][3], static inline bool BBoxIntersect(const uniform float bounds[2][3],
const Ray &ray) { const Ray &ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
@@ -155,7 +171,7 @@ static bool BBoxIntersect(const uniform float bounds[2][3],
static bool TriIntersect(const uniform Triangle &tri, Ray &ray) { static inline bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
@@ -195,7 +211,7 @@ static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
} }
bool BVHIntersect(const uniform LinearBVHNode nodes[], inline inline bool BVHIntersect(const uniform LinearBVHNode nodes[],
const uniform Triangle tris[], Ray &r) { const uniform Triangle tris[], Ray &r) {
Ray ray = r; Ray ray = r;
bool hit = false; bool hit = false;
@@ -206,9 +222,11 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
while (true) { while (true) {
// Check ray against BVH node // Check ray against BVH node
uniform LinearBVHNode node = nodes[nodeNum]; uniform LinearBVHNode node = nodes[nodeNum];
if (any(BBoxIntersect(node.bounds, ray))) { if (any(BBoxIntersect(node.bounds, ray)))
{
uniform unsigned int nPrimitives = node.nPrimitives; uniform unsigned int nPrimitives = node.nPrimitives;
if (nPrimitives > 0) { if (nPrimitives > 0)
{
// Intersect ray with primitives in leaf BVH node // Intersect ray with primitives in leaf BVH node
uniform unsigned int primitivesOffset = node.offset; uniform unsigned int primitivesOffset = node.offset;
for (uniform unsigned int i = 0; i < nPrimitives; ++i) { for (uniform unsigned int i = 0; i < nPrimitives; ++i) {
@@ -219,13 +237,24 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
break; break;
nodeNum = todo[--todoOffset]; nodeNum = todo[--todoOffset];
} }
else { else
{
// Put far BVH node on _todo_ stack, advance to near node // Put far BVH node on _todo_ stack, advance to near node
if (r.dirIsNeg[node.splitAxis]) { #ifdef DIRISNEG
const int dirIsNeg = r.dirIsNeg[node.splitAxis];
#else
int dirIsNeg;
if (node.splitAxis == 0) dirIsNeg = r.dirIsNeg0;
if (node.splitAxis == 1) dirIsNeg = r.dirIsNeg1;
if (node.splitAxis == 2) dirIsNeg = r.dirIsNeg2;
#endif
if (dirIsNeg)
{
todo[todoOffset++] = nodeNum + 1; todo[todoOffset++] = nodeNum + 1;
nodeNum = node.offset; nodeNum = node.offset;
} }
else { else
{
todo[todoOffset++] = node.offset; todo[todoOffset++] = node.offset;
nodeNum = nodeNum + 1; nodeNum = nodeNum + 1;
} }
@@ -244,7 +273,7 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
} }
static void raytrace_tile(uniform int x0, uniform int x1, static inline void raytrace_tile(uniform int x0, uniform int x1,
uniform int y0, uniform int y1, uniform int y0, uniform int y1,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight, uniform int baseWidth, uniform int baseHeight,
@@ -256,15 +285,21 @@ static void raytrace_tile(uniform int x0, uniform int x1,
uniform float widthScale = (float)(baseWidth) / (float)(width); uniform float widthScale = (float)(baseWidth) / (float)(width);
uniform float heightScale = (float)(baseHeight) / (float)(height); uniform float heightScale = (float)(baseHeight) / (float)(height);
foreach_tiled (y = y0 ... y1, x = x0 ... x1) { // foreach_tiled (y = y0 ... y1, x = x0 ... x1)
for (uniform int y = y0; y < y1; y++)
for (uniform int xb = x0; xb < x1; xb += programCount)
{
const int x = xb + programIndex;
Ray ray; Ray ray;
generateRay(raster2camera, camera2world, x*widthScale, generateRay(raster2camera, camera2world, x*widthScale, y*heightScale, ray);
y*heightScale, ray);
BVHIntersect(nodes, triangles, ray); BVHIntersect(nodes, triangles, ray);
int offset = y * width + x; int offset = y * width + x;
image[offset] = ray.maxt; if (x < x1)
id[offset] = ray.hitId; {
image[offset] = ray.maxt;
id[offset] = ray.hitId;
}
} }
} }
@@ -284,37 +319,75 @@ export void raytrace_ispc(uniform int width, uniform int height,
task void raytrace_tile_task(uniform int width, uniform int height, task void raytrace_tile_task(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight, uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4], const uniform float _raster2camera[4][4],
const uniform float camera2world[4][4], const uniform float _camera2world[4][4],
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 16, dy = 16; // must match dx, dy below if (taskIndex >= taskCount) return;
uniform int xBuckets = (width + (dx-1)) / dx;
uniform int x0 = (taskIndex % xBuckets) * dx;
uniform int x1 = min(x0 + dx, width);
uniform int y0 = (taskIndex / xBuckets) * dy;
uniform int y1 = min(y0 + dy, height);
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight, uniform float raster2camera[4][4];
raster2camera, camera2world, image, raster2camera[0][0] = _raster2camera[0][0];
id, nodes, triangles); raster2camera[0][1] = _raster2camera[0][1];
raster2camera[0][2] = _raster2camera[0][2];
raster2camera[0][3] = _raster2camera[0][3];
raster2camera[1][0] = _raster2camera[1][0];
raster2camera[1][1] = _raster2camera[1][1];
raster2camera[1][2] = _raster2camera[1][2];
raster2camera[1][3] = _raster2camera[1][3];
raster2camera[2][0] = _raster2camera[2][0];
raster2camera[2][1] = _raster2camera[2][1];
raster2camera[2][2] = _raster2camera[2][2];
raster2camera[2][3] = _raster2camera[2][3];
raster2camera[3][0] = _raster2camera[3][0];
raster2camera[3][1] = _raster2camera[3][1];
raster2camera[3][2] = _raster2camera[3][2];
raster2camera[3][3] = _raster2camera[3][3];
uniform float camera2world[4][4];
camera2world[0][0] = _camera2world[0][0];
camera2world[0][1] = _camera2world[0][1];
camera2world[0][2] = _camera2world[0][2];
camera2world[0][3] = _camera2world[0][3];
camera2world[1][0] = _camera2world[1][0];
camera2world[1][1] = _camera2world[1][1];
camera2world[1][2] = _camera2world[1][2];
camera2world[1][3] = _camera2world[1][3];
camera2world[2][0] = _camera2world[2][0];
camera2world[2][1] = _camera2world[2][1];
camera2world[2][2] = _camera2world[2][2];
camera2world[2][3] = _camera2world[2][3];
camera2world[3][0] = _camera2world[3][0];
camera2world[3][1] = _camera2world[3][1];
camera2world[3][2] = _camera2world[3][2];
camera2world[3][3] = _camera2world[3][3];
uniform int dx = 32, dy = 16; // must match dx, dy below
uniform int xBuckets = (width + (dx-1)) / dx;
uniform int x0 = (taskIndex % xBuckets) * dx;
uniform int x1 = min(x0 + dx, width);
uniform int y0 = (taskIndex / xBuckets) * dy;
uniform int y1 = min(y0 + dy, height);
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
raster2camera, camera2world, image,
id, nodes, triangles);
} }
export void raytrace_ispc_tasks(uniform int width, uniform int height, export void raytrace_ispc_tasks(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight, uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4], const uniform float raster2camera[4][4],
const uniform float camera2world[4][4], const uniform float camera2world[4][4],
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 16, dy = 16; uniform int dx = 32, dy = 16;
uniform int xBuckets = (width + (dx-1)) / dx; uniform int xBuckets = (width + (dx-1)) / dx;
uniform int yBuckets = (height + (dy-1)) / dy; uniform int yBuckets = (height + (dy-1)) / dy;
uniform int nTasks = xBuckets * yBuckets; uniform int nTasks = xBuckets * yBuckets;
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight, launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
raster2camera, camera2world, raster2camera, camera2world,
image, id, nodes, triangles); image, id, nodes, triangles);
} }

View File

@@ -59,9 +59,7 @@
#define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS
#define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED
#define ISPC_USE_CILK #define ISPC_USE_CILK
*/
#define ISPC_USE_OMP #define ISPC_USE_OMP
/*
#define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_TASK_GROUP
#define ISPC_USE_TBB_PARALLEL_FOR #define ISPC_USE_TBB_PARALLEL_FOR