+fixed rt.cpp to compile with nvvm

This commit is contained in:
Evghenii
2013-11-09 19:02:14 +01:00
parent d0ddec469a
commit 356e9c6810
4 changed files with 318 additions and 230 deletions

View File

@@ -3,6 +3,5 @@ EXAMPLE=rt
CPP_SRC=rt.cpp rt_serial.cpp CPP_SRC=rt.cpp rt_serial.cpp
ISPC_SRC=rt.ispc ISPC_SRC=rt.ispc
ISPC_IA_TARGETS=avx ISPC_IA_TARGETS=avx
ISPC_ARM_TARGETS=neon
include ../common.mk include ../common.mk

View File

@@ -47,6 +47,21 @@
#include "../timing.h" #include "../timing.h"
#include "rt_ispc.h" #include "rt_ispc.h"
#include <sys/time.h>
/**
 * Return the current wall-clock time in seconds.
 *
 * The result is the tv_sec field reported by gettimeofday() plus the
 * tv_usec field scaled by 1e-6. Subtracting two readings yields an
 * elapsed interval in seconds.
 */
double rtc(void)
{
    struct timeval Tvalue;
    // The timezone argument of gettimeofday() is obsolete, and POSIX leaves
    // the behaviour unspecified when it is non-null, so pass a null pointer.
    gettimeofday(&Tvalue, NULL);
    return (double) Tvalue.tv_sec +
           1.e-6*((double) Tvalue.tv_usec);
}
using namespace ispc; using namespace ispc;
typedef unsigned int uint; typedef unsigned int uint;
@@ -211,6 +226,7 @@ int main(int argc, char *argv[]) {
// Run 3 iterations with ispc + 1 core, record the minimum time // Run 3 iterations with ispc + 1 core, record the minimum time
// //
double minTimeISPC = 1e30; double minTimeISPC = 1e30;
#if 0
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera, raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
@@ -222,6 +238,7 @@ int main(int argc, char *argv[]) {
minTimeISPC, width, height); minTimeISPC, width, height);
writeImage(id, image, width, height, "rt-ispc-1core.ppm"); writeImage(id, image, width, height, "rt-ispc-1core.ppm");
#endif
memset(id, 0, width*height*sizeof(int)); memset(id, 0, width*height*sizeof(int));
memset(image, 0, width*height*sizeof(float)); memset(image, 0, width*height*sizeof(float));
@@ -232,9 +249,10 @@ int main(int argc, char *argv[]) {
double minTimeISPCtasks = 1e30; double minTimeISPCtasks = 1e30;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
reset_and_start_timer(); reset_and_start_timer();
const double t0 = rtc();
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera, raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles); camera2world, image, id, nodes, triangles);
double dt = get_elapsed_mcycles(); double dt = rtc() - t0; //get_elapsed_mcycles();
minTimeISPCtasks = std::min(dt, minTimeISPCtasks); minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
} }
printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n", printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n",

View File

@@ -31,22 +31,32 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#define bool int
#ifdef __NVPTX__ #ifdef __NVPTX__
#warning "emitting DEVICE code" #warning "emitting DEVICE code"
#define programCount warpSize() #define programCount warpSize()
#define programIndex laneIndex() #define programIndex laneIndex()
#define taskIndex blockIndex0() #define taskIndex blockIndex0()
#define taskCount blockCount0()
#else #else
#warning "emitting HOST code" #warning "emitting HOST code"
#endif #endif
#define bool int
typedef float<3> float3; typedef float<3> float3;
#if 0
#define DIRISNEG
#endif
struct Ray { struct Ray {
float3 origin, dir, invDir; float3 origin, dir, invDir;
#ifdef DIRISNEG /* this fails to compile with nvvm */
uniform unsigned int dirIsNeg[3]; uniform unsigned int dirIsNeg[3];
#else
unsigned int dirIsNeg0, dirIsNeg1, dirIsNeg2;
#endif
float mint, maxt; float mint, maxt;
int hitId; int hitId;
}; };
@@ -80,7 +90,7 @@ static inline float Dot(const float3 a, const float3 b) {
} }
static void generateRay(uniform const float raster2camera[4][4], static inline void generateRay(uniform const float raster2camera[4][4],
uniform const float camera2world[4][4], uniform const float camera2world[4][4],
float x, float y, Ray &ray) { float x, float y, Ray &ray) {
ray.mint = 0.f; ray.mint = 0.f;
@@ -110,13 +120,19 @@ static void generateRay(uniform const float raster2camera[4][4],
ray.invDir = 1.f / ray.dir; ray.invDir = 1.f / ray.dir;
#ifdef DIRISNEG
ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0; ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0; ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0; ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;
#else
ray.dirIsNeg0 = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg1 = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg2 = any(ray.invDir.z < 0) ? 1 : 0;
#endif
} }
static bool BBoxIntersect(const uniform float bounds[2][3], static inline bool BBoxIntersect(const uniform float bounds[2][3],
const Ray &ray) { const Ray &ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
@@ -155,7 +171,7 @@ static bool BBoxIntersect(const uniform float bounds[2][3],
static bool TriIntersect(const uniform Triangle &tri, Ray &ray) { static inline bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
@@ -195,7 +211,7 @@ static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
} }
bool BVHIntersect(const uniform LinearBVHNode nodes[], inline inline bool BVHIntersect(const uniform LinearBVHNode nodes[],
const uniform Triangle tris[], Ray &r) { const uniform Triangle tris[], Ray &r) {
Ray ray = r; Ray ray = r;
bool hit = false; bool hit = false;
@@ -206,9 +222,11 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
while (true) { while (true) {
// Check ray against BVH node // Check ray against BVH node
uniform LinearBVHNode node = nodes[nodeNum]; uniform LinearBVHNode node = nodes[nodeNum];
if (any(BBoxIntersect(node.bounds, ray))) { if (any(BBoxIntersect(node.bounds, ray)))
{
uniform unsigned int nPrimitives = node.nPrimitives; uniform unsigned int nPrimitives = node.nPrimitives;
if (nPrimitives > 0) { if (nPrimitives > 0)
{
// Intersect ray with primitives in leaf BVH node // Intersect ray with primitives in leaf BVH node
uniform unsigned int primitivesOffset = node.offset; uniform unsigned int primitivesOffset = node.offset;
for (uniform unsigned int i = 0; i < nPrimitives; ++i) { for (uniform unsigned int i = 0; i < nPrimitives; ++i) {
@@ -219,13 +237,24 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
break; break;
nodeNum = todo[--todoOffset]; nodeNum = todo[--todoOffset];
} }
else { else
{
// Put far BVH node on _todo_ stack, advance to near node // Put far BVH node on _todo_ stack, advance to near node
if (r.dirIsNeg[node.splitAxis]) { #ifdef DIRISNEG
const int dirIsNeg = r.dirIsNeg[node.splitAxis];
#else
int dirIsNeg;
if (node.splitAxis == 0) dirIsNeg = r.dirIsNeg0;
if (node.splitAxis == 1) dirIsNeg = r.dirIsNeg1;
if (node.splitAxis == 2) dirIsNeg = r.dirIsNeg2;
#endif
if (dirIsNeg)
{
todo[todoOffset++] = nodeNum + 1; todo[todoOffset++] = nodeNum + 1;
nodeNum = node.offset; nodeNum = node.offset;
} }
else { else
{
todo[todoOffset++] = node.offset; todo[todoOffset++] = node.offset;
nodeNum = nodeNum + 1; nodeNum = nodeNum + 1;
} }
@@ -244,7 +273,7 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
} }
static void raytrace_tile(uniform int x0, uniform int x1, static inline void raytrace_tile(uniform int x0, uniform int x1,
uniform int y0, uniform int y1, uniform int y0, uniform int y1,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight, uniform int baseWidth, uniform int baseHeight,
@@ -256,17 +285,23 @@ static void raytrace_tile(uniform int x0, uniform int x1,
uniform float widthScale = (float)(baseWidth) / (float)(width); uniform float widthScale = (float)(baseWidth) / (float)(width);
uniform float heightScale = (float)(baseHeight) / (float)(height); uniform float heightScale = (float)(baseHeight) / (float)(height);
foreach_tiled (y = y0 ... y1, x = x0 ... x1) { // foreach_tiled (y = y0 ... y1, x = x0 ... x1)
for (uniform int y = y0; y < y1; y++)
for (uniform int xb = x0; xb < x1; xb += programCount)
{
const int x = xb + programIndex;
Ray ray; Ray ray;
generateRay(raster2camera, camera2world, x*widthScale, generateRay(raster2camera, camera2world, x*widthScale, y*heightScale, ray);
y*heightScale, ray);
BVHIntersect(nodes, triangles, ray); BVHIntersect(nodes, triangles, ray);
int offset = y * width + x; int offset = y * width + x;
if (x < x1)
{
image[offset] = ray.maxt; image[offset] = ray.maxt;
id[offset] = ray.hitId; id[offset] = ray.hitId;
} }
} }
}
export void raytrace_ispc(uniform int width, uniform int height, export void raytrace_ispc(uniform int width, uniform int height,
@@ -284,12 +319,50 @@ export void raytrace_ispc(uniform int width, uniform int height,
task void raytrace_tile_task(uniform int width, uniform int height, task void raytrace_tile_task(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight, uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4], const uniform float _raster2camera[4][4],
const uniform float camera2world[4][4], const uniform float _camera2world[4][4],
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 16, dy = 16; // must match dx, dy below if (taskIndex >= taskCount) return;
uniform float raster2camera[4][4];
raster2camera[0][0] = _raster2camera[0][0];
raster2camera[0][1] = _raster2camera[0][1];
raster2camera[0][2] = _raster2camera[0][2];
raster2camera[0][3] = _raster2camera[0][3];
raster2camera[1][0] = _raster2camera[1][0];
raster2camera[1][1] = _raster2camera[1][1];
raster2camera[1][2] = _raster2camera[1][2];
raster2camera[1][3] = _raster2camera[1][3];
raster2camera[2][0] = _raster2camera[2][0];
raster2camera[2][1] = _raster2camera[2][1];
raster2camera[2][2] = _raster2camera[2][2];
raster2camera[2][3] = _raster2camera[2][3];
raster2camera[3][0] = _raster2camera[3][0];
raster2camera[3][1] = _raster2camera[3][1];
raster2camera[3][2] = _raster2camera[3][2];
raster2camera[3][3] = _raster2camera[3][3];
uniform float camera2world[4][4];
camera2world[0][0] = _camera2world[0][0];
camera2world[0][1] = _camera2world[0][1];
camera2world[0][2] = _camera2world[0][2];
camera2world[0][3] = _camera2world[0][3];
camera2world[1][0] = _camera2world[1][0];
camera2world[1][1] = _camera2world[1][1];
camera2world[1][2] = _camera2world[1][2];
camera2world[1][3] = _camera2world[1][3];
camera2world[2][0] = _camera2world[2][0];
camera2world[2][1] = _camera2world[2][1];
camera2world[2][2] = _camera2world[2][2];
camera2world[2][3] = _camera2world[2][3];
camera2world[3][0] = _camera2world[3][0];
camera2world[3][1] = _camera2world[3][1];
camera2world[3][2] = _camera2world[3][2];
camera2world[3][3] = _camera2world[3][3];
uniform int dx = 32, dy = 16; // must match dx, dy below
uniform int xBuckets = (width + (dx-1)) / dx; uniform int xBuckets = (width + (dx-1)) / dx;
uniform int x0 = (taskIndex % xBuckets) * dx; uniform int x0 = (taskIndex % xBuckets) * dx;
uniform int x1 = min(x0 + dx, width); uniform int x1 = min(x0 + dx, width);
@@ -309,7 +382,7 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
uniform float image[], uniform int id[], uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[], const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) { const uniform Triangle triangles[]) {
uniform int dx = 16, dy = 16; uniform int dx = 32, dy = 16;
uniform int xBuckets = (width + (dx-1)) / dx; uniform int xBuckets = (width + (dx-1)) / dx;
uniform int yBuckets = (height + (dy-1)) / dy; uniform int yBuckets = (height + (dy-1)) / dy;
uniform int nTasks = xBuckets * yBuckets; uniform int nTasks = xBuckets * yBuckets;

View File

@@ -59,9 +59,7 @@
#define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS
#define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED
#define ISPC_USE_CILK #define ISPC_USE_CILK
*/
#define ISPC_USE_OMP #define ISPC_USE_OMP
/*
#define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_TASK_GROUP
#define ISPC_USE_TBB_PARALLEL_FOR #define ISPC_USE_TBB_PARALLEL_FOR