This commit is contained in:
Evghenii
2014-02-21 09:00:22 +01:00
parent a85972ffbc
commit 3fc95779ac
15 changed files with 976 additions and 388 deletions

2
examples/portable/rt/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
rt
*.ppm

View File

@@ -0,0 +1,8 @@
EXAMPLE=rt
CPP_SRC=rt.cpp
ISPC_SRC=rt.ispc
ISPC_IA_TARGETS=avx1-i32x8
ISPC_ARM_TARGETS=neon
include ../common_cpu.mk

View File

@@ -0,0 +1,13 @@
PROG=rt
ISPC_SRC=rt.ispc
CU_SRC=rt.cu
CXX_SRC=rt.cpp
PTXCC_REGMAX=32
#LLVM_GPU=1
NVVM_GPU=1
include ../common_ptx.mk

Binary file not shown.

Binary file not shown.

229
examples/portable/rt/rt.cpp Normal file
View File

@@ -0,0 +1,229 @@
/*
Copyright (c) 2010-2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#define NOMINMAX
#pragma warning (disable: 4244)
#pragma warning (disable: 4305)
#endif
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <cassert>
#include <cstring>
#include <sys/types.h>
#include "timing.h"
#include "rt_ispc.h"
#include "ispc_malloc.h"
using namespace ispc;
typedef unsigned int uint;
static void writeImage(int *idImage, float *depthImage, int width, int height,
const char *filename) {
FILE *f = fopen(filename, "wb");
if (!f) {
perror(filename);
exit(1);
}
fprintf(f, "P6\n%d %d\n255\n", width, height);
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// use the bits from the object id of the hit object to make a
// random color
int id = idImage[y * width + x];
unsigned char r = 0, g = 0, b = 0;
for (int i = 0; i < 8; ++i) {
// extract bit 3*i for red, 3*i+1 for green, 3*i+2 for blue
int rbit = (id & (1 << (3*i))) >> (3*i);
int gbit = (id & (1 << (3*i+1))) >> (3*i+1);
int bbit = (id & (1 << (3*i+2))) >> (3*i+2);
// and then set the bits of the colors starting from the
// high bits...
r |= rbit << (7-i);
g |= gbit << (7-i);
b |= bbit << (7-i);
}
fputc(r, f);
fputc(g, f);
fputc(b, f);
}
}
fclose(f);
printf("Wrote image file %s\n", filename);
}
static void usage() {
fprintf(stderr, "rt <scene name base> [--scale=<factor>] [ispc iterations] [tasks iterations] [serial iterations]\n");
exit(1);
}
int main(int argc, char *argv[]) {
static unsigned int test_iterations[] = {3, 7, 1};
float scale = 1.f;
const char *filename = NULL;
if (argc < 2) usage();
filename = argv[1];
if (argc > 2) {
if (strncmp(argv[2], "--scale=", 8) == 0) {
scale = atof(argv[2] + 8);
}
}
if ((argc == 6) || (argc == 5)) {
for (int i = 0; i < 3; i++) {
test_iterations[i] = atoi(argv[argc - 3 + i]);
}
}
#define READ(var, n) \
if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \
fprintf(stderr, "Unexpected EOF reading scene file\n"); \
return 1; \
} else /* eat ; */
//
// Read the camera specification information from the camera file
//
char fnbuf[1024];
sprintf(fnbuf, "%s.camera", filename);
FILE *f = fopen(fnbuf, "rb");
if (!f) {
perror(fnbuf);
return 1;
}
//
// Nothing fancy, and trouble if we run on a big-endian system, just
// fread in the bits
//
int baseWidth, baseHeight;
// float camera2world[4][4], raster2camera[4][4];
float *camera2world_ispc = new float[4*4];
float *raster2camera_ispc = new float[4*4];
float (*camera2world )[4] = (float (*)[4])camera2world_ispc;
float (*raster2camera)[4] = (float (*)[4])raster2camera_ispc;
READ(baseWidth, 1);
READ(baseHeight, 1);
READ(camera2world[0][0], 16);
READ(raster2camera[0][0], 16);
//
// Read in the serialized BVH
//
sprintf(fnbuf, "%s.bvh", filename);
f = fopen(fnbuf, "rb");
if (!f) {
perror(fnbuf);
return 1;
}
// The BVH file starts with an int that gives the total number of BVH
// nodes
uint nNodes;
READ(nNodes, 1);
LinearBVHNode *nodes = new LinearBVHNode[nNodes];
for (unsigned int i = 0; i < nNodes; ++i) {
// Each node is 6x floats for a boox, then an integer for an offset
// to the second child node, then an integer that encodes the type
// of node, the total number of int it if a leaf node, etc.
float b[6];
READ(b[0], 6);
nodes[i].bounds[0][0] = b[0];
nodes[i].bounds[0][1] = b[1];
nodes[i].bounds[0][2] = b[2];
nodes[i].bounds[1][0] = b[3];
nodes[i].bounds[1][1] = b[4];
nodes[i].bounds[1][2] = b[5];
READ(nodes[i].offset, 1);
READ(nodes[i].nPrimitives, 1);
READ(nodes[i].splitAxis, 1);
READ(nodes[i].pad, 1);
}
// And then read the triangles
uint nTris;
READ(nTris, 1);
Triangle *triangles = new Triangle[nTris];
for (uint i = 0; i < nTris; ++i) {
// 9x floats for the 3 vertices
float v[9];
READ(v[0], 9);
float *vp = v;
for (int j = 0; j < 3; ++j) {
triangles[i].p[j][0] = *vp++;
triangles[i].p[j][1] = *vp++;
triangles[i].p[j][2] = *vp++;
}
// And create an object id
triangles[i].id = i+1;
}
fclose(f);
int height = int(baseHeight * scale);
int width = int(baseWidth * scale);
// allocate images; one to hold hit object ids, one to hold depth to
// the first interseciton
int *id = new int[width*height];
float *image = new float[width*height];
ispc_memset(id, 0, width*height*sizeof(int));
ispc_memset(image, 0, width*height*sizeof(float));
//
// Run 3 iterations with ispc + 1 core, record the minimum time
//
double minTimeISPCtasks = 1e30;
for (int i = 0; i < test_iterations[1]; ++i) {
reset_and_start_timer();
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
camera2world, image, id, nodes, triangles);
double dt = get_elapsed_msec();
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec\n", dt);
minTimeISPCtasks = std::min(dt, minTimeISPCtasks);
}
printf("[rt ispc + tasks]:\t\t[%.3f] msec for %d x %d image\n",
minTimeISPCtasks, width, height);
writeImage(id, image, width, height, "rt-ispc-tasks.ppm");
return 0;
}

373
examples/portable/rt/rt.cu Normal file
View File

@@ -0,0 +1,373 @@
#include "cuda_helpers.cuh"
#define float3 Float3
struct Float3
{
float x,y,z;
__device__ friend Float3 operator+(const Float3 a, const Float3 b)
{
Float3 c;
c.x = a.x+b.x;
c.y = a.y+b.y;
c.z = a.z+b.z;
return c;
}
__device__ friend Float3 operator-(const Float3 a, const Float3 b)
{
Float3 c;
c.x = a.x-b.x;
c.y = a.y-b.y;
c.z = a.z-b.z;
return c;
}
__device__ friend Float3 operator/(const Float3 a, const Float3 b)
{
Float3 c;
c.x = a.x/b.x;
c.y = a.y/b.y;
c.z = a.z/b.z;
return c;
}
__device__ friend Float3 operator/(const float a, const Float3 b)
{
Float3 c;
c.x = a/b.x;
c.y = a/b.y;
c.z = a/b.z;
return c;
}
__device__ friend Float3 operator*(const Float3 a, const Float3 b)
{
Float3 c;
c.x = a.x*b.x;
c.y = a.y*b.y;
c.z = a.z*b.z;
return c;
}
__device__ friend Float3 operator*(const Float3 a, const float b)
{
Float3 c;
c.x = a.x*b;
c.y = a.y*b;
c.z = a.z*b;
return c;
}
};
#define int8 char
#define int16 short
struct Ray {
float3 origin, dir, invDir;
unsigned int dirIsNeg0, dirIsNeg1, dirIsNeg2;
float mint, maxt;
int hitId;
};
struct Triangle {
float p[3][4];
int id;
int pad[3];
};
struct LinearBVHNode {
float bounds[2][3];
unsigned int offset; // num primitives for leaf, second child for interior
unsigned int8 nPrimitives;
unsigned int8 splitAxis;
unsigned int16 pad;
};
__device__
static inline float3 Cross(const float3 v1, const float3 v2) {
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
float3 ret;
ret.x = (v1y * v2z) - (v1z * v2y);
ret.y = (v1z * v2x) - (v1x * v2z);
ret.z = (v1x * v2y) - (v1y * v2x);
return ret;
}
__device__
static inline float Dot(const float3 a, const float3 b) {
return a.x * b.x + a.y * b.y + a.z * b.z;
}
__device__
inline
static void generateRay( const float raster2camera[4][4],
const float camera2world[4][4],
float x, float y, Ray &ray) {
ray.mint = 0.f;
ray.maxt = 1e30f;
ray.hitId = 0;
// transform raster coordinate (x, y, 0) to camera space
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
float camz = raster2camera[2][3];
float camw = raster2camera[3][3];
camx /= camw;
camy /= camw;
camz /= camw;
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy +
camera2world[0][2] * camz;
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy +
camera2world[1][2] * camz;
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy +
camera2world[2][2] * camz;
ray.origin.x = camera2world[0][3] / camera2world[3][3];
ray.origin.y = camera2world[1][3] / camera2world[3][3];
ray.origin.z = camera2world[2][3] / camera2world[3][3];
ray.invDir = 1.f / ray.dir;
#if 0
ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;
#else
ray.dirIsNeg0 = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg1 = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg2 = any(ray.invDir.z < 0) ? 1 : 0;
#endif
}
__device__
inline
static bool BBoxIntersect(const float bounds[2][3],
const Ray &ray) {
float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt;
// Check all three axis-aligned slabs. Don't try to early out; it's
// not worth the trouble
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
tFar.x = tmp;
}
t0 = max(tNear.x, t0);
t1 = min(tFar.x, t1);
if (tNear.y > tFar.y) {
float tmp = tNear.y;
tNear.y = tFar.y;
tFar.y = tmp;
}
t0 = max(tNear.y, t0);
t1 = min(tFar.y, t1);
if (tNear.z > tFar.z) {
float tmp = tNear.z;
tNear.z = tFar.z;
tFar.z = tmp;
}
t0 = max(tNear.z, t0);
t1 = min(tFar.z, t1);
return (t0 <= t1);
}
__device__
inline
static bool TriIntersect(const Triangle &tri, Ray &ray) {
float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
float3 e1 = p1 - p0;
float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
bool hit = true;
if (divisor == 0.)
hit = false;
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
hit = false;
// Compute second barycentric coordinate
float3 s2 = Cross(d, e1);
float b2 = Dot(ray.dir, s2) * invDivisor;
if (b2 < 0. || b1 + b2 > 1.)
hit = false;
// Compute _t_ to intersection point
float t = Dot(e2, s2) * invDivisor;
if (t < ray.mint || t > ray.maxt)
hit = false;
if (hit) {
ray.maxt = t;
ray.hitId = tri.id;
}
return hit;
}
__device__
inline
bool BVHIntersect(const LinearBVHNode nodes[],
const Triangle tris[], Ray &r,
int todo[]) {
Ray ray = r;
bool hit = false;
// Follow ray through BVH nodes to find primitive intersections
int todoOffset = 0, nodeNum = 0;
while (true) {
// Check ray against BVH node
LinearBVHNode node = nodes[nodeNum];
if (any(BBoxIntersect(node.bounds, ray))) {
unsigned int nPrimitives = node.nPrimitives;
if (nPrimitives > 0) {
// Intersect ray with primitives in leaf BVH node
unsigned int primitivesOffset = node.offset;
for ( unsigned int i = 0; i < nPrimitives; ++i) {
if (TriIntersect(tris[primitivesOffset+i], ray))
hit = true;
}
if (todoOffset == 0)
break;
nodeNum = todo[--todoOffset];
}
else {
// Put far BVH node on _todo_ stack, advance to near node
int dirIsNeg;
if (node.splitAxis == 0) dirIsNeg = r.dirIsNeg0;
if (node.splitAxis == 1) dirIsNeg = r.dirIsNeg1;
if (node.splitAxis == 2) dirIsNeg = r.dirIsNeg2;
if (dirIsNeg) {
todo[todoOffset++] = nodeNum + 1;
nodeNum = node.offset;
}
else {
todo[todoOffset++] = node.offset;
nodeNum = nodeNum + 1;
}
}
}
else {
if (todoOffset == 0)
break;
nodeNum = todo[--todoOffset];
}
}
r.maxt = ray.maxt;
r.hitId = ray.hitId;
return hit;
}
__device__
inline
static void raytrace_tile( int x0, int x1,
int y0, int y1,
int width, int height,
int baseWidth, int baseHeight,
const float raster2camera[4][4],
const float camera2world[4][4],
float image[], int id[],
const LinearBVHNode nodes[],
const Triangle triangles[]) {
float widthScale = (float)(baseWidth) / (float)(width);
float heightScale = (float)(baseHeight) / (float)(height);
#if 0
int * todo = new int[64];
#define ALLOC
#else
int todo[64];
#endif
for (int y = y0 ;y < y1; y++)
for (int x = x0 + programIndex; x < x1; x += programCount)
if (x < x1)
{
Ray ray;
generateRay(raster2camera, camera2world, x*widthScale,
y*heightScale, ray);
BVHIntersect(nodes, triangles, ray, todo);
int offset = y * width + x;
image[offset] = ray.maxt;
id[offset] = ray.hitId;
}
#ifdef ALLOC
delete todo;
#endif
}
__global__
void raytrace_tile_task( int width, int height,
int baseWidth, int baseHeight,
const float raster2camera[4][4],
const float camera2world[4][4],
float image[], int id[],
const LinearBVHNode nodes[],
const Triangle triangles[]) {
int dx = 64, dy = 8; // must match dx, dy below
int xBuckets = (width + (dx-1)) / dx;
int x0 = (taskIndex % xBuckets) * dx;
int x1 = min(x0 + dx, width);
int y0 = (taskIndex / xBuckets) * dy;
int y1 = min(y0 + dy, height);
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
raster2camera, camera2world, image,
id, nodes, triangles);
}
extern "C" __global__ void raytrace_ispc_tasks___export( int width, int height,
int baseWidth, int baseHeight,
const float raster2camera[4][4],
const float camera2world[4][4],
float image[], int id[],
const LinearBVHNode nodes[],
const Triangle triangles[]) {
int dx = 64, dy = 8;
int xBuckets = (width + (dx-1)) / dx;
int yBuckets = (height + (dy-1)) / dy;
int nTasks = xBuckets * yBuckets;
launch(nTasks,1,1,raytrace_tile_task)
(width, height, baseWidth, baseHeight,
raster2camera, camera2world,
image, id, nodes, triangles);
cudaDeviceSynchronize();
}
extern "C" __host__ void raytrace_ispc_tasks( int width, int height,
int baseWidth, int baseHeight,
const float raster2camera[4][4],
const float camera2world[4][4],
float image[], int id[],
const LinearBVHNode nodes[],
const Triangle triangles[]) {
raytrace_ispc_tasks___export<<<1,32>>>( width, height,
baseWidth, baseHeight,
raster2camera,
camera2world,
image, id,
nodes,
triangles);
cudaDeviceSynchronize();
}

View File

@@ -0,0 +1,351 @@
/*
Copyright (c) 2010-2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if 1
typedef int bool_t;
#else
typedef bool bool_t;
#endif
typedef float<3> float3;
#ifdef __NVPTX__
#define uniform_t varying
#else
#define uniform_t uniform
#endif
struct int3
{
int x,y,z;
};
struct Ray {
float3 origin, dir, invDir;
uniform unsigned int dirIsNeg[3];
float mint, maxt;
int hitId;
};
struct Triangle {
float p[3][4];
int id;
int pad[3];
};
struct LinearBVHNode {
float bounds[2][3];
unsigned int offset; // num primitives for leaf, second child for interior
unsigned int8 nPrimitives;
unsigned int8 splitAxis;
unsigned int16 pad;
};
static inline float3 Cross(const float3 v1, const float3 v2) {
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
float3 ret;
ret.x = (v1y * v2z) - (v1z * v2y);
ret.y = (v1z * v2x) - (v1x * v2z);
ret.z = (v1x * v2y) - (v1y * v2x);
return ret;
}
static inline float Dot(const float3 a, const float3 b) {
return a.x * b.x + a.y * b.y + a.z * b.z;
}
#if 1
inline
#endif
static void generateRay(uniform const float raster2camera[4][4],
uniform const float camera2world[4][4],
float x, float y, Ray &ray) {
ray.mint = 0.f;
ray.maxt = 1e30f;
ray.hitId = 0;
// transform raster coordinate (x, y, 0) to camera space
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
float camz = raster2camera[2][3];
float camw = raster2camera[3][3];
camx /= camw;
camy /= camw;
camz /= camw;
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy +
camera2world[0][2] * camz;
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy +
camera2world[1][2] * camz;
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy +
camera2world[2][2] * camz;
ray.origin.x = camera2world[0][3] / camera2world[3][3];
ray.origin.y = camera2world[1][3] / camera2world[3][3];
ray.origin.z = camera2world[2][3] / camera2world[3][3];
ray.invDir = 1.f / ray.dir;
ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;
}
#if 1
inline
#endif
static bool_t BBoxIntersect(const uniform float bounds[2][3],
const Ray &ray) {
const uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
const uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt;
// Check all three axis-aligned slabs. Don't try to early out; it's
// not worth the trouble
float3 tNear = (bounds0 - ray.origin) * ray.invDir;
float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
tFar.x = tmp;
}
t0 = max(tNear.x, t0);
t1 = min(tFar.x, t1);
if (tNear.y > tFar.y) {
float tmp = tNear.y;
tNear.y = tFar.y;
tFar.y = tmp;
}
t0 = max(tNear.y, t0);
t1 = min(tFar.y, t1);
if (tNear.z > tFar.z) {
float tmp = tNear.z;
tNear.z = tFar.z;
tFar.z = tmp;
}
t0 = max(tNear.z, t0);
t1 = min(tFar.z, t1);
return (t0 <= t1);
}
#if 1
inline
#endif
static bool_t TriIntersect(const uniform_t Triangle tri, Ray &ray) {
const uniform_t float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
const uniform_t float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
const uniform_t float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
const uniform_t float3 e1 = p1 - p0;
const uniform_t float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
bool_t hit = true;
if (divisor == 0.)
hit = false;
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
hit = false;
// Compute second barycentric coordinate
float3 s2 = Cross(d, e1);
float b2 = Dot(ray.dir, s2) * invDivisor;
if (b2 < 0. || b1 + b2 > 1.)
hit = false;
// Compute _t_ to intersection point
float t = Dot(e2, s2) * invDivisor;
if (t < ray.mint || t > ray.maxt)
hit = false;
if (hit) {
ray.maxt = t;
ray.hitId = tri.id;
}
return hit;
}
#if 1
inline
#endif
bool_t
BVHIntersect(const uniform LinearBVHNode nodes[],
const uniform Triangle tris[], Ray &r) {
Ray ray = r;
bool_t hit = false;
// Follow ray through BVH nodes to find primitive intersections
uniform int todoOffset = 0, nodeNum = 0;
uniform int todo[64];
while (true) {
// Check ray against BVH node
const uniform LinearBVHNode node = nodes[nodeNum];
if (any(BBoxIntersect(node.bounds, ray))) {
const uniform unsigned int nPrimitives = node.nPrimitives;
if (nPrimitives > 0) {
// Intersect ray with primitives in leaf BVH node
const uniform unsigned int primitivesOffset = node.offset;
for (uniform_t unsigned int i = 0; i < nPrimitives; ++i) {
if (TriIntersect(tris[primitivesOffset+i], ray))
hit = true;
}
if (todoOffset == 0)
break;
nodeNum = todo[--todoOffset];
}
else {
// Put far BVH node on _todo_ stack, advance to near node
#if 0 /* fails */
int dirIsNeg = r.dirIsNeg[node.splitAxis];
#else
int dirIsNeg;
if (node.splitAxis == 0) dirIsNeg = r.dirIsNeg[0];
if (node.splitAxis == 1) dirIsNeg = r.dirIsNeg[1];
if (node.splitAxis == 2) dirIsNeg = r.dirIsNeg[2];
#endif
if (dirIsNeg) {
todo[todoOffset++] = nodeNum + 1;
nodeNum = node.offset;
}
else {
todo[todoOffset++] = node.offset;
nodeNum = nodeNum + 1;
}
}
}
else {
if (todoOffset == 0)
break;
nodeNum = todo[--todoOffset];
}
}
r.maxt = ray.maxt;
r.hitId = ray.hitId;
return hit;
}
#if 1
inline
#endif
static void raytrace_tile(uniform int x0, uniform int x1,
uniform int y0, uniform int y1,
uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) {
const uniform float widthScale = (float)(baseWidth) / (float)(width);
const uniform float heightScale = (float)(baseHeight) / (float)(height);
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
Ray ray;
generateRay(raster2camera, camera2world, x*widthScale,
y*heightScale, ray);
BVHIntersect(nodes, triangles, ray);
int offset = y * width + x;
image[offset] = ray.maxt;
id[offset] = ray.hitId;
}
}
export void raytrace_ispc(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) {
raytrace_tile(0, width, 0, height, width, height, baseWidth, baseHeight,
raster2camera, camera2world, image,
id, nodes, triangles);
}
task void raytrace_tile_task(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) {
const uniform int dx = 64, dy = 8; // must match dx, dy below
const uniform int xBuckets = (width + (dx-1)) / dx;
const uniform int x0 = (taskIndex % xBuckets) * dx;
const uniform int x1 = min(x0 + dx, width);
const uniform int y0 = (taskIndex / xBuckets) * dy;
const uniform int y1 = min(y0 + dy, height);
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
raster2camera, camera2world, image,
id, nodes, triangles);
}
export void raytrace_ispc_tasks(uniform int width, uniform int height,
uniform int baseWidth, uniform int baseHeight,
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform float image[], uniform int id[],
const uniform LinearBVHNode nodes[],
const uniform Triangle triangles[]) {
const uniform int dx = 64, dy = 8;
const uniform int xBuckets = (width + (dx-1)) / dx;
const uniform int yBuckets = (height + (dy-1)) / dy;
const uniform int nTasks = xBuckets * yBuckets;
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
raster2camera, camera2world,
image, id, nodes, triangles);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,13 +0,0 @@
PROG=volume
ISPC_SRC=volume.ispc
CU_SRC=volume.cu
CXX_SRC=volume.cpp
PTXCC_REGMAX=64
LLVM_GPU=1
NVVM_GPU=1
include ../common_gpu.mk

View File

@@ -1,341 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
typedef float<3> float3;
struct Ray {
float3 origin, dir;
};
static void
generateRay(const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
float x, float y, Ray &ray) {
// transform raster coordinate (x, y, 0) to camera space
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
float camz = raster2camera[2][3];
float camw = raster2camera[3][3];
camx /= camw;
camy /= camw;
camz /= camw;
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
ray.origin.x = camera2world[0][3] / camera2world[3][3];
ray.origin.y = camera2world[1][3] / camera2world[3][3];
ray.origin.z = camera2world[2][3] / camera2world[3][3];
}
static inline bool
Inside(float3 p, float3 pMin, float3 pMax) {
return (p.x >= pMin.x && p.x <= pMax.x &&
p.y >= pMin.y && p.y <= pMax.y &&
p.z >= pMin.z && p.z <= pMax.z);
}
static bool
IntersectP(Ray ray, float3 pMin, float3 pMax, float &hit0, float &hit1) {
float t0 = -1e30, t1 = 1e30;
float3 tNear = (pMin - ray.origin) / ray.dir;
float3 tFar = (pMax - ray.origin) / ray.dir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
tFar.x = tmp;
}
t0 = max(tNear.x, t0);
t1 = min(tFar.x, t1);
if (tNear.y > tFar.y) {
float tmp = tNear.y;
tNear.y = tFar.y;
tFar.y = tmp;
}
t0 = max(tNear.y, t0);
t1 = min(tFar.y, t1);
if (tNear.z > tFar.z) {
float tmp = tNear.z;
tNear.z = tFar.z;
tFar.z = tmp;
}
t0 = max(tNear.z, t0);
t1 = min(tFar.z, t1);
if (t0 <= t1) {
hit0 = t0;
hit1 = t1;
return true;
}
else
return false;
}
static inline float Lerp(float t, float a, float b) {
return (1.f - t) * a + t * b;
}
static inline float D(int x, int y, int z, uniform int nVoxels[3],
uniform float density[]) {
x = clamp(x, 0, nVoxels[0]-1);
y = clamp(y, 0, nVoxels[1]-1);
z = clamp(z, 0, nVoxels[2]-1);
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
}
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
return (p - pMin) / (pMax - pMin);
}
static float Density(float3 Pobj, float3 pMin, float3 pMax,
uniform float density[], uniform int nVoxels[3]) {
if (!Inside(Pobj, pMin, pMax))
return 0;
// Compute voxel coordinates and offsets for _Pobj_
float3 vox = Offset(Pobj, pMin, pMax);
vox.x = vox.x * nVoxels[0] - .5f;
vox.y = vox.y * nVoxels[1] - .5f;
vox.z = vox.z * nVoxels[2] - .5f;
int vx = (int)(vox.x), vy = (int)(vox.y), vz = (int)(vox.z);
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
// Trilinearly interpolate density values to compute local density
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
D(vx+1, vy, vz, nVoxels, density));
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
D(vx+1, vy+1, vz, nVoxels, density));
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
D(vx+1, vy, vz+1, nVoxels, density));
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
D(vx+1, vy+1, vz+1, nVoxels, density));
float d0 = Lerp(dy, d00, d10);
float d1 = Lerp(dy, d01, d11);
return Lerp(dz, d0, d1);
}
/* Returns the transmittance between two points p0 and p1, in a volume
with extent (pMin,pMax) with transmittance coefficient sigma_t,
defined by nVoxels[3] voxels in each dimension in the given density
array. */
static float
transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
uniform float3 pMax, uniform float sigma_t,
uniform float density[], uniform int nVoxels[3]) {
float rayT0, rayT1;
Ray ray;
ray.origin = p1;
ray.dir = p0 - p1;
// Find the parametric t range along the ray that is inside the volume.
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
return 1.;
rayT0 = max(rayT0, 0.f);
// Accumulate beam transmittance in tau
float tau = 0;
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
ray.dir.z * ray.dir.z);
uniform float stepDist = 0.2;
float stepT = stepDist / rayLength;
float t = rayT0;
float3 pos = ray.origin + ray.dir * rayT0;
float3 dirStep = ray.dir * stepT;
while (t < rayT1) {
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
pos = pos + dirStep;
t += stepT;
}
return exp(-tau);
}
static inline float
distanceSquared(float3 a, float3 b) {
float3 d = a-b;
return d.x*d.x + d.y*d.y + d.z*d.z;
}
static float
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
float rayT0, rayT1;
uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
uniform float3 lightPos = { -1, 4, 1.5 };
cif (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
return 0.;
rayT0 = max(rayT0, 0.f);
// Parameters that define the volume scattering characteristics and
// sampling rate for raymarching
uniform float Le = .25; // Emission coefficient
uniform float sigma_a = 10; // Absorption coefficient
uniform float sigma_s = 10; // Scattering coefficient
uniform float stepDist = 0.025; // Ray step amount
uniform float lightIntensity = 40; // Light source intensity
float tau = 0.f; // accumulated beam transmittance
float L = 0; // radiance along the ray
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
ray.dir.z * ray.dir.z);
float stepT = stepDist / rayLength;
float t = rayT0;
float3 pos = ray.origin + ray.dir * rayT0;
float3 dirStep = ray.dir * stepT;
cwhile (t < rayT1) {
float d = Density(pos, pMin, pMax, density, nVoxels);
// terminate once attenuation is high
float atten = exp(-tau);
if (atten < .005)
break;
// direct lighting
float Li = lightIntensity / distanceSquared(lightPos, pos) *
transmittance(lightPos, pos, pMin, pMax, sigma_a + sigma_s,
density, nVoxels);
L += stepDist * atten * d * sigma_s * (Li + Le);
// update beam transmittance
tau += stepDist * (sigma_a + sigma_s) * d;
pos = pos + dirStep;
t += stepT;
}
// Gamma correction
return pow(L, 1.f / 2.2f);
}
/* Utility routine used by both the task-based and the single-core entrypoints.
Renders a tile of the image, covering [x0,x0) * [y0, y1), storing the
result into the image[] array.
*/
static void
volume_tile(uniform int x0, uniform int y0, uniform int x1,
uniform int y1, uniform float density[], uniform int nVoxels[3],
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) {
// Work on 4x4=16 pixel big tiles of the image. This function thus
// implicitly assumes that both (x1-x0) and (y1-y0) are evenly divisble
// by 4.
for (uniform int y = y0; y < y1; y += 4) {
for (uniform int x = x0; x < x1; x += 4) {
foreach (o = 0 ... 16) {
// These two arrays encode the mapping from [0,15] to
// offsets within the 4x4 pixel block so that we render
// each pixel inside the block
const uniform int xoffsets[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
0, 1, 0, 1, 2, 3, 2, 3 };
const uniform int yoffsets[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
2, 2, 3, 3, 2, 2, 3, 3 };
// Figure out the pixel to render for this program instance
int xo = x + xoffsets[o], yo = y + yoffsets[o];
// Use viewing parameters to compute the corresponding ray
// for the pixel
Ray ray;
generateRay(raster2camera, camera2world, xo, yo, ray);
// And raymarch through the volume to compute the pixel's
// value
int offset = yo * width + xo;
image[offset] = raymarch(density, nVoxels, ray);
}
}
}
}
task void
volume_task(uniform float density[], uniform int nVoxels[3],
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) {
uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
uniform int xbuckets = (width + (dx-1)) / dx;
uniform int ybuckets = (height + (dy-1)) / dy;
uniform int x0 = (taskIndex % xbuckets) * dx;
uniform int y0 = (taskIndex / xbuckets) * dy;
uniform int x1 = x0 + dx, y1 = y0 + dy;
x1 = min(x1, width);
y1 = min(y1, height);
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
camera2world, width, height, image);
}
export void
volume_ispc(uniform float density[], uniform int nVoxels[3],
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) {
volume_tile(0, 0, width, height, density, nVoxels, raster2camera,
camera2world, width, height, image);
}
export void
volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) {
// Launch tasks to work on (dx,dy)-sized tiles of the image
uniform int dx = 8, dy = 8;
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
width, height, image);
}

View File

@@ -1,34 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>volume</RootNamespace>
<ISPC_file>volume</ISPC_file>
<default_targets>sse2,sse4-x2,avx1-i32x8</default_targets>
</PropertyGroup>
<Import Project="..\common.props" />
<ItemGroup>
<ClCompile Include="volume.cpp" />
<ClCompile Include="volume_serial.cpp" />
<ClCompile Include="../tasksys.cpp" />
</ItemGroup>
</Project>