This commit is contained in:
Evghenii
2014-01-05 12:23:48 +01:00
parent 62deb82bf8
commit a7c2a7d4f9
8 changed files with 76 additions and 27 deletions

View File

@@ -1,5 +1,6 @@
PROG=ao_gpu
PROG=ao
ISPC_SRC=ao.ispc
CU_SRC=ao.cu
CXX_SRC=ao.cpp ao_serial.cpp
PTXCC_REGMAX=64

View File

@@ -5,10 +5,11 @@ CXX=g++
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_gpu/ -D_CUDA_
#
NVCC=nvcc
NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_ -I../
NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_ -I../ -Xptxas=-v
ifdef PTXCC_REGMAX
NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
endif
NVCC_FLAGS+=--use_fast_math
#
LD=nvcc
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35

View File

@@ -6,5 +6,6 @@
// Task-grid helpers.
// Each block contributes four task slots along x (taskCount0 = gridDim.x*4);
// the y grid dimension maps one-to-one onto the second task axis.
#define taskCount0 (gridDim.x*4)
#define taskIndex1 (blockIdx.y)
#define taskCount1 (gridDim.y)
// Linearised task index: x-major within each y row.
#define taskIndex (taskIndex0 + taskCount0*taskIndex1)
// Index of the 32-thread group inside the block (threadIdx.x / 32).
#define warpIdx (threadIdx.x >> 5)
// launch(ntx, nty, ntz, func)(args...);
// Launches `func` from the thread whose programIndex is 0 only, using
// ceil(ntx/4) x nty x ntz blocks of 128 threads. Because ntx is rounded up to
// a multiple of 4, taskCount0 inside `func` can exceed ntx by up to 3.
// Written as `if (!cond) {} else ...` so that an `else` following the call
// site cannot silently attach to the macro's own `if`; nty/ntz are
// parenthesised so expression arguments expand safely.
#define launch(ntx,nty,ntz,func) if (programIndex!=0) {} else func<<<dim3(((ntx)+4-1)/4,(nty),(ntz)),128>>>

View File

@@ -197,7 +197,12 @@ int main(int argc, char *argv[]) {
// And then read the triangles
uint nTris;
READ(nTris, 1);
#if 0
Triangle *triangles = new Triangle[nTris];
#else
Triangle *triangles;
ispc_malloc((void**)&triangles, nTris*sizeof(Triangle));
#endif
for (uint i = 0; i < nTris; ++i) {
// 9x floats for the 3 vertices
float v[9];
@@ -246,8 +251,8 @@ int main(int argc, char *argv[]) {
writeImage(id, image, width, height, "rt-ispc-1core.ppm");
#endif
memset(id, 0, width*height*sizeof(int));
memset(image, 0, width*height*sizeof(float));
ispc_memset(id, 0, width*height*sizeof(int));
ispc_memset(image, 0, width*height*sizeof(float));
//
// Run 3 iterations with ispc + 1 core, record the minimum time
@@ -266,8 +271,8 @@ int main(int argc, char *argv[]) {
writeImage(id, image, width, height, "rt-ispc-tasks.ppm");
memset(id, 0, width*height*sizeof(int));
memset(image, 0, width*height*sizeof(float));
ispc_memset(id, 0, width*height*sizeof(int));
ispc_memset(image, 0, width*height*sizeof(float));
//
// And 3 iterations with the serial implementation, reporting the

View File

@@ -1,8 +1,4 @@
#define programCount 32
#define programIndex (threadIdx.x & 31)
#define taskIndex (blockIdx.x*4 + (threadIdx.x >> 5))
#define taskCount (gridDim.x*4)
#define warpIdx (threadIdx.x >> 5)
#include <cstdio>
#include "cuda_helpers.cuh"
#define float3 Float3
struct Float3
@@ -339,7 +335,7 @@ void raytrace_tile_task( int width, int height,
}
extern "C" __global__ void raytrace_ispc_tasks( int width, int height,
extern "C" __global__ void raytrace_ispc_tasks___export( int width, int height,
int baseWidth, int baseHeight,
const float raster2camera[4][4],
const float camera2world[4][4],
@@ -350,10 +346,28 @@ extern "C" __global__ void raytrace_ispc_tasks( int width, int height,
int xBuckets = (width + (dx-1)) / dx;
int yBuckets = (height + (dy-1)) / dy;
int nTasks = xBuckets * yBuckets;
if (programIndex == 0)
raytrace_tile_task<<<(nTasks+4-1)/4,128>>>(width, height, baseWidth, baseHeight,
raster2camera, camera2world,
image, id, nodes, triangles);
launch(nTasks,1,1,raytrace_tile_task)
(width, height, baseWidth, baseHeight,
raster2camera, camera2world,
image, id, nodes, triangles);
cudaDeviceSynchronize();
}
// Host entry point for the GPU ray tracer.
//
// Launches raytrace_ispc_tasks___export with one block of 32 threads (that
// kernel launches the per-tile tasks from the device) and blocks until the
// device is idle. All arguments are forwarded to the kernel unchanged;
// presumably every pointer must be device-accessible -- TODO confirm against
// the allocator used by the caller.
//
// The function returns void, so CUDA failures cannot be propagated; they are
// reported on stderr instead of being silently dropped.
extern "C" __host__ void raytrace_ispc_tasks( int width, int height,
                                              int baseWidth, int baseHeight,
                                              const float raster2camera[4][4],
                                              const float camera2world[4][4],
                                              float image[], int id[],
                                              const LinearBVHNode nodes[],
                                              const Triangle triangles[]) {
    raytrace_ispc_tasks___export<<<1,32>>>( width, height,
                                            baseWidth, baseHeight,
                                            raster2camera,
                                            camera2world,
                                            image, id,
                                            nodes,
                                            triangles);

    // A kernel launch returns nothing; configuration errors only show up here.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "raytrace_ispc_tasks: kernel launch failed: %s\n",
                cudaGetErrorString(err));
        return;
    }

    // Faults raised while the kernel runs surface at the next synchronising call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "raytrace_ispc_tasks: kernel execution failed: %s\n",
                cudaGetErrorString(err));
    }
}

View File

@@ -31,13 +31,25 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define bool int
typedef bool bool_t;
typedef float<3> float3;
#if 1
#define __ORIG
#endif
// Three-component integer tuple. Used as the storage type for
// Ray::dirIsNeg when __ORIG is not defined.
struct int3
{
    int x;
    int y;
    int z;
};
// State carried for each ray during traversal.
struct Ray {
    float3 origin;
    float3 dir;
    // Component-wise reciprocal of dir (generateRay sets it to 1.f / dir).
    float3 invDir;
    // Per-axis flag, 1 when any program instance has a negative invDir
    // component on that axis (see generateRay). Stored either as a 3-element
    // array (__ORIG) or as an int3 with named members.
#ifdef __ORIG
    uniform unsigned int dirIsNeg[3];
#else
    uniform int3 dirIsNeg;
#endif
    float mint;
    float maxt;
    int hitId;
};
@@ -101,13 +113,19 @@ static void generateRay(uniform const float raster2camera[4][4],
ray.invDir = 1.f / ray.dir;
#ifdef __ORIG
ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;
#else
ray.dirIsNeg.x = any(ray.invDir.x < 0) ? 1 : 0;
ray.dirIsNeg.y = any(ray.invDir.y < 0) ? 1 : 0;
ray.dirIsNeg.z = any(ray.invDir.z < 0) ? 1 : 0;
#endif
}
static bool BBoxIntersect(const uniform float bounds[2][3],
static bool_t BBoxIntersect(const uniform float bounds[2][3],
const Ray &ray) {
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
@@ -146,7 +164,7 @@ static bool BBoxIntersect(const uniform float bounds[2][3],
static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
static bool_t TriIntersect(const uniform Triangle &tri, Ray &ray) {
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
@@ -155,7 +173,7 @@ static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
bool hit = true;
bool_t hit = true;
if (divisor == 0.)
hit = false;
@@ -186,10 +204,11 @@ static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
}
bool BVHIntersect(const uniform LinearBVHNode nodes[],
bool_t
BVHIntersect(const uniform LinearBVHNode nodes[],
const uniform Triangle tris[], Ray &r) {
Ray ray = r;
bool hit = false;
bool_t hit = false;
// Follow ray through BVH nodes to find primitive intersections
uniform int todoOffset = 0, nodeNum = 0;
uniform int todo[64];
@@ -212,7 +231,15 @@ bool BVHIntersect(const uniform LinearBVHNode nodes[],
}
else {
// Put far BVH node on _todo_ stack, advance to near node
if (r.dirIsNeg[node.splitAxis]) {
#ifdef __ORIG
int dirIsNeg = r.dirIsNeg[node.splitAxis];
#else
int dirIsNeg;
if (node.splitAxis == 0) dirIsNeg = r.dirIsNeg.x;
if (node.splitAxis == 1) dirIsNeg = r.dirIsNeg.y;
if (node.splitAxis == 2) dirIsNeg = r.dirIsNeg.z;
#endif
if (dirIsNeg) {
todo[todoOffset++] = nodeNum + 1;
nodeNum = node.offset;
}