added tunings for aobench
This commit is contained in:
@@ -3,6 +3,7 @@ ISPC_SRC=ao.ispc
|
|||||||
CU_SRC=ao.cu
|
CU_SRC=ao.cu
|
||||||
CXX_SRC=ao.cpp ao_serial.cpp
|
CXX_SRC=ao.cpp ao_serial.cpp
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||||
|
|
||||||
LLVM_GPU=1
|
LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
@@ -135,6 +135,7 @@ int main(int argc, char **argv)
|
|||||||
// Run the ispc path, test_iterations times, and report the minimum
|
// Run the ispc path, test_iterations times, and report the minimum
|
||||||
// time for any of them.
|
// time for any of them.
|
||||||
//
|
//
|
||||||
|
#define _CUDA_
|
||||||
#ifndef _CUDA_
|
#ifndef _CUDA_
|
||||||
double minTimeISPC = 1e30;
|
double minTimeISPC = 1e30;
|
||||||
for (unsigned int i = 0; i < test_iterations[0]; i++) {
|
for (unsigned int i = 0; i < test_iterations[0]; i++) {
|
||||||
|
|||||||
@@ -189,7 +189,8 @@ ray_plane_intersect(Isect &isect,const Ray &ray, const Plane &plane) {
|
|||||||
float d = -dot(plane.p, plane.n);
|
float d = -dot(plane.p, plane.n);
|
||||||
float v = dot(ray.dir, plane.n);
|
float v = dot(ray.dir, plane.n);
|
||||||
|
|
||||||
if (abs(v) < 1.0e-17)
|
#if 0
|
||||||
|
if (abs(v) < 1.0f-17)
|
||||||
return;
|
return;
|
||||||
else {
|
else {
|
||||||
float t = -(dot(ray.org, plane.n) + d) / v;
|
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||||
@@ -201,6 +202,17 @@ ray_plane_intersect(Isect &isect,const Ray &ray, const Plane &plane) {
|
|||||||
isect.n = plane.n;
|
isect.n = plane.n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
if (abs(v) <= 1.0e-17)
|
||||||
|
return;
|
||||||
|
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||||
|
if ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + ray.dir * t;
|
||||||
|
isect.n = plane.n;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -213,6 +225,7 @@ ray_sphere_intersect(Isect &isect,const Ray &ray, const Sphere &sphere) {
|
|||||||
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
||||||
float D = B * B - C;
|
float D = B * B - C;
|
||||||
|
|
||||||
|
#if 0
|
||||||
if (D > 0.) {
|
if (D > 0.) {
|
||||||
float t = -B - sqrt(D);
|
float t = -B - sqrt(D);
|
||||||
|
|
||||||
@@ -224,6 +237,21 @@ ray_sphere_intersect(Isect &isect,const Ray &ray, const Sphere &sphere) {
|
|||||||
vnormalize(isect.n);
|
vnormalize(isect.n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
if (D <= 0.0f)
|
||||||
|
return;
|
||||||
|
|
||||||
|
float t = -B - sqrt(D);
|
||||||
|
|
||||||
|
if ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + ray.dir * t;
|
||||||
|
isect.n = isect.p - sphere.center;
|
||||||
|
vnormalize(isect.n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -231,16 +259,16 @@ __device__
|
|||||||
static inline void
|
static inline void
|
||||||
orthoBasis(vec basis[3], vec n) {
|
orthoBasis(vec basis[3], vec n) {
|
||||||
basis[2] = n;
|
basis[2] = n;
|
||||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
basis[1].x = 0.0f; basis[1].y = 0.0f; basis[1].z = 0.0f;
|
||||||
|
|
||||||
if ((n.x < 0.6) && (n.x > -0.6)) {
|
if ((n.x < 0.6f) && (n.x > -0.6f)) {
|
||||||
basis[1].x = 1.0;
|
basis[1].x = 1.0f;
|
||||||
} else if ((n.y < 0.6) && (n.y > -0.6)) {
|
} else if ((n.y < 0.6f) && (n.y > -0.6f)) {
|
||||||
basis[1].y = 1.0;
|
basis[1].y = 1.0f;
|
||||||
} else if ((n.z < 0.6) && (n.z > -0.6)) {
|
} else if ((n.z < 0.6f) && (n.z > -0.6f)) {
|
||||||
basis[1].z = 1.0;
|
basis[1].z = 1.0f;
|
||||||
} else {
|
} else {
|
||||||
basis[1].x = 1.0;
|
basis[1].x = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
basis[0] = vcross(basis[1], basis[2]);
|
basis[0] = vcross(basis[1], basis[2]);
|
||||||
@@ -258,7 +286,7 @@ ambient_occlusion(Isect &isect, const Plane &plane, const Sphere spheres[3],
|
|||||||
float eps = 0.0001f;
|
float eps = 0.0001f;
|
||||||
vec p; //, n;
|
vec p; //, n;
|
||||||
vec basis[3];
|
vec basis[3];
|
||||||
float occlusion = 0.0;
|
float occlusion = 0.0f;
|
||||||
|
|
||||||
p = isect.p + isect.n * eps;
|
p = isect.p + isect.n * eps;
|
||||||
|
|
||||||
@@ -275,7 +303,7 @@ ambient_occlusion(Isect &isect, const Plane &plane, const Sphere spheres[3],
|
|||||||
float phi = 2.0f * M_PI * frandom(&rngstate);
|
float phi = 2.0f * M_PI * frandom(&rngstate);
|
||||||
float x = cos(phi) * theta;
|
float x = cos(phi) * theta;
|
||||||
float y = sin(phi) * theta;
|
float y = sin(phi) * theta;
|
||||||
float z = sqrt(1.0 - theta * theta);
|
float z = sqrtf(1.0f - theta * theta);
|
||||||
|
|
||||||
// local . global
|
// local . global
|
||||||
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
|
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
|
||||||
@@ -287,14 +315,14 @@ ambient_occlusion(Isect &isect, const Plane &plane, const Sphere spheres[3],
|
|||||||
ray.dir.y = ry;
|
ray.dir.y = ry;
|
||||||
ray.dir.z = rz;
|
ray.dir.z = rz;
|
||||||
|
|
||||||
occIsect.t = 1.0e+17;
|
occIsect.t = 1.0f+17;
|
||||||
occIsect.hit = 0;
|
occIsect.hit = 0;
|
||||||
|
|
||||||
for ( int snum = 0; snum < 3; ++snum)
|
for ( int snum = 0; snum < 3; ++snum)
|
||||||
ray_sphere_intersect(occIsect, ray, spheres[snum]);
|
ray_sphere_intersect(occIsect, ray, spheres[snum]);
|
||||||
ray_plane_intersect (occIsect, ray, plane);
|
ray_plane_intersect (occIsect, ray, plane);
|
||||||
|
|
||||||
if (occIsect.hit) occlusion += 1.0;
|
if (occIsect.hit) occlusion += 1.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -360,8 +388,8 @@ static inline void ao_tiles(
|
|||||||
|
|
||||||
// Note use of 'coherent' if statement; the set of rays we
|
// Note use of 'coherent' if statement; the set of rays we
|
||||||
// trace will often all hit or all miss the scene
|
// trace will often all hit or all miss the scene
|
||||||
if (isect.hit) {
|
if (any(isect.hit)) {
|
||||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
ret = isect.hit*ambient_occlusion(isect, plane, spheres, rngstate);
|
||||||
ret *= invSamples * invSamples;
|
ret *= invSamples * invSamples;
|
||||||
res += ret;
|
res += ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,6 +89,7 @@ ray_plane_intersect(Isect &isect, Ray &ray, const uniform Plane &plane) {
|
|||||||
float d = -dot(plane.p, plane.n);
|
float d = -dot(plane.p, plane.n);
|
||||||
float v = dot(ray.dir, plane.n);
|
float v = dot(ray.dir, plane.n);
|
||||||
|
|
||||||
|
#if 0
|
||||||
cif (abs(v) < 1.0e-17)
|
cif (abs(v) < 1.0e-17)
|
||||||
return;
|
return;
|
||||||
else {
|
else {
|
||||||
@@ -101,6 +102,17 @@ ray_plane_intersect(Isect &isect, Ray &ray, const uniform Plane &plane) {
|
|||||||
isect.n = plane.n;
|
isect.n = plane.n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
cif (abs(v) <= 1.0e-17)
|
||||||
|
return;
|
||||||
|
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||||
|
cif ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + ray.dir * t;
|
||||||
|
isect.n = plane.n;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -112,6 +124,7 @@ ray_sphere_intersect(Isect &isect, Ray &ray, const uniform Sphere &sphere) {
|
|||||||
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
||||||
float D = B * B - C;
|
float D = B * B - C;
|
||||||
|
|
||||||
|
#if 0
|
||||||
cif (D > 0.) {
|
cif (D > 0.) {
|
||||||
float t = -B - sqrt(D);
|
float t = -B - sqrt(D);
|
||||||
|
|
||||||
@@ -123,6 +136,19 @@ ray_sphere_intersect(Isect &isect, Ray &ray, const uniform Sphere &sphere) {
|
|||||||
vnormalize(isect.n);
|
vnormalize(isect.n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
cif (D <=0.0f)
|
||||||
|
return;
|
||||||
|
|
||||||
|
float t = -B - sqrt(D);
|
||||||
|
cif ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + t * ray.dir;
|
||||||
|
isect.n = isect.p - sphere.center;
|
||||||
|
vnormalize(isect.n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -255,11 +281,22 @@ static inline void ao_tiles(
|
|||||||
|
|
||||||
// Note use of 'coherent' if statement; the set of rays we
|
// Note use of 'coherent' if statement; the set of rays we
|
||||||
// trace will often all hit or all miss the scene
|
// trace will often all hit or all miss the scene
|
||||||
|
#if 0
|
||||||
if (isect.hit) {
|
if (isect.hit) {
|
||||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||||
ret *= invSamples * invSamples;
|
ret *= invSamples * invSamples;
|
||||||
res += ret;
|
res += ret;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
if(any(isect.hit))
|
||||||
|
{
|
||||||
|
ret = isect.hit*ambient_occlusion(isect, plane, spheres, rngstate);
|
||||||
|
ret *= invSamples * invSamples;
|
||||||
|
res += ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
image[offset ] = res;
|
image[offset ] = res;
|
||||||
|
|||||||
Reference in New Issue
Block a user