fixed helpers and added ao_bench example
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
EXAMPLE=ao
|
||||
CPP_SRC=ao.cpp ao_serial.cpp
|
||||
ISPC_SRC=ao.ispc
|
||||
ISPC_IA_TARGETS=sse2-i32x4,sse4-i32x4,avx1-i32x8,avx2-i32x8
|
||||
ISPC_IA_TARGETS=avx1-i32x8
|
||||
ISPC_ARM_TARGETS=neon
|
||||
|
||||
include ../common.mk
|
||||
|
||||
@@ -137,8 +137,8 @@ int main(int argc, char **argv)
|
||||
//
|
||||
#ifndef _CUDA_
|
||||
double minTimeISPC = 1e30;
|
||||
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
for (unsigned int i = 0; i < test_iterations[0]; i++) {
|
||||
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
assert(NSUBSAMPLES == 2);
|
||||
reset_and_start_timer();
|
||||
ispc::ao_ispc(width, height, NSUBSAMPLES, fimg);
|
||||
@@ -158,8 +158,8 @@ int main(int argc, char **argv)
|
||||
// minimum time for any of them.
|
||||
//
|
||||
double minTimeISPCTasks = 1e30;
|
||||
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
for (unsigned int i = 0; i < test_iterations[1]; i++) {
|
||||
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
assert(NSUBSAMPLES == 2);
|
||||
|
||||
reset_and_start_timer();
|
||||
@@ -179,8 +179,8 @@ int main(int argc, char **argv)
|
||||
// minimum time.
|
||||
//
|
||||
double minTimeSerial = 1e30;
|
||||
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
for (unsigned int i = 0; i < test_iterations[2]; i++) {
|
||||
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
reset_and_start_timer();
|
||||
ao_serial(width, height, NSUBSAMPLES, fimg);
|
||||
double t = get_elapsed_msec();
|
||||
@@ -199,6 +199,9 @@ int main(int argc, char **argv)
|
||||
minTimeSerial / minTimeISPCTasks);
|
||||
#endif
|
||||
savePPM("ao-serial.ppm", width, height);
|
||||
|
||||
ispc_free(img);
|
||||
ispc_free(fimg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -259,7 +259,7 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
ao_scanlines(0, h, w, h, nsubsamples, image);
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
static void task ao_task(uniform int width, uniform int height,
|
||||
uniform int nsubsamples, uniform float image[]) {
|
||||
ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
|
||||
@@ -270,3 +270,95 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
launch[h] ao_task(w, h, nsubsamples, image);
|
||||
}
|
||||
#else
|
||||
|
||||
// Computes ambient occlusion for the pixels of the tile [x0, x1) x [y0, y1)
// of a w x h image and writes the result to `image`.
//
//   x0, x1, y0, y1 - tile bounds handed to foreach_tiled
//   w, h           - full image size; used for the pixel offset and for
//                    mapping pixel coordinates to [-1, 1]
//   nsubsamples    - sub-samples per axis; nsubsamples * nsubsamples rays
//                    are traced per pixel
//   image          - output buffer, 3 floats per pixel at 3 * (y * w + x);
//                    the same value is stored in all three channels
static inline void ao_tile(
    uniform int x0, uniform int x1,
    uniform int y0, uniform int y1,
    uniform int w, uniform int h,
    uniform int nsubsamples,
    uniform float image[])
{
    // Hard-coded scene: one plane and three spheres.
    uniform Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
    uniform Sphere spheres[3] = {
        { { -2.0f, 0.0f, -3.5f }, 0.5f },
        { { -0.5f, 0.0f, -3.0f }, 0.5f },
        { { 1.0f, 0.0f, -2.2f }, 0.5f } };
    RNGState rngstate;

    // Each program instance gets its own seed, derived from its lane index
    // and the first row of the tile.
    // NOTE(review): x0 does not take part in the seed, so tiles that share
    // y0 are seeded identically -- confirm this is intended.
    seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));

    // Built from uniform values only, so it is the same for every program
    // instance; keep it uniform instead of replicating it per lane.
    uniform float invSamples = 1.f / nsubsamples;

    foreach_tiled (y = y0 ... y1, x = x0 ... x1)
    {
        const int offset = 3 * (y * w + x);
        float res = 0.0f;

        for (uniform int u = 0; u < nsubsamples; u++)
            for (uniform int v = 0; v < nsubsamples; v++)
            {
                float du = (float)u * invSamples, dv = (float)v * invSamples;

                // Figure out x,y pixel in NDC
                float px = (x + du - (w / 2.0f)) / (w / 2.0f);
                float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
                float ret = 0.f;
                Ray ray;
                Isect isect;

                ray.org = 0.f;

                // Poor man's perspective projection
                ray.dir.x = px;
                ray.dir.y = py;
                ray.dir.z = -1.0;
                vnormalize(ray.dir);

                isect.t = 1.0e+17;
                isect.hit = 0;

                for (uniform int snum = 0; snum < 3; ++snum)
                    ray_sphere_intersect(isect, ray, spheres[snum]);
                ray_plane_intersect(isect, ray, plane);

                // Plain 'if' (not the coherent 'cif' form). Only rays that
                // hit something contribute; misses add nothing to the sum.
                if (isect.hit) {
                    ret = ambient_occlusion(isect, plane, spheres, rngstate);
                    // Weight by 1 / nsubsamples^2 so the sum over all
                    // sub-samples is an average.
                    ret *= invSamples * invSamples;
                    res += ret;
                }
            }

        image[offset    ] = res;
        image[offset + 1] = res;
        image[offset + 2] = res;
    }
}
|
||||
|
||||
#define TILEX 64
|
||||
#define TILEY 4
|
||||
|
||||
// Task body for the 2D launch: task (taskIndex0, taskIndex1) renders one
// tile of at most TILEX x TILEY pixels through ao_tile.
void task ao_task(uniform int width, uniform int height,
                  uniform int nsubsamples, uniform float image[])
{
    // Nothing to do for task indices outside the launched grid.
    if (taskIndex0 >= taskCount0 || taskIndex1 >= taskCount1)
        return;

    // Top-left corner of this task's tile.
    const uniform int tileLeft = taskIndex0 * TILEX;
    const uniform int tileTop  = taskIndex1 * TILEY;

    // Clamp the far edges so tiles on the border stay inside the image.
    const uniform int tileRight  = min(tileLeft + TILEX, width);
    const uniform int tileBottom = min(tileTop + TILEY, height);

    ao_tile(tileLeft, tileRight, tileTop, tileBottom,
            width, height, nsubsamples, image);
}
|
||||
|
||||
|
||||
// Exported entry point: splits the w x h image into TILEX x TILEY tiles,
// launches one ao_task per tile and waits for all of them to finish.
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
                          uniform float image[])
{
    // Round up so a partial tile at the right/bottom edge still gets a task.
    const uniform int tilesAcross = (w + TILEX - 1) / TILEX;
    const uniform int tilesDown   = (h + TILEY - 1) / TILEY;

    launch[tilesAcross, tilesDown] ao_task(w, h, nsubsamples, image);
    sync;
}
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#ifdef _CUDA_
|
||||
extern void ispc_malloc(void **ptr, const size_t size);
|
||||
extern void ispc_free(void *ptr);
|
||||
extern void ispc_memset(void *ptr, int value, size_t size);
|
||||
#else
|
||||
#include <cstdlib>
#include <cstring>
|
||||
static inline void ispc_malloc(void **ptr, const size_t size)
|
||||
@@ -13,5 +14,9 @@ static inline void ispc_free(void *ptr)
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
// Host (non-_CUDA_) implementation of ispc_memset: sets the first `size`
// bytes starting at `ptr` to the byte value `value`.
//
//   ptr   - start of the buffer to fill
//   value - fill value; converted to unsigned char by std::memset
//   size  - number of bytes to fill
static inline void ispc_memset(void *ptr, int value, size_t size)
{
    // std::memset is the name <cstring> is guaranteed to declare, so this
    // header does not rely on the including file to provide memset.
    std::memset(ptr, value, size);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -12,5 +12,9 @@ void ispc_free(void *ptr)
|
||||
{
|
||||
cudaFree(ptr);
|
||||
}
|
||||
// CUDA implementation of ispc_memset: forwards to cudaMemset to set `size`
// bytes starting at `ptr` to the byte value `value`.
// The status returned by cudaMemset is deliberately discarded here, so a
// failed memset is not reported to the caller.
void ispc_memset(void *ptr, int value, size_t size)
{
    (void)cudaMemset(ptr, value, size);
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user