fixed helpers and added ao_bench example

This commit is contained in:
Evghenii
2014-01-05 10:30:25 +01:00
parent 89169d5506
commit 478f4687b1
5 changed files with 109 additions and 5 deletions

View File

@@ -2,7 +2,7 @@
EXAMPLE=ao
CPP_SRC=ao.cpp ao_serial.cpp
ISPC_SRC=ao.ispc
ISPC_IA_TARGETS=sse2-i32x4,sse4-i32x4,avx1-i32x8,avx2-i32x8
ISPC_IA_TARGETS=avx1-i32x8
ISPC_ARM_TARGETS=neon
include ../common.mk

View File

@@ -137,8 +137,8 @@ int main(int argc, char **argv)
//
#ifndef _CUDA_
double minTimeISPC = 1e30;
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
for (unsigned int i = 0; i < test_iterations[0]; i++) {
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
assert(NSUBSAMPLES == 2);
reset_and_start_timer();
ispc::ao_ispc(width, height, NSUBSAMPLES, fimg);
@@ -158,8 +158,8 @@ int main(int argc, char **argv)
// minimum time for any of them.
//
double minTimeISPCTasks = 1e30;
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
for (unsigned int i = 0; i < test_iterations[1]; i++) {
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
assert(NSUBSAMPLES == 2);
reset_and_start_timer();
@@ -179,8 +179,8 @@ int main(int argc, char **argv)
// minimum time.
//
double minTimeSerial = 1e30;
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
for (unsigned int i = 0; i < test_iterations[2]; i++) {
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
reset_and_start_timer();
ao_serial(width, height, NSUBSAMPLES, fimg);
double t = get_elapsed_msec();
@@ -199,6 +199,9 @@ int main(int argc, char **argv)
minTimeSerial / minTimeISPCTasks);
#endif
savePPM("ao-serial.ppm", width, height);
ispc_free(img);
ispc_free(fimg);
return 0;
}

View File

@@ -259,7 +259,7 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
ao_scanlines(0, h, w, h, nsubsamples, image);
}
#if 0
static void task ao_task(uniform int width, uniform int height,
uniform int nsubsamples, uniform float image[]) {
ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
@@ -270,3 +270,95 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
uniform float image[]) {
launch[h] ao_task(w, h, nsubsamples, image);
}
#else
/*
 * Render ambient occlusion for one rectangular tile of the image.
 *
 * Iterates y over y0 ... y1 and x over x0 ... x1 with foreach_tiled and,
 * for every pixel, traces nsubsamples * nsubsamples primary rays against
 * a hard-coded scene made of one plane and three spheres.
 *
 * x0, x1, y0, y1 - tile bounds passed to foreach_tiled
 * w, h           - full image width/height; used for the pixel offset
 *                  and for mapping pixel coordinates to [-1, 1]
 * nsubsamples    - sub-pixel samples per axis
 * image          - output buffer; three consecutive floats per pixel at
 *                  index 3 * (y * w + x), all set to the same value
 */
static inline void ao_tile(
uniform int x0, uniform int x1,
uniform int y0, uniform int y1,
uniform int w, uniform int h,
uniform int nsubsamples,
uniform float image[])
{
// Hard-coded scene: one plane and three spheres.
// presumably Plane is { point, normal } and Sphere is { center, radius };
// verify against the struct declarations.
uniform Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
uniform Sphere spheres[3] = {
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
{ { -0.5f, 0.0f, -3.0f }, 0.5f },
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
RNGState rngstate;
// The seed is built from programIndex and y0 only.
// NOTE(review): x0 does not take part in the seed, so tiles that share
// the same y0 are seeded identically - confirm this is intended.
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
float invSamples = 1.f / nsubsamples;
foreach_tiled (y = y0 ... y1, x = x0 ... x1)
{
// Index of the first of the three floats that belong to pixel (x, y).
const int offset = 3 * (y * w + x);
// Accumulates the occlusion value over all sub-samples of this pixel.
float res = 0.0f;
for (uniform int u = 0; u < nsubsamples; u++)
for (uniform int v = 0; v < nsubsamples; v++)
{
// Sub-pixel offset of this sample, in [0, 1).
float du = (float)u * invSamples, dv = (float)v * invSamples;
// Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f;
Ray ray;
Isect isect;
ray.org = 0.f;
// Poor man's perspective projection
ray.dir.x = px;
ray.dir.y = py;
ray.dir.z = -1.0;
vnormalize(ray.dir);
// Start with a very large distance and no hit recorded.
// presumably the intersect helpers shrink isect.t and set isect.hit
// when they find a closer hit - verify against their definitions.
isect.t = 1.0e+17;
isect.hit = 0;
for (uniform int snum = 0; snum < 3; ++snum)
ray_sphere_intersect(isect, ray, spheres[snum]);
ray_plane_intersect(isect, ray, plane);
// Only rays that recorded a hit are shaded; the others add nothing to
// res. NOTE(review): an earlier version of this comment referred to a
// 'coherent' if statement, but the code below uses a plain `if`.
if (isect.hit) {
ret = ambient_occlusion(isect, plane, spheres, rngstate);
// Scale by 1 / nsubsamples^2 so that res ends up as the average over
// all sub-samples of the pixel.
ret *= invSamples * invSamples;
res += ret;
}
}
// The same value is written to all three floats of the pixel.
image[offset ] = res;
image[offset+1] = res;
image[offset+2] = res;
}
}
// Width and height, in pixels, of the tile handled by one ao_task instance.
#define TILEX 64
#define TILEY 4
/*
 * Task entry point: renders the tile selected by (taskIndex0, taskIndex1).
 *
 * The tile spans TILEX x TILEY pixels and is clamped to the image size,
 * so tiles on the right/bottom edge may be smaller.
 */
void task ao_task(uniform int width, uniform int height,
                  uniform int nsubsamples, uniform float image[])
{
    // Task instances outside the launched grid have nothing to do.
    if (taskIndex0 >= taskCount0 || taskIndex1 >= taskCount1)
        return;

    const uniform int tileLeft   = taskIndex0 * TILEX;
    const uniform int tileTop    = taskIndex1 * TILEY;
    const uniform int tileRight  = min(tileLeft + TILEX, width);
    const uniform int tileBottom = min(tileTop + TILEY, height);

    ao_tile(tileLeft, tileRight, tileTop, tileBottom,
            width, height, nsubsamples, image);
}
/*
 * Exported entry point: renders the w x h image by launching one ao_task
 * per TILEX x TILEY tile and waiting for all of them to finish.
 */
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
                          uniform float image[])
{
    // Round up so that partial tiles at the right/bottom edges are covered.
    const uniform int tilesAcross = (w + TILEX - 1) / TILEX;
    const uniform int tilesDown   = (h + TILEY - 1) / TILEY;

    launch[tilesAcross, tilesDown] ao_task(w, h, nsubsamples, image);
    sync;
}
#endif

View File

@@ -3,6 +3,7 @@
#ifdef _CUDA_
extern void ispc_malloc(void **ptr, const size_t size);
extern void ispc_free(void *ptr);
extern void ispc_memset(void *ptr, int value, size_t size);
#else
#include <cstdlib>
#include <cstring>
static inline void ispc_malloc(void **ptr, const size_t size)
@@ -13,5 +14,9 @@ static inline void ispc_free(void *ptr)
{
free(ptr);
}
/*
 * Host (non-CUDA) implementation of ispc_memset.
 *
 * Sets the first `size` bytes at `ptr` to `value` (converted to
 * unsigned char, as std::memset does).
 *
 * ptr   - start of the buffer to fill
 * value - fill byte
 * size  - number of bytes to fill; 0 is a no-op
 */
static inline void ispc_memset(void *ptr, int value, size_t size)
{
  // Nothing to fill; also avoids handing a possibly-null pointer to memset.
  if (size == 0)
    return;
  // std::memset is declared in <cstring>.
  std::memset(ptr, value, size);
}
#endif

View File

@@ -12,5 +12,9 @@ void ispc_free(void *ptr)
{
cudaFree(ptr);
}
void ispc_memset(void *ptr, int value, size_t size)
{
cudaMemset(ptr, value, size);
}