+1
This commit is contained in:
2
examples_ptx/aobench/.gitignore
vendored
Normal file
2
examples_ptx/aobench/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
ao
|
||||||
|
*.ppm
|
||||||
8
examples_ptx/aobench/Makefile_cpu
Normal file
8
examples_ptx/aobench/Makefile_cpu
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
EXAMPLE=ao
|
||||||
|
CPP_SRC=ao.cpp ao_serial.cpp
|
||||||
|
ISPC_SRC=ao.ispc
|
||||||
|
ISPC_IA_TARGETS=avx1-i32x8
|
||||||
|
ISPC_ARM_TARGETS=neon
|
||||||
|
|
||||||
|
include ../common.mk
|
||||||
9
examples_ptx/aobench/Makefile_gpu
Normal file
9
examples_ptx/aobench/Makefile_gpu
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
PROG=ao_gpu
|
||||||
|
ISPC_SRC=ao.ispc
|
||||||
|
CXX_SRC=ao.cpp ao_serial.cpp
|
||||||
|
PTXCC_REGMAX=64
|
||||||
|
|
||||||
|
include ../common_gpu.mk
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
204
examples_ptx/aobench/ao.cpp
Normal file
204
examples_ptx/aobench/ao.cpp
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define NOMINMAX
|
||||||
|
#pragma warning (disable: 4244)
|
||||||
|
#pragma warning (disable: 4305)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cassert>
|
||||||
|
#ifdef __linux__
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
#include <math.h>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include "ao_ispc.h"
|
||||||
|
|
||||||
|
#include "../timing.h"
|
||||||
|
#include "../ispc_malloc.h"
|
||||||
|
|
||||||
|
#define NSUBSAMPLES 2
|
||||||
|
|
||||||
|
extern void ao_serial(int w, int h, int nsubsamples, float image[]);
|
||||||
|
|
||||||
|
static unsigned int test_iterations[] = {3, 7, 1};
|
||||||
|
static unsigned int width, height;
|
||||||
|
static unsigned char *img;
|
||||||
|
static float *fimg;
|
||||||
|
|
||||||
|
|
||||||
|
static unsigned char
|
||||||
|
clamp(float f)
|
||||||
|
{
|
||||||
|
int i = (int)(f * 255.5);
|
||||||
|
|
||||||
|
if (i < 0) i = 0;
|
||||||
|
if (i > 255) i = 255;
|
||||||
|
|
||||||
|
return (unsigned char)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
savePPM(const char *fname, int w, int h)
|
||||||
|
{
|
||||||
|
for (int y = 0; y < h; y++) {
|
||||||
|
for (int x = 0; x < w; x++) {
|
||||||
|
img[3 * (y * w + x) + 0] = clamp(fimg[3 *(y * w + x) + 0]);
|
||||||
|
img[3 * (y * w + x) + 1] = clamp(fimg[3 *(y * w + x) + 1]);
|
||||||
|
img[3 * (y * w + x) + 2] = clamp(fimg[3 *(y * w + x) + 2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE *fp = fopen(fname, "wb");
|
||||||
|
if (!fp) {
|
||||||
|
perror(fname);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(fp, "P6\n");
|
||||||
|
fprintf(fp, "%d %d\n", w, h);
|
||||||
|
fprintf(fp, "255\n");
|
||||||
|
fwrite(img, w * h * 3, 1, fp);
|
||||||
|
fclose(fp);
|
||||||
|
printf("Wrote image file %s\n", fname);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc < 3) {
|
||||||
|
printf ("%s\n", argv[0]);
|
||||||
|
printf ("Usage: ao [width] [height] [ispc iterations] [tasks iterations] [serial iterations]\n");
|
||||||
|
getchar();
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (argc == 6) {
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
test_iterations[i] = atoi(argv[3 + i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
width = atoi (argv[1]);
|
||||||
|
height = atoi (argv[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate space for output images
|
||||||
|
#if 0
|
||||||
|
img = new unsigned char[width * height * 3];
|
||||||
|
fimg = new float[width * height * 3];
|
||||||
|
#else
|
||||||
|
ispc_malloc((void**) &img, sizeof(unsigned char)*width*height*3);
|
||||||
|
ispc_malloc((void**)&fimg, sizeof( float)*width*height*3);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Run the ispc path, test_iterations times, and report the minimum
|
||||||
|
// time for any of them.
|
||||||
|
//
|
||||||
|
#ifndef _CUDA_
|
||||||
|
double minTimeISPC = 1e30;
|
||||||
|
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||||
|
for (unsigned int i = 0; i < test_iterations[0]; i++) {
|
||||||
|
assert(NSUBSAMPLES == 2);
|
||||||
|
reset_and_start_timer();
|
||||||
|
ispc::ao_ispc(width, height, NSUBSAMPLES, fimg);
|
||||||
|
double t = get_elapsed_msec();
|
||||||
|
printf("@time of ISPC run:\t\t\t[%.3f] msec\n", t);
|
||||||
|
minTimeISPC = std::min(minTimeISPC, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report results and save image
|
||||||
|
printf("[aobench ispc]:\t\t\t[%.3f] msec (%d x %d image)\n",
|
||||||
|
minTimeISPC, width, height);
|
||||||
|
savePPM("ao-ispc.ppm", width, height);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Run the ispc + tasks path, test_iterations times, and report the
|
||||||
|
// minimum time for any of them.
|
||||||
|
//
|
||||||
|
double minTimeISPCTasks = 1e30;
|
||||||
|
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||||
|
for (unsigned int i = 0; i < test_iterations[1]; i++) {
|
||||||
|
assert(NSUBSAMPLES == 2);
|
||||||
|
|
||||||
|
reset_and_start_timer();
|
||||||
|
ispc::ao_ispc_tasks(width, height, NSUBSAMPLES, fimg);
|
||||||
|
double t = get_elapsed_msec();
|
||||||
|
printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec\n", t);
|
||||||
|
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report results and save image
|
||||||
|
printf("[aobench ispc + tasks]:\t\t[%.3f] msec (%d x %d image)\n",
|
||||||
|
minTimeISPCTasks, width, height);
|
||||||
|
savePPM("ao-ispc-tasks.ppm", width, height);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Run the serial path, again test_iteration times, and report the
|
||||||
|
// minimum time.
|
||||||
|
//
|
||||||
|
double minTimeSerial = 1e30;
|
||||||
|
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||||
|
for (unsigned int i = 0; i < test_iterations[2]; i++) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
ao_serial(width, height, NSUBSAMPLES, fimg);
|
||||||
|
double t = get_elapsed_msec();
|
||||||
|
printf("@time of serial run:\t\t\t\t[%.3f] msec\n", t);
|
||||||
|
minTimeSerial = std::min(minTimeSerial, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report more results, save another image...
|
||||||
|
printf("[aobench serial]:\t\t[%.3f] msec (%d x %d image)\n", minTimeSerial,
|
||||||
|
width, height);
|
||||||
|
#ifndef _CUDA_
|
||||||
|
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n",
|
||||||
|
minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks);
|
||||||
|
#else
|
||||||
|
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n",
|
||||||
|
minTimeSerial / minTimeISPCTasks);
|
||||||
|
#endif
|
||||||
|
savePPM("ao-serial.ppm", width, height);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
272
examples_ptx/aobench/ao.ispc
Normal file
272
examples_ptx/aobench/ao.ispc
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
// -*- mode: c++ -*-
|
||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define NAO_SAMPLES 8
|
||||||
|
#define M_PI 3.1415926535f
|
||||||
|
|
||||||
|
typedef float<3> vec;
|
||||||
|
|
||||||
|
struct Isect {
|
||||||
|
float t;
|
||||||
|
vec p;
|
||||||
|
vec n;
|
||||||
|
int hit;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Sphere {
|
||||||
|
vec center;
|
||||||
|
float radius;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Plane {
|
||||||
|
vec p;
|
||||||
|
vec n;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Ray {
|
||||||
|
vec org;
|
||||||
|
vec dir;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline float dot(vec a, vec b) {
|
||||||
|
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline vec vcross(vec v0, vec v1) {
|
||||||
|
vec ret;
|
||||||
|
ret.x = v0.y * v1.z - v0.z * v1.y;
|
||||||
|
ret.y = v0.z * v1.x - v0.x * v1.z;
|
||||||
|
ret.z = v0.x * v1.y - v0.y * v1.x;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vnormalize(vec &v) {
|
||||||
|
float len2 = dot(v, v);
|
||||||
|
float invlen = rsqrt(len2);
|
||||||
|
v *= invlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
ray_plane_intersect(Isect &isect, Ray &ray, const uniform Plane &plane) {
|
||||||
|
float d = -dot(plane.p, plane.n);
|
||||||
|
float v = dot(ray.dir, plane.n);
|
||||||
|
|
||||||
|
cif (abs(v) < 1.0e-17)
|
||||||
|
return;
|
||||||
|
else {
|
||||||
|
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||||
|
|
||||||
|
cif ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + ray.dir * t;
|
||||||
|
isect.n = plane.n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
ray_sphere_intersect(Isect &isect, Ray &ray, const uniform Sphere &sphere) {
|
||||||
|
vec rs = ray.org - sphere.center;
|
||||||
|
|
||||||
|
float B = dot(rs, ray.dir);
|
||||||
|
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
||||||
|
float D = B * B - C;
|
||||||
|
|
||||||
|
cif (D > 0.) {
|
||||||
|
float t = -B - sqrt(D);
|
||||||
|
|
||||||
|
cif ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + t * ray.dir;
|
||||||
|
isect.n = isect.p - sphere.center;
|
||||||
|
vnormalize(isect.n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
orthoBasis(vec basis[3], vec n) {
|
||||||
|
basis[2] = n;
|
||||||
|
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||||
|
|
||||||
|
if ((n.x < 0.6) && (n.x > -0.6)) {
|
||||||
|
basis[1].x = 1.0;
|
||||||
|
} else if ((n.y < 0.6) && (n.y > -0.6)) {
|
||||||
|
basis[1].y = 1.0;
|
||||||
|
} else if ((n.z < 0.6) && (n.z > -0.6)) {
|
||||||
|
basis[1].z = 1.0;
|
||||||
|
} else {
|
||||||
|
basis[1].x = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
basis[0] = vcross(basis[1], basis[2]);
|
||||||
|
vnormalize(basis[0]);
|
||||||
|
|
||||||
|
basis[1] = vcross(basis[2], basis[0]);
|
||||||
|
vnormalize(basis[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static float
|
||||||
|
ambient_occlusion(Isect &isect, const uniform Plane &plane, const uniform Sphere spheres[3],
|
||||||
|
RNGState &rngstate) {
|
||||||
|
float eps = 0.0001f;
|
||||||
|
vec p, n;
|
||||||
|
vec basis[3];
|
||||||
|
float occlusion = 0.0;
|
||||||
|
|
||||||
|
p = isect.p + eps * isect.n;
|
||||||
|
|
||||||
|
orthoBasis(basis, isect.n);
|
||||||
|
|
||||||
|
static const uniform int ntheta = NAO_SAMPLES;
|
||||||
|
static const uniform int nphi = NAO_SAMPLES;
|
||||||
|
for (uniform int j = 0; j < ntheta; j++) {
|
||||||
|
for (uniform int i = 0; i < nphi; i++) {
|
||||||
|
Ray ray;
|
||||||
|
Isect occIsect;
|
||||||
|
|
||||||
|
float theta = sqrt(frandom(&rngstate));
|
||||||
|
float phi = 2.0f * M_PI * frandom(&rngstate);
|
||||||
|
float x = cos(phi) * theta;
|
||||||
|
float y = sin(phi) * theta;
|
||||||
|
float z = sqrt(1.0 - theta * theta);
|
||||||
|
|
||||||
|
// local . global
|
||||||
|
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
|
||||||
|
float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
|
||||||
|
float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
|
||||||
|
|
||||||
|
ray.org = p;
|
||||||
|
ray.dir.x = rx;
|
||||||
|
ray.dir.y = ry;
|
||||||
|
ray.dir.z = rz;
|
||||||
|
|
||||||
|
occIsect.t = 1.0e+17;
|
||||||
|
occIsect.hit = 0;
|
||||||
|
|
||||||
|
for (uniform int snum = 0; snum < 3; ++snum)
|
||||||
|
ray_sphere_intersect(occIsect, ray, spheres[snum]);
|
||||||
|
ray_plane_intersect (occIsect, ray, plane);
|
||||||
|
|
||||||
|
if (occIsect.hit) occlusion += 1.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
|
||||||
|
return occlusion;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
||||||
|
of width w and height h.
|
||||||
|
*/
|
||||||
|
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||||
|
uniform int h, uniform int nsubsamples,
|
||||||
|
uniform float image[]) {
|
||||||
|
const static uniform Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||||
|
const static uniform Sphere spheres[3] = {
|
||||||
|
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
||||||
|
{ { -0.5f, 0.0f, -3.0f }, 0.5f },
|
||||||
|
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||||
|
RNGState rngstate;
|
||||||
|
|
||||||
|
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
|
||||||
|
float invSamples = 1.f / nsubsamples;
|
||||||
|
|
||||||
|
foreach_tiled(y = y0 ... y1, x = 0 ... w,
|
||||||
|
u = 0 ... nsubsamples, v = 0 ... nsubsamples) {
|
||||||
|
float du = (float)u * invSamples, dv = (float)v * invSamples;
|
||||||
|
|
||||||
|
// Figure out x,y pixel in NDC
|
||||||
|
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||||
|
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||||
|
float ret = 0.f;
|
||||||
|
Ray ray;
|
||||||
|
Isect isect;
|
||||||
|
|
||||||
|
ray.org = 0.f;
|
||||||
|
|
||||||
|
// Poor man's perspective projection
|
||||||
|
ray.dir.x = px;
|
||||||
|
ray.dir.y = py;
|
||||||
|
ray.dir.z = -1.0;
|
||||||
|
vnormalize(ray.dir);
|
||||||
|
|
||||||
|
isect.t = 1.0e+17;
|
||||||
|
isect.hit = 0;
|
||||||
|
|
||||||
|
for (uniform int snum = 0; snum < 3; ++snum)
|
||||||
|
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||||
|
ray_plane_intersect(isect, ray, plane);
|
||||||
|
|
||||||
|
// Note use of 'coherent' if statement; the set of rays we
|
||||||
|
// trace will often all hit or all miss the scene
|
||||||
|
cif (isect.hit) {
|
||||||
|
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||||
|
ret *= invSamples * invSamples;
|
||||||
|
|
||||||
|
int offset = 3 * (y * w + x);
|
||||||
|
atomic_add_local(&image[offset], ret);
|
||||||
|
atomic_add_local(&image[offset+1], ret);
|
||||||
|
atomic_add_local(&image[offset+2], ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
|
||||||
|
uniform float image[]) {
|
||||||
|
ao_scanlines(0, h, w, h, nsubsamples, image);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void task ao_task(uniform int width, uniform int height,
|
||||||
|
uniform int nsubsamples, uniform float image[]) {
|
||||||
|
ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||||
|
uniform float image[]) {
|
||||||
|
launch[h] ao_task(w, h, nsubsamples, image);
|
||||||
|
}
|
||||||
314
examples_ptx/aobench/ao_serial.cpp
Normal file
314
examples_ptx/aobench/ao_serial.cpp
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
// -*- mode: c++ -*-
|
||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define NOMINMAX
|
||||||
|
#pragma warning (disable: 4244)
|
||||||
|
#pragma warning (disable: 4305)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
static long long drand48_x = 0x1234ABCD330E;
|
||||||
|
|
||||||
|
static inline void srand48(int x) {
|
||||||
|
drand48_x = x ^ (x << 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double drand48() {
|
||||||
|
drand48_x = drand48_x * 0x5DEECE66D + 0xB;
|
||||||
|
return (drand48_x & 0xFFFFFFFFFFFF) * (1.0 / 281474976710656.0);
|
||||||
|
}
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
__declspec(align(16))
|
||||||
|
#endif
|
||||||
|
struct vec {
|
||||||
|
vec() { x=y=z=pad=0.; }
|
||||||
|
vec(float xx, float yy, float zz) { x = xx; y = yy; z = zz; }
|
||||||
|
|
||||||
|
vec operator*(float f) const { return vec(x*f, y*f, z*f); }
|
||||||
|
vec operator+(const vec &f2) const {
|
||||||
|
return vec(x+f2.x, y+f2.y, z+f2.z);
|
||||||
|
}
|
||||||
|
vec operator-(const vec &f2) const {
|
||||||
|
return vec(x-f2.x, y-f2.y, z-f2.z);
|
||||||
|
}
|
||||||
|
vec operator*(const vec &f2) const {
|
||||||
|
return vec(x*f2.x, y*f2.y, z*f2.z);
|
||||||
|
}
|
||||||
|
float x, y, z;
|
||||||
|
float pad;
|
||||||
|
}
|
||||||
|
#ifndef _MSC_VER
|
||||||
|
__attribute__ ((aligned(16)))
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
inline vec operator*(float f, const vec &v) { return vec(f*v.x, f*v.y, f*v.z); }
|
||||||
|
|
||||||
|
|
||||||
|
#define NAO_SAMPLES 8
|
||||||
|
|
||||||
|
#ifdef M_PI
|
||||||
|
#undef M_PI
|
||||||
|
#endif
|
||||||
|
#define M_PI 3.1415926535f
|
||||||
|
|
||||||
|
struct Isect {
|
||||||
|
float t;
|
||||||
|
vec p;
|
||||||
|
vec n;
|
||||||
|
int hit;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Sphere {
|
||||||
|
vec center;
|
||||||
|
float radius;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Plane {
|
||||||
|
vec p;
|
||||||
|
vec n;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Ray {
|
||||||
|
vec org;
|
||||||
|
vec dir;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline float dot(const vec &a, const vec &b) {
|
||||||
|
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline vec vcross(const vec &v0, const vec &v1) {
|
||||||
|
vec ret;
|
||||||
|
ret.x = v0.y * v1.z - v0.z * v1.y;
|
||||||
|
ret.y = v0.z * v1.x - v0.x * v1.z;
|
||||||
|
ret.z = v0.x * v1.y - v0.y * v1.x;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vnormalize(vec &v) {
|
||||||
|
float len2 = dot(v, v);
|
||||||
|
float invlen = 1.f / sqrtf(len2);
|
||||||
|
v = v * invlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
ray_plane_intersect(Isect &isect, Ray &ray,
|
||||||
|
Plane &plane) {
|
||||||
|
float d = -dot(plane.p, plane.n);
|
||||||
|
float v = dot(ray.dir, plane.n);
|
||||||
|
|
||||||
|
if (fabsf(v) < 1.0e-17f)
|
||||||
|
return;
|
||||||
|
else {
|
||||||
|
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||||
|
|
||||||
|
if ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + ray.dir * t;
|
||||||
|
isect.n = plane.n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
ray_sphere_intersect(Isect &isect, Ray &ray,
|
||||||
|
Sphere &sphere) {
|
||||||
|
vec rs = ray.org - sphere.center;
|
||||||
|
|
||||||
|
float B = dot(rs, ray.dir);
|
||||||
|
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
||||||
|
float D = B * B - C;
|
||||||
|
|
||||||
|
if (D > 0.) {
|
||||||
|
float t = -B - sqrtf(D);
|
||||||
|
|
||||||
|
if ((t > 0.0) && (t < isect.t)) {
|
||||||
|
isect.t = t;
|
||||||
|
isect.hit = 1;
|
||||||
|
isect.p = ray.org + t * ray.dir;
|
||||||
|
isect.n = isect.p - sphere.center;
|
||||||
|
vnormalize(isect.n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
orthoBasis(vec basis[3], const vec &n) {
|
||||||
|
basis[2] = n;
|
||||||
|
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||||
|
|
||||||
|
if ((n.x < 0.6f) && (n.x > -0.6f)) {
|
||||||
|
basis[1].x = 1.0;
|
||||||
|
} else if ((n.y < 0.6f) && (n.y > -0.6f)) {
|
||||||
|
basis[1].y = 1.0;
|
||||||
|
} else if ((n.z < 0.6f) && (n.z > -0.6f)) {
|
||||||
|
basis[1].z = 1.0;
|
||||||
|
} else {
|
||||||
|
basis[1].x = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
basis[0] = vcross(basis[1], basis[2]);
|
||||||
|
vnormalize(basis[0]);
|
||||||
|
|
||||||
|
basis[1] = vcross(basis[2], basis[0]);
|
||||||
|
vnormalize(basis[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static float
|
||||||
|
ambient_occlusion(Isect &isect, Plane &plane,
|
||||||
|
Sphere spheres[3]) {
|
||||||
|
float eps = 0.0001f;
|
||||||
|
vec p, n;
|
||||||
|
vec basis[3];
|
||||||
|
float occlusion = 0.0;
|
||||||
|
|
||||||
|
p = isect.p + eps * isect.n;
|
||||||
|
|
||||||
|
orthoBasis(basis, isect.n);
|
||||||
|
|
||||||
|
static const int ntheta = NAO_SAMPLES;
|
||||||
|
static const int nphi = NAO_SAMPLES;
|
||||||
|
for (int j = 0; j < ntheta; j++) {
|
||||||
|
for (int i = 0; i < nphi; i++) {
|
||||||
|
Ray ray;
|
||||||
|
Isect occIsect;
|
||||||
|
|
||||||
|
float theta = sqrtf(drand48());
|
||||||
|
float phi = 2.0f * M_PI * drand48();
|
||||||
|
float x = cosf(phi) * theta;
|
||||||
|
float y = sinf(phi) * theta;
|
||||||
|
float z = sqrtf(1.0f - theta * theta);
|
||||||
|
|
||||||
|
// local . global
|
||||||
|
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
|
||||||
|
float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
|
||||||
|
float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
|
||||||
|
|
||||||
|
ray.org = p;
|
||||||
|
ray.dir.x = rx;
|
||||||
|
ray.dir.y = ry;
|
||||||
|
ray.dir.z = rz;
|
||||||
|
|
||||||
|
occIsect.t = 1.0e+17f;
|
||||||
|
occIsect.hit = 0;
|
||||||
|
|
||||||
|
for (int snum = 0; snum < 3; ++snum)
|
||||||
|
ray_sphere_intersect(occIsect, ray, spheres[snum]);
|
||||||
|
ray_plane_intersect (occIsect, ray, plane);
|
||||||
|
|
||||||
|
if (occIsect.hit) occlusion += 1.f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
|
||||||
|
return occlusion;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
||||||
|
of width w and height h.
|
||||||
|
*/
|
||||||
|
static void ao_scanlines(int y0, int y1, int w, int h, int nsubsamples,
|
||||||
|
float image[]) {
|
||||||
|
static Plane plane = { vec(0.0f, -0.5f, 0.0f), vec(0.f, 1.f, 0.f) };
|
||||||
|
static Sphere spheres[3] = {
|
||||||
|
{ vec(-2.0f, 0.0f, -3.5f), 0.5f },
|
||||||
|
{ vec(-0.5f, 0.0f, -3.0f), 0.5f },
|
||||||
|
{ vec(1.0f, 0.0f, -2.2f), 0.5f } };
|
||||||
|
|
||||||
|
srand48(y0);
|
||||||
|
|
||||||
|
for (int y = y0; y < y1; ++y) {
|
||||||
|
for (int x = 0; x < w; ++x) {
|
||||||
|
int offset = 3 * (y * w + x);
|
||||||
|
for (int u = 0; u < nsubsamples; ++u) {
|
||||||
|
for (int v = 0; v < nsubsamples; ++v) {
|
||||||
|
float px = (x + (u / (float)nsubsamples) - (w / 2.0f)) / (w / 2.0f);
|
||||||
|
float py = -(y + (v / (float)nsubsamples) - (h / 2.0f)) / (h / 2.0f);
|
||||||
|
float ret = 0.f;
|
||||||
|
Ray ray;
|
||||||
|
Isect isect;
|
||||||
|
|
||||||
|
ray.org = vec(0.f, 0.f, 0.f);
|
||||||
|
|
||||||
|
ray.dir.x = px;
|
||||||
|
ray.dir.y = py;
|
||||||
|
ray.dir.z = -1.0f;
|
||||||
|
vnormalize(ray.dir);
|
||||||
|
|
||||||
|
isect.t = 1.0e+17f;
|
||||||
|
isect.hit = 0;
|
||||||
|
|
||||||
|
for (int snum = 0; snum < 3; ++snum)
|
||||||
|
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||||
|
ray_plane_intersect(isect, ray, plane);
|
||||||
|
|
||||||
|
if (isect.hit)
|
||||||
|
ret = ambient_occlusion(isect, plane, spheres);
|
||||||
|
|
||||||
|
// Update image for AO for this ray
|
||||||
|
image[offset+0] += ret;
|
||||||
|
image[offset+1] += ret;
|
||||||
|
image[offset+2] += ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Normalize image pixels by number of samples taken per pixel
|
||||||
|
image[offset+0] /= nsubsamples * nsubsamples;
|
||||||
|
image[offset+1] /= nsubsamples * nsubsamples;
|
||||||
|
image[offset+2] /= nsubsamples * nsubsamples;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ao_serial(int w, int h, int nsubsamples,
|
||||||
|
float image[]) {
|
||||||
|
ao_scanlines(0, h, w, h, nsubsamples, image);
|
||||||
|
}
|
||||||
34
examples_ptx/aobench/aobench.vcxproj
Normal file
34
examples_ptx/aobench/aobench.vcxproj
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
|
||||||
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
<RootNamespace>aobench</RootNamespace>
|
||||||
|
<ISPC_file>ao</ISPC_file>
|
||||||
|
<default_targets>sse2,sse4,avx1-i32x8</default_targets>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="..\common.props" />
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="ao.cpp" />
|
||||||
|
<ClCompile Include="ao_serial.cpp" />
|
||||||
|
<ClCompile Include="../tasksys.cpp" />
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
@@ -11,7 +11,11 @@ LD=nvcc
|
|||||||
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
||||||
#
|
#
|
||||||
PTXCC=ptxcc
|
PTXCC=ptxcc
|
||||||
PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v
|
PTXCC_FLAGS = -Xptxas=-v
|
||||||
|
ifdef PTXCC_REGMAX
|
||||||
|
PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX)
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||||
|
|||||||
Reference in New Issue
Block a user