+some uniform-related improvements
This commit is contained in:
@@ -4,7 +4,7 @@ CU_SRC=ao.cu
|
||||
CXX_SRC=ao.cpp ao_serial.cpp
|
||||
PTXCC_REGMAX=64
|
||||
|
||||
LLVM_GPU=1
|
||||
# LLVM_GPU=1
|
||||
NVVM_GPU=1
|
||||
|
||||
include ../common_gpu.mk
|
||||
|
||||
@@ -82,8 +82,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
const uniform float dx = (x1 - x0) / width;
|
||||
const uniform float dy = (y1 - y0) / height;
|
||||
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
|
||||
const uniform int yspan = 16;
|
||||
|
||||
|
||||
@@ -38,6 +38,13 @@ typedef bool bool_t;
|
||||
#endif
|
||||
typedef float<3> float3;
|
||||
|
||||
#ifdef __NVPTX__
|
||||
#define uniform_t varying
|
||||
#else
|
||||
#define uniform_t uniform
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
struct int3
|
||||
{
|
||||
@@ -124,8 +131,8 @@ inline
|
||||
#endif
|
||||
static bool_t BBoxIntersect(const uniform float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
const uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
const uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
|
||||
// Check all three axis-aligned slabs. Don't try to early out; it's
|
||||
@@ -164,12 +171,12 @@ static bool_t BBoxIntersect(const uniform float bounds[2][3],
|
||||
#if 1
|
||||
inline
|
||||
#endif
|
||||
static bool_t TriIntersect(const uniform Triangle &tri, Ray &ray) {
|
||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
uniform float3 e1 = p1 - p0;
|
||||
uniform float3 e2 = p2 - p0;
|
||||
static bool_t TriIntersect(const uniform_t Triangle tri, Ray &ray) {
|
||||
const uniform_t float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
const uniform_t float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
const uniform_t float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
const uniform_t float3 e1 = p1 - p0;
|
||||
const uniform_t float3 e2 = p2 - p0;
|
||||
|
||||
float3 s1 = Cross(ray.dir, e2);
|
||||
float divisor = Dot(s1, e1);
|
||||
@@ -218,13 +225,13 @@ BVHIntersect(const uniform LinearBVHNode nodes[],
|
||||
|
||||
while (true) {
|
||||
// Check ray against BVH node
|
||||
uniform LinearBVHNode node = nodes[nodeNum];
|
||||
const uniform LinearBVHNode node = nodes[nodeNum];
|
||||
if (any(BBoxIntersect(node.bounds, ray))) {
|
||||
uniform unsigned int nPrimitives = node.nPrimitives;
|
||||
const uniform unsigned int nPrimitives = node.nPrimitives;
|
||||
if (nPrimitives > 0) {
|
||||
// Intersect ray with primitives in leaf BVH node
|
||||
uniform unsigned int primitivesOffset = node.offset;
|
||||
for (uniform unsigned int i = 0; i < nPrimitives; ++i) {
|
||||
const uniform unsigned int primitivesOffset = node.offset;
|
||||
for (uniform_t unsigned int i = 0; i < nPrimitives; ++i) {
|
||||
if (TriIntersect(tris[primitivesOffset+i], ray))
|
||||
hit = true;
|
||||
}
|
||||
@@ -277,8 +284,8 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
||||
uniform float image[], uniform int id[],
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||
const uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||
const uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||
|
||||
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
|
||||
Ray ray;
|
||||
@@ -313,12 +320,12 @@ task void raytrace_tile_task(uniform int width, uniform int height,
|
||||
uniform float image[], uniform int id[],
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform int dx = 64, dy = 8; // must match dx, dy below
|
||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
uniform int x0 = (taskIndex % xBuckets) * dx;
|
||||
uniform int x1 = min(x0 + dx, width);
|
||||
uniform int y0 = (taskIndex / xBuckets) * dy;
|
||||
uniform int y1 = min(y0 + dy, height);
|
||||
const uniform int dx = 64, dy = 8; // must match dx, dy below
|
||||
const uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
const uniform int x0 = (taskIndex % xBuckets) * dx;
|
||||
const uniform int x1 = min(x0 + dx, width);
|
||||
const uniform int y0 = (taskIndex / xBuckets) * dy;
|
||||
const uniform int y1 = min(y0 + dy, height);
|
||||
|
||||
raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight,
|
||||
raster2camera, camera2world, image,
|
||||
@@ -333,10 +340,10 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
||||
uniform float image[], uniform int id[],
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform int dx = 64, dy = 8;
|
||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
uniform int yBuckets = (height + (dy-1)) / dy;
|
||||
uniform int nTasks = xBuckets * yBuckets;
|
||||
const uniform int dx = 64, dy = 8;
|
||||
const uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
const uniform int yBuckets = (height + (dy-1)) / dy;
|
||||
const uniform int nTasks = xBuckets * yBuckets;
|
||||
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
|
||||
raster2camera, camera2world,
|
||||
image, id, nodes, triangles);
|
||||
|
||||
@@ -181,7 +181,7 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
||||
float tau = 0;
|
||||
float rayLength = sqrt(ray.dir.x * ray.dir.x + ray.dir.y * ray.dir.y +
|
||||
ray.dir.z * ray.dir.z);
|
||||
uniform float stepDist = 0.2;
|
||||
const uniform float stepDist = 0.2;
|
||||
float stepT = stepDist / rayLength;
|
||||
|
||||
float t = rayT0;
|
||||
@@ -207,8 +207,8 @@ distanceSquared(float3 a, float3 b) {
|
||||
static inline float
|
||||
raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
float rayT0, rayT1;
|
||||
uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
|
||||
uniform float3 lightPos = { -1, 4, 1.5 };
|
||||
const uniform float3 pMin = {.3, -.2, .3}, pMax = {1.8, 2.3, 1.8};
|
||||
const uniform float3 lightPos = { -1, 4, 1.5 };
|
||||
|
||||
if (!IntersectP(ray, pMin, pMax, rayT0, rayT1))
|
||||
return 0.;
|
||||
@@ -217,11 +217,11 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
|
||||
// Parameters that define the volume scattering characteristics and
|
||||
// sampling rate for raymarching
|
||||
uniform float Le = .25; // Emission coefficient
|
||||
uniform float sigma_a = 10; // Absorption coefficient
|
||||
uniform float sigma_s = 10; // Scattering coefficient
|
||||
uniform float stepDist = 0.025; // Ray step amount
|
||||
uniform float lightIntensity = 40; // Light source intensity
|
||||
const uniform float Le = .25; // Emission coefficient
|
||||
const uniform float sigma_a = 10; // Absorption coefficient
|
||||
const uniform float sigma_s = 10; // Scattering coefficient
|
||||
const uniform float stepDist = 0.025; // Ray step amount
|
||||
const uniform float lightIntensity = 40; // Light source intensity
|
||||
|
||||
float tau = 0.f; // accumulated beam transmittance
|
||||
float L = 0; // radiance along the ray
|
||||
@@ -375,15 +375,14 @@ volume_task(uniform float density[], uniform int _nVoxels[3],
|
||||
#define camera2world _camera2world
|
||||
#endif
|
||||
|
||||
uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
|
||||
uniform int xbuckets = (width + (dx-1)) / dx;
|
||||
uniform int ybuckets = (height + (dy-1)) / dy;
|
||||
const uniform int dx = 8, dy = 8; // must match value in volume_ispc_tasks
|
||||
const uniform int xbuckets = (width + (dx-1)) / dx;
|
||||
const uniform int ybuckets = (height + (dy-1)) / dy;
|
||||
|
||||
uniform int x0 = (taskIndex % xbuckets) * dx;
|
||||
uniform int y0 = (taskIndex / xbuckets) * dy;
|
||||
uniform int x1 = x0 + dx, y1 = y0 + dy;
|
||||
x1 = min(x1, width);
|
||||
y1 = min(y1, height);
|
||||
const uniform int x0 = (taskIndex % xbuckets) * dx;
|
||||
const uniform int y0 = (taskIndex / xbuckets) * dy;
|
||||
const uniform int x1 = min(x0 + dx, width);
|
||||
const uniform int y1 = min(y0 + dy, height);
|
||||
|
||||
volume_tile(x0, y0, x1, y1, density, nVoxels, raster2camera,
|
||||
camera2world, width, height, image);
|
||||
@@ -406,8 +405,8 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform int width, uniform int height, uniform float image[]) {
|
||||
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
||||
uniform int dx = 8, dy = 8;
|
||||
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||
const uniform int dx = 8, dy = 8;
|
||||
const uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
sync;
|
||||
|
||||
Reference in New Issue
Block a user