Add support for pointers to the language.
Pointers can be either uniform or varying, and behave correspondingly. e.g.: "uniform float * varying" is a varying pointer to uniform float data in memory, and "float * uniform" is a uniform pointer to varying data in memory. Like other types, pointers are varying by default. Pointer-based expressions, & and *, sizeof, ->, pointer arithmetic, and the array/pointer duality all bahave as in C. Array arguments to functions are converted to pointers, also like C. There is a built-in NULL for a null pointer value; conversion from compile-time constant 0 values to NULL still needs to be implemented. Other changes: - Syntax for references has been updated to be C++ style; a useful warning is now issued if the "reference" keyword is used. - It is now illegal to pass a varying lvalue as a reference parameter to a function; references are essentially uniform pointers. This case had previously been handled via special case call by value return code. That path has been removed, now that varying pointers are available to handle this use case (and much more). - Some stdlib routines have been updated to take pointers as arguments where appropriate (e.g. prefetch and the atomics). A number of others still need attention. - All of the examples have been updated - Many new tests TODO: documentation
This commit is contained in:
@@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void vnormalize(reference vec v) {
|
||||
static inline void vnormalize(vec &v) {
|
||||
float len2 = dot(v, v);
|
||||
float invlen = rsqrt(len2);
|
||||
v *= invlen;
|
||||
@@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) {
|
||||
|
||||
|
||||
static inline void
|
||||
ray_plane_intersect(reference Isect isect, reference Ray ray,
|
||||
reference Plane plane) {
|
||||
ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
|
||||
float d = -dot(plane.p, plane.n);
|
||||
float v = dot(ray.dir, plane.n);
|
||||
|
||||
@@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray,
|
||||
|
||||
|
||||
static inline void
|
||||
ray_sphere_intersect(reference Isect isect, reference Ray ray,
|
||||
reference Sphere sphere) {
|
||||
ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
|
||||
vec rs = ray.org - sphere.center;
|
||||
|
||||
float B = dot(rs, ray.dir);
|
||||
@@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray,
|
||||
|
||||
|
||||
static inline void
|
||||
orthoBasis(reference vec basis[3], vec n) {
|
||||
orthoBasis(vec basis[3], vec n) {
|
||||
basis[2] = n;
|
||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||
|
||||
@@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) {
|
||||
|
||||
|
||||
static inline float
|
||||
ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
reference Sphere spheres[3], reference RNGState rngstate) {
|
||||
ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
|
||||
RNGState &rngstate) {
|
||||
float eps = 0.0001f;
|
||||
vec p, n;
|
||||
vec basis[3];
|
||||
@@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
Ray ray;
|
||||
Isect occIsect;
|
||||
|
||||
float theta = sqrt(frandom(rngstate));
|
||||
float phi = 2.0f * M_PI * frandom(rngstate);
|
||||
float theta = sqrt(frandom(&rngstate));
|
||||
float phi = 2.0f * M_PI * frandom(&rngstate);
|
||||
float x = cos(phi) * theta;
|
||||
float y = sin(phi) * theta;
|
||||
float z = sqrt(1.0 - theta * theta);
|
||||
@@ -205,7 +203,7 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
*/
|
||||
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
uniform int h, uniform int nsubsamples,
|
||||
reference uniform float image[]) {
|
||||
uniform float image[]) {
|
||||
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||
static Sphere spheres[3] = {
|
||||
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
||||
@@ -213,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(rngstate, y0);
|
||||
seed_rng(&rngstate, y0);
|
||||
|
||||
// Compute the mapping between the 'programCount'-wide program
|
||||
// instances running in parallel and samples in the image.
|
||||
|
||||
@@ -75,7 +75,7 @@ static inline vec vcross(vec v0, vec v1) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void vnormalize(reference vec v) {
|
||||
static inline void vnormalize(vec &v) {
|
||||
float len2 = dot(v, v);
|
||||
float invlen = rsqrt(len2);
|
||||
v *= invlen;
|
||||
@@ -83,8 +83,7 @@ static inline void vnormalize(reference vec v) {
|
||||
|
||||
|
||||
static inline void
|
||||
ray_plane_intersect(reference Isect isect, reference Ray ray,
|
||||
reference Plane plane) {
|
||||
ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
|
||||
float d = -dot(plane.p, plane.n);
|
||||
float v = dot(ray.dir, plane.n);
|
||||
|
||||
@@ -104,8 +103,7 @@ ray_plane_intersect(reference Isect isect, reference Ray ray,
|
||||
|
||||
|
||||
static inline void
|
||||
ray_sphere_intersect(reference Isect isect, reference Ray ray,
|
||||
reference Sphere sphere) {
|
||||
ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
|
||||
vec rs = ray.org - sphere.center;
|
||||
|
||||
float B = dot(rs, ray.dir);
|
||||
@@ -127,7 +125,7 @@ ray_sphere_intersect(reference Isect isect, reference Ray ray,
|
||||
|
||||
|
||||
static inline void
|
||||
orthoBasis(reference vec basis[3], vec n) {
|
||||
orthoBasis(vec basis[3], vec n) {
|
||||
basis[2] = n;
|
||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||
|
||||
@@ -150,8 +148,8 @@ orthoBasis(reference vec basis[3], vec n) {
|
||||
|
||||
|
||||
static inline float
|
||||
ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
reference Sphere spheres[3], reference RNGState rngstate) {
|
||||
ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
|
||||
RNGState &rngstate) {
|
||||
float eps = 0.0001f;
|
||||
vec p, n;
|
||||
vec basis[3];
|
||||
@@ -168,8 +166,8 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
Ray ray;
|
||||
Isect occIsect;
|
||||
|
||||
float theta = sqrt(frandom(rngstate));
|
||||
float phi = 2.0f * M_PI * frandom(rngstate);
|
||||
float theta = sqrt(frandom(&rngstate));
|
||||
float phi = 2.0f * M_PI * frandom(&rngstate);
|
||||
float x = cos(phi) * theta;
|
||||
float y = sin(phi) * theta;
|
||||
float z = sqrt(1.0 - theta * theta);
|
||||
@@ -203,8 +201,9 @@ ambient_occlusion(reference Isect isect, reference Plane plane,
|
||||
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
||||
of width w and height h.
|
||||
*/
|
||||
void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
||||
uniform int nsubsamples, reference uniform float image[]) {
|
||||
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||
static Sphere spheres[3] = {
|
||||
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
||||
@@ -212,7 +211,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
||||
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(rngstate, y0);
|
||||
seed_rng(&rngstate, y0);
|
||||
|
||||
// Compute the mapping between the 'programCount'-wide program
|
||||
// instances running in parallel and samples in the image.
|
||||
@@ -231,6 +230,9 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
||||
// direction we do per iteration and ny the number in y.
|
||||
uniform int nx = 1, ny = 1;
|
||||
|
||||
// FIXME: We actually need ny to be 1 regardless of the decomposition,
|
||||
// since the task decomposition is one scanline high.
|
||||
|
||||
if (programCount == 8) {
|
||||
// Do two pixels at once in the x direction
|
||||
nx = 2;
|
||||
@@ -239,19 +241,21 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
||||
++du;
|
||||
}
|
||||
else if (programCount == 16) {
|
||||
// Two at once in both x and y
|
||||
nx = ny = 2;
|
||||
if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12)
|
||||
nx = 4;
|
||||
ny = 1;
|
||||
if (programIndex >= 4 && programIndex < 8)
|
||||
++du;
|
||||
if (programIndex >= 8)
|
||||
++dv;
|
||||
if (programIndex >= 8 && programIndex < 12)
|
||||
du += 2;
|
||||
if (programIndex >= 12)
|
||||
du += 3;
|
||||
}
|
||||
|
||||
// Now loop over all of the pixels, stepping in x and y as calculated
|
||||
// above. (Assumes that ny divides y and nx divides x...)
|
||||
for (uniform int y = y0; y < y1; y += ny) {
|
||||
for (uniform int x = 0; x < w; x += nx) {
|
||||
// Figur out x,y pixel in NDC
|
||||
// Figure out x,y pixel in NDC
|
||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
@@ -293,7 +297,7 @@ void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
|
||||
|
||||
// offset to the first pixel in the image
|
||||
uniform int offset = 3 * (y * w + x);
|
||||
for (uniform int p = 0; p < programCount; p += 4, ++offset) {
|
||||
for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
|
||||
// Get the four sample values for this pixel
|
||||
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
|
||||
retArray[p+3];
|
||||
@@ -315,3 +319,15 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
ao_scanlines(0, h, w, h, nsubsamples, image);
|
||||
}
|
||||
|
||||
|
||||
static void task ao_task(uniform int width, uniform int height,
|
||||
uniform int nsubsamples, uniform float image[]) {
|
||||
ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
|
||||
}
|
||||
|
||||
|
||||
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
launch[h] < ao_task(w, h, nsubsamples, image) >;
|
||||
}
|
||||
|
||||
@@ -35,22 +35,22 @@
|
||||
|
||||
struct InputDataArrays
|
||||
{
|
||||
uniform float zBuffer[];
|
||||
uniform unsigned int16 normalEncoded_x[]; // half float
|
||||
uniform unsigned int16 normalEncoded_y[]; // half float
|
||||
uniform unsigned int16 specularAmount[]; // half float
|
||||
uniform unsigned int16 specularPower[]; // half float
|
||||
uniform unsigned int8 albedo_x[]; // unorm8
|
||||
uniform unsigned int8 albedo_y[]; // unorm8
|
||||
uniform unsigned int8 albedo_z[]; // unorm8
|
||||
uniform float lightPositionView_x[];
|
||||
uniform float lightPositionView_y[];
|
||||
uniform float lightPositionView_z[];
|
||||
uniform float lightAttenuationBegin[];
|
||||
uniform float lightColor_x[];
|
||||
uniform float lightColor_y[];
|
||||
uniform float lightColor_z[];
|
||||
uniform float lightAttenuationEnd[];
|
||||
uniform float * uniform zBuffer;
|
||||
uniform unsigned int16 * uniform normalEncoded_x; // half float
|
||||
uniform unsigned int16 * uniform normalEncoded_y; // half float
|
||||
uniform unsigned int16 * uniform specularAmount; // half float
|
||||
uniform unsigned int16 * uniform specularPower; // half float
|
||||
uniform unsigned int8 * uniform albedo_x; // unorm8
|
||||
uniform unsigned int8 * uniform albedo_y; // unorm8
|
||||
uniform unsigned int8 * uniform albedo_z; // unorm8
|
||||
uniform float * uniform lightPositionView_x;
|
||||
uniform float * uniform lightPositionView_y;
|
||||
uniform float * uniform lightPositionView_z;
|
||||
uniform float * uniform lightAttenuationBegin;
|
||||
uniform float * uniform lightColor_x;
|
||||
uniform float * uniform lightColor_y;
|
||||
uniform float * uniform lightColor_z;
|
||||
uniform float * uniform lightAttenuationEnd;
|
||||
};
|
||||
|
||||
struct InputHeader
|
||||
@@ -77,8 +77,7 @@ dot3(float x, float y, float z, float a, float b, float c) {
|
||||
|
||||
|
||||
static inline void
|
||||
normalize3(float x, float y, float z, reference float ox,
|
||||
reference float oy, reference float oz) {
|
||||
normalize3(float x, float y, float z, float &ox, float &oy, float &oz) {
|
||||
float n = rsqrt(x*x + y*y + z*z);
|
||||
ox = x * n;
|
||||
oy = y * n;
|
||||
@@ -110,8 +109,8 @@ ComputeZBounds(
|
||||
uniform float cameraProj_33, uniform float cameraProj_43,
|
||||
uniform float cameraNear, uniform float cameraFar,
|
||||
// Output
|
||||
reference uniform float minZ,
|
||||
reference uniform float maxZ
|
||||
uniform float &minZ,
|
||||
uniform float &maxZ
|
||||
)
|
||||
{
|
||||
// Find Z bounds
|
||||
@@ -156,7 +155,7 @@ IntersectLightsWithTileMinMax(
|
||||
uniform float light_positionView_z_array[],
|
||||
uniform float light_attenuationEnd_array[],
|
||||
// Output
|
||||
reference uniform int32 tileLightIndices[]
|
||||
uniform int32 tileLightIndices[]
|
||||
)
|
||||
{
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
@@ -268,7 +267,7 @@ IntersectLightsWithTile(
|
||||
uniform float light_positionView_z_array[],
|
||||
uniform float light_attenuationEnd_array[],
|
||||
// Output
|
||||
reference uniform int32 tileLightIndices[]
|
||||
uniform int32 tileLightIndices[]
|
||||
)
|
||||
{
|
||||
uniform float minZ, maxZ;
|
||||
@@ -293,19 +292,19 @@ ShadeTile(
|
||||
uniform int32 tileStartX, uniform int32 tileEndX,
|
||||
uniform int32 tileStartY, uniform int32 tileEndY,
|
||||
uniform int32 gBufferWidth, uniform int32 gBufferHeight,
|
||||
reference uniform InputDataArrays inputData,
|
||||
uniform InputDataArrays &inputData,
|
||||
// Camera data
|
||||
uniform float cameraProj_11, uniform float cameraProj_22,
|
||||
uniform float cameraProj_33, uniform float cameraProj_43,
|
||||
// Light list
|
||||
reference uniform int32 tileLightIndices[],
|
||||
uniform int32 tileLightIndices[],
|
||||
uniform int32 tileNumLights,
|
||||
// UI
|
||||
uniform bool visualizeLightCount,
|
||||
// Output
|
||||
reference uniform unsigned int8 framebuffer_r[],
|
||||
reference uniform unsigned int8 framebuffer_g[],
|
||||
reference uniform unsigned int8 framebuffer_b[]
|
||||
uniform unsigned int8 framebuffer_r[],
|
||||
uniform unsigned int8 framebuffer_g[],
|
||||
uniform unsigned int8 framebuffer_b[]
|
||||
)
|
||||
{
|
||||
if (tileNumLights == 0 || visualizeLightCount) {
|
||||
@@ -478,13 +477,13 @@ ShadeTile(
|
||||
|
||||
task void
|
||||
RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
||||
reference uniform InputHeader inputHeader,
|
||||
reference uniform InputDataArrays inputData,
|
||||
uniform InputHeader &inputHeader,
|
||||
uniform InputDataArrays &inputData,
|
||||
uniform int visualizeLightCount,
|
||||
// Output
|
||||
reference uniform unsigned int8 framebuffer_r[],
|
||||
reference uniform unsigned int8 framebuffer_g[],
|
||||
reference uniform unsigned int8 framebuffer_b[]) {
|
||||
uniform unsigned int8 framebuffer_r[],
|
||||
uniform unsigned int8 framebuffer_g[],
|
||||
uniform unsigned int8 framebuffer_b[]) {
|
||||
uniform int32 group_y = taskIndex / num_groups_x;
|
||||
uniform int32 group_x = taskIndex % num_groups_x;
|
||||
uniform int32 tile_start_x = group_x * MIN_TILE_WIDTH;
|
||||
@@ -526,13 +525,13 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
||||
|
||||
|
||||
export void
|
||||
RenderStatic(reference uniform InputHeader inputHeader,
|
||||
reference uniform InputDataArrays inputData,
|
||||
RenderStatic(uniform InputHeader &inputHeader,
|
||||
uniform InputDataArrays &inputData,
|
||||
uniform int visualizeLightCount,
|
||||
// Output
|
||||
reference uniform unsigned int8 framebuffer_r[],
|
||||
reference uniform unsigned int8 framebuffer_g[],
|
||||
reference uniform unsigned int8 framebuffer_b[]) {
|
||||
uniform unsigned int8 framebuffer_r[],
|
||||
uniform unsigned int8 framebuffer_g[],
|
||||
uniform unsigned int8 framebuffer_b[]) {
|
||||
uniform int num_groups_x = (inputHeader.framebufferWidth +
|
||||
MIN_TILE_WIDTH - 1) / MIN_TILE_WIDTH;
|
||||
uniform int num_groups_y = (inputHeader.framebufferHeight +
|
||||
@@ -564,8 +563,8 @@ ComputeZBoundsRow(
|
||||
uniform float cameraProj_33, uniform float cameraProj_43,
|
||||
uniform float cameraNear, uniform float cameraFar,
|
||||
// Output
|
||||
reference uniform float minZArray[],
|
||||
reference uniform float maxZArray[]
|
||||
uniform float minZArray[],
|
||||
uniform float maxZArray[]
|
||||
)
|
||||
{
|
||||
for (uniform int32 tileX = 0; tileX < numTilesX; ++tileX) {
|
||||
@@ -596,7 +595,7 @@ SplitTileMinMax(
|
||||
// Camera data
|
||||
uniform float cameraProj_11, uniform float cameraProj_22,
|
||||
// Light Data
|
||||
reference uniform int32 lightIndices[],
|
||||
uniform int32 lightIndices[],
|
||||
uniform int32 numLights,
|
||||
uniform float light_positionView_x_array[],
|
||||
uniform float light_positionView_y_array[],
|
||||
@@ -605,9 +604,9 @@ SplitTileMinMax(
|
||||
// Outputs
|
||||
// TODO: ISPC doesn't currently like multidimensionsal arrays so we'll do the
|
||||
// indexing math ourselves
|
||||
reference uniform int32 subtileIndices[],
|
||||
uniform int32 subtileIndices[],
|
||||
uniform int32 subtileIndicesPitch,
|
||||
reference uniform int32 subtileNumLights[]
|
||||
uniform int32 subtileNumLights[]
|
||||
)
|
||||
{
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
|
||||
@@ -51,7 +51,7 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations,
|
||||
reference uniform int output[])
|
||||
uniform int output[])
|
||||
{
|
||||
float dx = (x1 - x0) / width;
|
||||
float dy = (y1 - y0) / height;
|
||||
|
||||
@@ -57,7 +57,7 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int maxIterations,
|
||||
reference uniform int output[]) {
|
||||
uniform int output[]) {
|
||||
uniform int ystart = ybase + taskIndex * span;
|
||||
uniform int yend = ystart + span;
|
||||
|
||||
@@ -77,7 +77,7 @@ task void
|
||||
mandelbrot_chunk(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, reference uniform int output[]) {
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * (height/taskCount);
|
||||
uniform int yend = (taskIndex+1) * (height/taskCount);
|
||||
uniform int span = 1;
|
||||
@@ -91,7 +91,7 @@ export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, reference uniform int output[]) {
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ static inline float Dot(const float3 a, const float3 b) {
|
||||
|
||||
static void generateRay(uniform const float raster2camera[4][4],
|
||||
uniform const float camera2world[4][4],
|
||||
float x, float y, reference Ray ray) {
|
||||
float x, float y, Ray &ray) {
|
||||
ray.mint = 0.f;
|
||||
ray.maxt = 1e30f;
|
||||
|
||||
@@ -105,7 +105,7 @@ static void generateRay(uniform const float raster2camera[4][4],
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const reference Ray ray) {
|
||||
const Ray &ray) {
|
||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
@@ -143,7 +143,7 @@ static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
|
||||
|
||||
|
||||
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) {
|
||||
static inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
@@ -184,7 +184,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray)
|
||||
|
||||
|
||||
bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
||||
reference Ray r) {
|
||||
Ray &r) {
|
||||
Ray ray = r;
|
||||
bool hit = false;
|
||||
// Follow ray through BVH nodes to find primitive intersections
|
||||
|
||||
@@ -8,7 +8,7 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 --opt=32-bit-addressing
|
||||
|
||||
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
|
||||
|
||||
@@ -41,7 +41,7 @@ struct Ray {
|
||||
static void
|
||||
generateRay(const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
float x, float y, reference Ray ray) {
|
||||
float x, float y, Ray &ray) {
|
||||
// transform raster coordinate (x, y, 0) to camera space
|
||||
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
|
||||
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
|
||||
@@ -70,7 +70,7 @@ Inside(float3 p, float3 pMin, float3 pMax) {
|
||||
|
||||
|
||||
static bool
|
||||
IntersectP(Ray ray, float3 pMin, float3 pMax, reference float hit0, reference float hit1) {
|
||||
IntersectP(Ray ray, float3 pMin, float3 pMax, float &hit0, float &hit1) {
|
||||
float t0 = -1e30, t1 = 1e30;
|
||||
|
||||
float3 tNear = (pMin - ray.origin) / ray.dir;
|
||||
@@ -141,7 +141,7 @@ static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3],
|
||||
reference uniform bool checkForSameVoxel) {
|
||||
uniform bool &checkForSameVoxel) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
@@ -155,8 +155,8 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00, d10, d01, d11;
|
||||
uniform int uvx, uvy, uvz;
|
||||
if (checkForSameVoxel && reduce_equal(vx, uvx) && reduce_equal(vy, uvy) &&
|
||||
reduce_equal(vz, uvz)) {
|
||||
if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) &&
|
||||
reduce_equal(vz, &uvz)) {
|
||||
// If all of the program instances are inside the same voxel, then
|
||||
// we'll call the 'uniform' variant of the voxel density lookup
|
||||
// function, thus doing a single load for each value rather than a
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
@@ -158,13 +158,13 @@
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 --opt=32-bit-addressing
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
|
||||
Reference in New Issue
Block a user