From d3dc5e0df1b990ac1b9c13d88834c0fb2bae029f Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 8 Jan 2014 14:25:21 +0100 Subject: [PATCH] patched examples to work with uniform for nvptx. function calls with non-generic pointers fail. need fix --- examples_ptx/deferred/dynamic_c.cpp | 4 ++++ examples_ptx/deferred/kernels.ispc | 18 ++++++++++++------ examples_ptx/volume_rendering/volume.cu | 2 +- examples_ptx/volume_rendering/volume.ispc | 2 +- stmt.cpp | 14 ++++++++++---- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/examples_ptx/deferred/dynamic_c.cpp b/examples_ptx/deferred/dynamic_c.cpp index 8ed9a648..68bf60df 100644 --- a/examples_ptx/deferred/dynamic_c.cpp +++ b/examples_ptx/deferred/dynamic_c.cpp @@ -51,8 +51,12 @@ #endif // ISPC_IS_LINUX // Currently tile widths must be a multiple of SIMD width (i.e. 8 for ispc sse4x2)! +#ifndef MIN_TILE_WIDTH #define MIN_TILE_WIDTH 16 +#endif +#ifndef MIN_TILE_HEIGHT #define MIN_TILE_HEIGHT 16 +#endif #define DYNAMIC_TREE_LEVELS 5 diff --git a/examples_ptx/deferred/kernels.ispc b/examples_ptx/deferred/kernels.ispc index a39723c4..aa54d455 100644 --- a/examples_ptx/deferred/kernels.ispc +++ b/examples_ptx/deferred/kernels.ispc @@ -33,6 +33,12 @@ #include "deferred.h" +#ifdef __NVPTX__ +#define uniform_t varying +#else +#define uniform_t uniform +#endif + struct InputDataArrays { float *zBuffer; @@ -166,19 +172,19 @@ IntersectLightsWithTileMinMax( uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_y = 0.5f * (float)gBufferHeight; - uniform float frustumPlanes_xy[4] = { + uniform_t float frustumPlanes_xy[4] = { -(cameraProj_11 * gBufferScale_x), (cameraProj_11 * gBufferScale_x), (cameraProj_22 * gBufferScale_y), -(cameraProj_22 * gBufferScale_y) }; - uniform float frustumPlanes_z[4] = { + uniform_t float frustumPlanes_z[4] = { tileEndX - gBufferScale_x, -tileStartX + gBufferScale_x, tileEndY - gBufferScale_y, -tileStartY + gBufferScale_y }; for (uniform int i = 0; i < 4; ++i) { - uniform float norm = rsqrt(frustumPlanes_xy[i] * frustumPlanes_xy[i] + + uniform_t float norm = rsqrt(frustumPlanes_xy[i] * frustumPlanes_xy[i] + frustumPlanes_z[i] * frustumPlanes_z[i]); frustumPlanes_xy[i] *= norm; frustumPlanes_z[i] *= norm; @@ -621,13 +627,13 @@ SplitTileMinMax( uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_y = 0.5f * (float)gBufferHeight; - uniform float frustumPlanes_xy[2] = { -(cameraProj_11 * gBufferScale_x), + uniform_t float frustumPlanes_xy[2] = { -(cameraProj_11 * gBufferScale_x), (cameraProj_22 * gBufferScale_y) }; - uniform float frustumPlanes_z[2] = { tileMidX - gBufferScale_x, + uniform_t float frustumPlanes_z[2] = { tileMidX - gBufferScale_x, tileMidY - gBufferScale_y }; // Normalize - uniform float norm[2] = { rsqrt(frustumPlanes_xy[0] * frustumPlanes_xy[0] + + uniform_t float norm[2] = { rsqrt(frustumPlanes_xy[0] * frustumPlanes_xy[0] + frustumPlanes_z[0] * frustumPlanes_z[0]), rsqrt(frustumPlanes_xy[1] * frustumPlanes_xy[1] + frustumPlanes_z[1] * frustumPlanes_z[1]) }; diff --git a/examples_ptx/volume_rendering/volume.cu b/examples_ptx/volume_rendering/volume.cu index 44b87f27..d5ebc370 100644 --- a/examples_ptx/volume_rendering/volume.cu +++ b/examples_ptx/volume_rendering/volume.cu @@ -366,7 +366,7 @@ volume_task(float density[], int _nVoxels[3], int width, int height, float image[]) { if (taskIndex0 >= taskCount0) return; -#if 1 +#if 0 int nVoxels[3]; nVoxels[0] = _nVoxels[0]; nVoxels[1] = _nVoxels[1]; diff --git a/examples_ptx/volume_rendering/volume.ispc b/examples_ptx/volume_rendering/volume.ispc index 8906324c..fd90af9c 100644 --- a/examples_ptx/volume_rendering/volume.ispc +++ b/examples_ptx/volume_rendering/volume.ispc @@ -328,7 +328,7 @@ volume_task(uniform float density[], uniform int _nVoxels[3], { if (taskIndex >= taskCount) return; -#if 1 +#if 0 /* cannot pass shared memory pointers to functions, need to find a way to solve this one :S */ uniform int nVoxels[3]; nVoxels[0] = _nVoxels[0]; nVoxels[1] = _nVoxels[1]; diff --git a/stmt.cpp b/stmt.cpp index d498cb59..fc0148c1 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -263,13 +263,14 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { g->target->getISA() == Target::NVPTX) { /* deal with "const uniform" or "uniform" arrays for nvptx target */ - PerformanceWarning(sym->pos, - "\"uniform\" arrays might be slow with \"nvptx\" target. " - "Unless data sharing between program instances is required, use \"varying\" if possible."); + if (!sym->type->IsConstType()) + PerformanceWarning(sym->pos, + "\"uniform\" arrays might be slow with \"nvptx\" target. " + "Unless data sharing between program instances is required, use \"varying\" if possible."); if (initExpr != NULL && !sym->type->IsConstType()) Error(initExpr->pos, "It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. " - "Please use \"varying\" or \"const static uniform\".", + "Please use \"varying\", \"const static uniform\" or define initializer in the global scope.", sym->name.c_str()); llvm::Constant *cinit = NULL; @@ -277,7 +278,11 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { int addressSpace; if (sym->type->IsConstType()) { +#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */ addressSpace = 4; /* constant */ +#else + addressSpace = 0; /* use global for now */ +#endif if (initExpr != NULL) { if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false) continue; @@ -309,6 +314,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { } else { + /* fails if pointer passed to function argument, need conversion beforehand */ addressSpace = 3; /* local */ const ArrayType *at = CastType(sym->type); const int nel = at->GetElementCount();