added any/none/all for bool

This commit is contained in:
Evghenii
2013-11-11 12:59:40 +01:00
parent a91c8e15e2
commit f2c66dc4c3
5 changed files with 43 additions and 43 deletions

View File

@@ -484,25 +484,28 @@ svml_stubs(double,d,WIDTH)
define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline { define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline {
%v = extractelement <1 x i1> %0, i32 0 %v = extractelement <1 x i1> %0, i32 0
%v64 = zext i1 %v to i64 %v64 = zext i1 %v to i64
ret i64 %v64 ret i64 %v64
} }
define i1 @__any(<1 x i1>) nounwind readnone alwaysinline { define i1 @__any(<1 x i1>) nounwind readnone alwaysinline {
%v = extractelement <1 x i1> %0, i32 0 %v = extractelement <1 x i1> %0, i32 0
%cmp = icmp ne i1 %v, 0 %res = call i32 @__ballot(i1 %v)
%cmp = icmp ne i32 %res, 0
ret i1 %cmp ret i1 %cmp
} }
define i1 @__all(<1 x i1>) nounwind readnone alwaysinline { define i1 @__all(<1 x i1>) nounwind readnone alwaysinline {
%v = extractelement <1 x i1> %0, i32 0 %v = extractelement <1 x i1> %0, i32 0
%cmp = icmp eq i1 %v, 1 %res = call i32 @__ballot(i1 %v)
%cmp = icmp eq i32 %res, 31
ret i1 %cmp ret i1 %cmp
} }
define i1 @__none(<1 x i1>) nounwind readnone alwaysinline { define i1 @__none(<1 x i1>) nounwind readnone alwaysinline {
%v = extractelement <1 x i1> %0, i32 0 %v = extractelement <1 x i1> %0, i32 0
%cmp = icmp eq i1 %v, 0 %res = call i32 @__ballot(i1 %v)
%cmp = icmp eq i32 %res, 0
ret i1 %cmp ret i1 %cmp
} }

View File

@@ -131,6 +131,7 @@ CreateInputDataFromFile(const char *path) {
fprintf(stderr, "Preumature EOF reading file \"%s\"\n", path); fprintf(stderr, "Preumature EOF reading file \"%s\"\n", path);
return NULL; return NULL;
} }
fprintf(stderr, " numLights= %d\n", input->header.numLights);
// Load data chunk and update pointers // Load data chunk and update pointers
input->chunk = (uint8_t *)lAlignedMalloc(input->header.inputDataChunkSize, input->chunk = (uint8_t *)lAlignedMalloc(input->header.inputDataChunkSize,

View File

@@ -200,36 +200,33 @@ IntersectLightsWithTileMinMax(
// don't actually need to mask the rest of this function - this is // don't actually need to mask the rest of this function - this is
// just a greedy early-out. Could also structure all of this as // just a greedy early-out. Could also structure all of this as
// nested if() statements, but this a bit easier to read // nested if() statements, but this a bit easier to read
bool active = false; if (any(inFrustum)) {
if (any(inFrustum)) { float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_x = light_positionView_x_array[lightIndex]; float light_positionView_y = light_positionView_y_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex];
d = light_positionView_z * frustumPlanes_z[0] + d = light_positionView_z * frustumPlanes_z[0] +
light_positionView_x * frustumPlanes_xy[0]; light_positionView_x * frustumPlanes_xy[0];
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[1] + d = light_positionView_z * frustumPlanes_z[1] +
light_positionView_x * frustumPlanes_xy[1]; light_positionView_x * frustumPlanes_xy[1];
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[2] + d = light_positionView_z * frustumPlanes_z[2] +
light_positionView_y * frustumPlanes_xy[2]; light_positionView_y * frustumPlanes_xy[2];
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[3] + d = light_positionView_z * frustumPlanes_z[3] +
light_positionView_y * frustumPlanes_xy[3]; light_positionView_y * frustumPlanes_xy[3];
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
// Pack and store intersecting lights // Pack and store intersecting lights
if (inFrustum) const bool active = inFrustum && lightIndex < numLights;
active = true;
}
if (lightIndex >= numLights)
active = false;
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex); if (any(active))
} tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
}
}
return tileNumLights; return tileNumLights;
} }

View File

@@ -37,6 +37,7 @@
#define programIndex laneIndex() #define programIndex laneIndex()
#define taskIndex blockIndex0() #define taskIndex blockIndex0()
#define taskCount blockCount0() #define taskCount blockCount0()
#define cif if
#else #else
#warning "emitting HOST code" #warning "emitting HOST code"
#endif #endif
@@ -212,8 +213,8 @@ IntersectLightsWithTileMinMax(
// don't actually need to mask the rest of this function - this is // don't actually need to mask the rest of this function - this is
// just a greedy early-out. Could also structure all of this as // just a greedy early-out. Could also structure all of this as
// nested if() statements, but this a bit easier to read // nested if() statements, but this a bit easier to read
bool active = false; if (any(inFrustum))
if (any(inFrustum)) { {
float light_positionView_x = light_positionView_x_array[lightIndex]; float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex]; float light_positionView_y = light_positionView_y_array[lightIndex];
@@ -234,13 +235,11 @@ IntersectLightsWithTileMinMax(
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
// Pack and store intersecting lights // Pack and store intersecting lights
if (inFrustum) const bool active = inFrustum && lightIndex < numLights;
active = true;
}
if (lightIndex >= numLights)
active = false;
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex); if(any(active))
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
}
} }
return tileNumLights; return tileNumLights;
@@ -402,7 +401,7 @@ ShadeTile(
// Clip at end of attenuation // Clip at end of attenuation
float light_attenutaionEnd2 = light_attenuationEnd * light_attenuationEnd; float light_attenutaionEnd2 = light_attenuationEnd * light_attenuationEnd;
if (distanceToLight2 < light_attenutaionEnd2) { cif (distanceToLight2 < light_attenutaionEnd2) {
float distanceToLight = sqrt(distanceToLight2); float distanceToLight = sqrt(distanceToLight2);
// HLSL "rcp" is allowed to be fairly inaccurate // HLSL "rcp" is allowed to be fairly inaccurate
@@ -416,7 +415,7 @@ ShadeTile(
surface_normal_z, L_x, L_y, L_z); surface_normal_z, L_x, L_y, L_z);
// Clip back facing // Clip back facing
if (NdotL > 0.0f) { cif (NdotL > 0.0f) {
uniform float light_attenuationBegin = uniform float light_attenuationBegin =
inputData.lightAttenuationBegin[lightIndex]; inputData.lightAttenuationBegin[lightIndex];

View File

@@ -186,7 +186,7 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size)); checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size));
} }
#define deviceLaunch(func,nbx,nby,nbz,params) \ #define deviceLaunch(func,nbx,nby,nbz,params) \
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \ checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
checkCudaErrors( \ checkCudaErrors( \
cuLaunchKernel( \ cuLaunchKernel( \
(func), \ (func), \
@@ -251,7 +251,7 @@ extern "C"
assert(module_1 != NULL); assert(module_1 != NULL);
assert(func_name != NULL); assert(func_name != NULL);
assert(func_args != NULL); assert(func_args != NULL);
#if 1 #if 0
const char * module = module_1; const char * module = module_1;
#else #else
const std::vector<char> module_str = readBinary("kernel.cubin"); const std::vector<char> module_str = readBinary("kernel.cubin");
@@ -388,7 +388,7 @@ int main(int argc, char** argv) {
memcpyD2H(framebuffer.g, d_g, buffsize); memcpyD2H(framebuffer.g, d_g, buffsize);
memcpyD2H(framebuffer.b, d_b, buffsize); memcpyD2H(framebuffer.b, d_b, buffsize);
printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render " printf("[ispc cuda]:\t\t[%.3f] million cycles to render "
"%d x %d image\n", ispcCycles, "%d x %d image\n", ispcCycles,
input->header.framebufferWidth, input->header.framebufferHeight); input->header.framebufferWidth, input->header.framebufferHeight);
WriteFrame("deferred-cuda.ppm", input, framebuffer); WriteFrame("deferred-cuda.ppm", input, framebuffer);