added any/none/all for bool

This commit is contained in:
Evghenii
2013-11-11 12:59:40 +01:00
parent a91c8e15e2
commit f2c66dc4c3
5 changed files with 43 additions and 43 deletions

View File

@@ -484,25 +484,28 @@ svml_stubs(double,d,WIDTH)
;; Returns the one-element mask widened to i64: the extracted i1 is
;; zero-extended, so the result is 1 when the predicate is set and 0 otherwise.
define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline {
  %v = extractelement <1 x i1> %0, i32 0
  %v64 = zext i1 %v to i64
  ret i64 %v64
}
;; Returns true when the i32 produced by __ballot for this predicate is non-zero.
;; NOTE(review): assumes __ballot yields one bit per lane whose predicate is
;; set - confirm against its definition.
define i1 @__any(<1 x i1>) nounwind readnone alwaysinline {
  %v = extractelement <1 x i1> %0, i32 0
  ; Combine the predicate across lanes, then test for any set bit.
  %res = call i32 @__ballot(i1 %v)
  %cmp = icmp ne i32 %res, 0
  ret i1 %cmp
}
;; Returns true when the i32 produced by __ballot for this predicate has every
;; bit set.
;; NOTE(review): assumes __ballot yields a 32-bit mask with one bit per lane and
;; that all 32 lanes participate - confirm against its definition. The previous
;; constant 31 is the highest lane number, not the full mask.
define i1 @__all(<1 x i1>) nounwind readnone alwaysinline {
  %v = extractelement <1 x i1> %0, i32 0
  %res = call i32 @__ballot(i1 %v)
  ; -1 is the i32 value with all 32 bits set (0xFFFFFFFF).
  %cmp = icmp eq i32 %res, -1
  ret i1 %cmp
}
;; Returns true when the i32 produced by __ballot for this predicate is zero.
;; NOTE(review): assumes __ballot yields one bit per lane whose predicate is
;; set - confirm against its definition.
define i1 @__none(<1 x i1>) nounwind readnone alwaysinline {
  %v = extractelement <1 x i1> %0, i32 0
  ; Combine the predicate across lanes, then test that no bit is set.
  %res = call i32 @__ballot(i1 %v)
  %cmp = icmp eq i32 %res, 0
  ret i1 %cmp
}

View File

@@ -131,6 +131,7 @@ CreateInputDataFromFile(const char *path) {
fprintf(stderr, "Preumature EOF reading file \"%s\"\n", path);
return NULL;
}
fprintf(stderr, " numLights= %d\n", input->header.numLights);
// Load data chunk and update pointers
input->chunk = (uint8_t *)lAlignedMalloc(input->header.inputDataChunkSize,

View File

@@ -200,36 +200,33 @@ IntersectLightsWithTileMinMax(
// don't actually need to mask the rest of this function - this is
// just a greedy early-out. Could also structure all of this as
// nested if() statements, but this is a bit easier to read
bool active = false;
if (any(inFrustum)) {
float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex];
if (any(inFrustum)) {
float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex];
d = light_positionView_z * frustumPlanes_z[0] +
light_positionView_x * frustumPlanes_xy[0];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[0] +
light_positionView_x * frustumPlanes_xy[0];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[1] +
light_positionView_x * frustumPlanes_xy[1];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[1] +
light_positionView_x * frustumPlanes_xy[1];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[2] +
light_positionView_y * frustumPlanes_xy[2];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[2] +
light_positionView_y * frustumPlanes_xy[2];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
d = light_positionView_z * frustumPlanes_z[3] +
light_positionView_y * frustumPlanes_xy[3];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
// Pack and store intersecting lights
if (inFrustum)
active = true;
}
if (lightIndex >= numLights)
active = false;
d = light_positionView_z * frustumPlanes_z[3] +
light_positionView_y * frustumPlanes_xy[3];
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
}
// Pack and store intersecting lights
const bool active = inFrustum && lightIndex < numLights;
if (any(active))
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
}
}
return tileNumLights;
}

View File

@@ -37,6 +37,7 @@
#define programIndex laneIndex()
#define taskIndex blockIndex0()
#define taskCount blockCount0()
#define cif if
#else
#warning "emitting HOST code"
#endif
@@ -212,8 +213,8 @@ IntersectLightsWithTileMinMax(
// don't actually need to mask the rest of this function - this is
// just a greedy early-out. Could also structure all of this as
// nested if() statements, but this is a bit easier to read
bool active = false;
if (any(inFrustum)) {
if (any(inFrustum))
{
float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex];
@@ -234,13 +235,11 @@ IntersectLightsWithTileMinMax(
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
// Pack and store intersecting lights
if (inFrustum)
active = true;
}
if (lightIndex >= numLights)
active = false;
const bool active = inFrustum && lightIndex < numLights;
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
if(any(active))
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
}
}
return tileNumLights;
@@ -402,7 +401,7 @@ ShadeTile(
// Clip at end of attenuation
float light_attenutaionEnd2 = light_attenuationEnd * light_attenuationEnd;
if (distanceToLight2 < light_attenutaionEnd2) {
cif (distanceToLight2 < light_attenutaionEnd2) {
float distanceToLight = sqrt(distanceToLight2);
// HLSL "rcp" is allowed to be fairly inaccurate
@@ -416,7 +415,7 @@ ShadeTile(
surface_normal_z, L_x, L_y, L_z);
// Clip back facing
if (NdotL > 0.0f) {
cif (NdotL > 0.0f) {
uniform float light_attenuationBegin =
inputData.lightAttenuationBegin[lightIndex];

View File

@@ -186,7 +186,7 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size));
}
#define deviceLaunch(func,nbx,nby,nbz,params) \
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
checkCudaErrors( \
cuLaunchKernel( \
(func), \
@@ -251,7 +251,7 @@ extern "C"
assert(module_1 != NULL);
assert(func_name != NULL);
assert(func_args != NULL);
#if 1
#if 0
const char * module = module_1;
#else
const std::vector<char> module_str = readBinary("kernel.cubin");
@@ -388,7 +388,7 @@ int main(int argc, char** argv) {
memcpyD2H(framebuffer.g, d_g, buffsize);
memcpyD2H(framebuffer.b, d_b, buffsize);
printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render "
printf("[ispc cuda]:\t\t[%.3f] million cycles to render "
"%d x %d image\n", ispcCycles,
input->header.framebufferWidth, input->header.framebufferHeight);
WriteFrame("deferred-cuda.ppm", input, framebuffer);