added any/none/all for bool
This commit is contained in:
@@ -484,25 +484,28 @@ svml_stubs(double,d,WIDTH)
|
||||
|
||||
define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline {
|
||||
%v = extractelement <1 x i1> %0, i32 0
|
||||
%v64 = zext i1 %v to i64
|
||||
ret i64 %v64
|
||||
%v64 = zext i1 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
define i1 @__any(<1 x i1>) nounwind readnone alwaysinline {
|
||||
%v = extractelement <1 x i1> %0, i32 0
|
||||
%cmp = icmp ne i1 %v, 0
|
||||
%res = call i32 @__ballot(i1 %v)
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
define i1 @__all(<1 x i1>) nounwind readnone alwaysinline {
|
||||
%v = extractelement <1 x i1> %0, i32 0
|
||||
%cmp = icmp eq i1 %v, 1
|
||||
%res = call i32 @__ballot(i1 %v)
|
||||
%cmp = icmp eq i32 %res, 31
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
define i1 @__none(<1 x i1>) nounwind readnone alwaysinline {
|
||||
%v = extractelement <1 x i1> %0, i32 0
|
||||
%cmp = icmp eq i1 %v, 0
|
||||
%res = call i32 @__ballot(i1 %v)
|
||||
%cmp = icmp eq i32 %res, 0
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
|
||||
@@ -131,6 +131,7 @@ CreateInputDataFromFile(const char *path) {
|
||||
fprintf(stderr, "Preumature EOF reading file \"%s\"\n", path);
|
||||
return NULL;
|
||||
}
|
||||
fprintf(stderr, " numLights= %d\n", input->header.numLights);
|
||||
|
||||
// Load data chunk and update pointers
|
||||
input->chunk = (uint8_t *)lAlignedMalloc(input->header.inputDataChunkSize,
|
||||
|
||||
@@ -200,36 +200,33 @@ IntersectLightsWithTileMinMax(
|
||||
// don't actually need to mask the rest of this function - this is
|
||||
// just a greedy early-out. Could also structure all of this as
|
||||
// nested if() statements, but this is a bit easier to read
|
||||
bool active = false;
|
||||
if (any(inFrustum)) {
|
||||
float light_positionView_x = light_positionView_x_array[lightIndex];
|
||||
float light_positionView_y = light_positionView_y_array[lightIndex];
|
||||
if (any(inFrustum)) {
|
||||
float light_positionView_x = light_positionView_x_array[lightIndex];
|
||||
float light_positionView_y = light_positionView_y_array[lightIndex];
|
||||
|
||||
d = light_positionView_z * frustumPlanes_z[0] +
|
||||
light_positionView_x * frustumPlanes_xy[0];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
d = light_positionView_z * frustumPlanes_z[0] +
|
||||
light_positionView_x * frustumPlanes_xy[0];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
d = light_positionView_z * frustumPlanes_z[1] +
|
||||
light_positionView_x * frustumPlanes_xy[1];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
d = light_positionView_z * frustumPlanes_z[1] +
|
||||
light_positionView_x * frustumPlanes_xy[1];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
d = light_positionView_z * frustumPlanes_z[2] +
|
||||
light_positionView_y * frustumPlanes_xy[2];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
d = light_positionView_z * frustumPlanes_z[2] +
|
||||
light_positionView_y * frustumPlanes_xy[2];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
d = light_positionView_z * frustumPlanes_z[3] +
|
||||
light_positionView_y * frustumPlanes_xy[3];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
// Pack and store intersecting lights
|
||||
if (inFrustum)
|
||||
active = true;
|
||||
}
|
||||
if (lightIndex >= numLights)
|
||||
active = false;
|
||||
d = light_positionView_z * frustumPlanes_z[3] +
|
||||
light_positionView_y * frustumPlanes_xy[3];
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
|
||||
}
|
||||
// Pack and store intersecting lights
|
||||
const bool active = inFrustum && lightIndex < numLights;
|
||||
|
||||
if (any(active))
|
||||
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
|
||||
}
|
||||
}
|
||||
|
||||
return tileNumLights;
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#define programIndex laneIndex()
|
||||
#define taskIndex blockIndex0()
|
||||
#define taskCount blockCount0()
|
||||
#define cif if
|
||||
#else
|
||||
#warning "emitting HOST code"
|
||||
#endif
|
||||
@@ -212,8 +213,8 @@ IntersectLightsWithTileMinMax(
|
||||
// don't actually need to mask the rest of this function - this is
|
||||
// just a greedy early-out. Could also structure all of this as
|
||||
// nested if() statements, but this is a bit easier to read
|
||||
bool active = false;
|
||||
if (any(inFrustum)) {
|
||||
if (any(inFrustum))
|
||||
{
|
||||
float light_positionView_x = light_positionView_x_array[lightIndex];
|
||||
float light_positionView_y = light_positionView_y_array[lightIndex];
|
||||
|
||||
@@ -234,13 +235,11 @@ IntersectLightsWithTileMinMax(
|
||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||
|
||||
// Pack and store intersecting lights
|
||||
if (inFrustum)
|
||||
active = true;
|
||||
}
|
||||
if (lightIndex >= numLights)
|
||||
active = false;
|
||||
const bool active = inFrustum && lightIndex < numLights;
|
||||
|
||||
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
|
||||
if(any(active))
|
||||
tileNumLights += packed_store_active(active, &tileLightIndices[tileNumLights], lightIndex);
|
||||
}
|
||||
}
|
||||
|
||||
return tileNumLights;
|
||||
@@ -402,7 +401,7 @@ ShadeTile(
|
||||
// Clip at end of attenuation
|
||||
float light_attenutaionEnd2 = light_attenuationEnd * light_attenuationEnd;
|
||||
|
||||
if (distanceToLight2 < light_attenutaionEnd2) {
|
||||
cif (distanceToLight2 < light_attenutaionEnd2) {
|
||||
float distanceToLight = sqrt(distanceToLight2);
|
||||
|
||||
// HLSL "rcp" is allowed to be fairly inaccurate
|
||||
@@ -416,7 +415,7 @@ ShadeTile(
|
||||
surface_normal_z, L_x, L_y, L_z);
|
||||
|
||||
// Clip back facing
|
||||
if (NdotL > 0.0f) {
|
||||
cif (NdotL > 0.0f) {
|
||||
uniform float light_attenuationBegin =
|
||||
inputData.lightAttenuationBegin[lightIndex];
|
||||
|
||||
|
||||
@@ -186,7 +186,7 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
|
||||
checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size));
|
||||
}
|
||||
#define deviceLaunch(func,nbx,nby,nbz,params) \
|
||||
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \
|
||||
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \
|
||||
checkCudaErrors( \
|
||||
cuLaunchKernel( \
|
||||
(func), \
|
||||
@@ -251,7 +251,7 @@ extern "C"
|
||||
assert(module_1 != NULL);
|
||||
assert(func_name != NULL);
|
||||
assert(func_args != NULL);
|
||||
#if 1
|
||||
#if 0
|
||||
const char * module = module_1;
|
||||
#else
|
||||
const std::vector<char> module_str = readBinary("kernel.cubin");
|
||||
@@ -388,7 +388,7 @@ int main(int argc, char** argv) {
|
||||
memcpyD2H(framebuffer.g, d_g, buffsize);
|
||||
memcpyD2H(framebuffer.b, d_b, buffsize);
|
||||
|
||||
printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render "
|
||||
printf("[ispc cuda]:\t\t[%.3f] million cycles to render "
|
||||
"%d x %d image\n", ispcCycles,
|
||||
input->header.framebufferWidth, input->header.framebufferHeight);
|
||||
WriteFrame("deferred-cuda.ppm", input, framebuffer);
|
||||
|
||||
Reference in New Issue
Block a user