Changed notation: task[Index,Count]_[x,y,z] -> task[Index,Count][1,2,3]. Changed launch <<< nx,ny,nz >>> into launch [nx,ny,nz] or the equivalent launch [nz][ny][nx]. The programmer can pick whichever form they prefer.

This commit is contained in:
Evghenii
2013-10-24 13:16:23 +02:00
parent e6c8765891
commit 43761173ec
7 changed files with 122 additions and 120 deletions

View File

@@ -59,16 +59,10 @@ mandelbrot_scanline(uniform float x0, uniform float dx,
uniform int width, uniform int height,
uniform int xspan, uniform int yspan,
uniform int maxIterations, uniform int output[]) {
#if 0
print("taskIndex = % : % \n", taskIndex);
print("taskIndex_x= % : % \n", taskIndex_x);
print("taskIndex_y= % : % \n", taskIndex_y);
print(" --- \n");
#endif
const uniform int xstart = taskIndex_x * xspan;
const uniform int xstart = taskIndex1 * xspan;
const uniform int xend = min(xstart + xspan, width);
const uniform int ystart = taskIndex_y * yspan;
const uniform int ystart = taskIndex2 * yspan;
const uniform int yend = min(ystart + yspan, height);
@@ -90,10 +84,15 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height;
const uniform int xspan = 16;
const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */
const uniform int yspan = 16;
launch <<<width/xspan, height/yspan>>>
#if 1
launch [width/xspan, height/yspan]
#else
launch [height/yspan][width/xspan]
#endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output);
}

View File

@@ -171,8 +171,8 @@
// Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount,
int taskIndex_x, int taskIndex_y, int taskIndex_z,
int taskCount_x, int taskCount_y, int taskCount_z);
int taskIndex1, int taskIndex2, int taskIndex3,
int taskCount1, int taskCount2, int taskCount3);
// Small structure used to hold the data for each task
struct TaskInfo {
@@ -183,21 +183,21 @@ struct TaskInfo {
#if defined(ISPC_IS_WINDOWS)
event taskEvent;
#endif
int taskIndex_x() const
int taskIndex1() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex_y() const
int taskIndex2() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex_z() const
int taskIndex3() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount_x() const { return taskCount3d[0]; }
int taskCount_y() const { return taskCount3d[1]; }
int taskCount_z() const { return taskCount3d[2]; }
int taskCount1() const { return taskCount3d[0]; }
int taskCount2() const { return taskCount3d[1]; }
int taskCount3() const { return taskCount3d[2]; }
};
// ispc expects these functions to have C linkage / not be mangled
@@ -537,8 +537,8 @@ lRunTask(void *ti) {
// Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount,
taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(),
taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z());
taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(),
taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3());
}
@@ -580,8 +580,8 @@ lRunTask(LPVOID param) {
int threadIndex = 0;
int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(),
ti->taskCount1(), ti->taskCount2(), ti->taskCount3());
// Signal the event that this task is done
ti->taskEvent.set();
@@ -683,8 +683,8 @@ lTaskEntry(void *arg) {
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount,
myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(),
myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z());
myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(),
myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3());
//
// Decrement the "number of unfinished tasks" counter in the task
@@ -886,8 +886,8 @@ TaskGroup::Sync() {
//
// FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount,
myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(),
myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z());
myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(),
myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3());
//
// Decrement the number of unfinished tasks counter
@@ -918,8 +918,8 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount,
ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(),
ti->taskCount1(), ti->taskCount2(), ti->taskCount3());
}
}
@@ -949,8 +949,8 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(),
ti->taskCount1(), ti->taskCount2(), ti->taskCount3());
}
}
@@ -982,8 +982,8 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(),
ti->taskCount1(), ti->taskCount2(), ti->taskCount3());
});
}
@@ -1011,8 +1011,8 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(),
ti->taskCount1(), ti->taskCount2(), ti->taskCount3());
});
}
}
@@ -1065,8 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) {
///////////////////////////////////////////////////////////////////////////
void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) {
const int count = countx*county*countz;
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, int count3) {
const int count = count1*count2*count3;
TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) {
InitTaskSystem();
@@ -1083,9 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county,
ti->data = data;
ti->taskIndex = i;
ti->taskCount = count;
ti->taskCount3d[0] = countx;
ti->taskCount3d[1] = county;
ti->taskCount3d[2] = countz;
ti->taskCount3d[0] = count1;
ti->taskCount3d[1] = count2;
ti->taskCount3d[2] = count3;
}
taskGroup->Launch(baseIndex, count);
}