Merge pull request #642 from egaburov/launch3d

concept of 3d tasking
This commit is contained in:
Dmitry Babokin
2013-12-17 08:40:07 -08:00
17 changed files with 386 additions and 69 deletions

View File

@@ -38,7 +38,8 @@
#pragma warning (disable: 4305)
#endif
#include <stdio.h>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <string.h>
#include "../timing.h"

View File

@@ -57,21 +57,26 @@ task void
mandelbrot_scanline(uniform float x0, uniform float dx,
uniform float y0, uniform float dy,
uniform int width, uniform int height,
uniform int span,
uniform int xspan, uniform int yspan,
uniform int maxIterations, uniform int output[]) {
uniform int ystart = taskIndex * span;
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
const uniform int xstart = taskIndex0 * xspan;
const uniform int xend = min(xstart + xspan, width);
foreach (yi = ystart ... yend, xi = 0 ... width) {
const uniform int ystart = taskIndex1 * yspan;
const uniform int yend = min(ystart + yspan, height);
foreach (yi = ystart ... yend, xi = xstart ... xend) {
float x = x0 + xi * dx;
float y = y0 + yi * dy;
int index = yi * width + xi;
output[index] = mandel(x, y, maxIterations);
}
}
#if 1
export void
mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1,
@@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height;
uniform int span = 4;
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
const uniform int yspan = 16;
launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
maxIterations, output);
#if 1
launch [width/xspan, height/yspan]
#else
launch [height/yspan][width/xspan]
#endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output);
}
#endif

View File

@@ -170,21 +170,41 @@
// Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount);
int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
// Small structure used to hold the data for each task
struct TaskInfo {
TaskFuncType func;
void *data;
int taskIndex, taskCount;
int taskIndex;
int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS)
event taskEvent;
#endif
};
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
int taskIndex0() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex1() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex2() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount0() const { return taskCount3d[0]; }
int taskCount1() const { return taskCount3d[1]; }
int taskCount2() const { return taskCount3d[2]; }
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
} __attribute__((aligned(32)));
// ispc expects these functions to have C linkage / not be mangled
extern "C" {
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz);
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
void ISPCSync(void *handle);
}
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
// Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount);
taskInfo->taskIndex, taskInfo->taskCount(),
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
}
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those.
int threadIndex = 0;
int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
// Signal the event that this task is done
ti->taskEvent.set();
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount);
myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the "number of unfinished tasks" counter in the task
@@ -871,7 +897,9 @@ TaskGroup::Sync() {
// Do work for _myTask_
//
// FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the number of unfinished tasks counter
@@ -901,7 +929,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -930,7 +960,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -961,7 +993,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
@@ -988,7 +1022,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
}
@@ -1041,7 +1077,8 @@ FreeTaskGroup(TaskGroup *tg) {
///////////////////////////////////////////////////////////////////////////
void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
const int count = count0*count1*count2;
TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) {
InitTaskSystem();
@@ -1057,7 +1094,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->func = (TaskFuncType)func;
ti->data = data;
ti->taskIndex = i;
ti->taskCount = count;
ti->taskCount3d[0] = count0;
ti->taskCount3d[1] = count1;
ti->taskCount3d[2] = count2;
}
taskGroup->Launch(baseIndex, count);
}