Merge pull request #642 from egaburov/launch3d

concept of 3d tasking
This commit is contained in:
Dmitry Babokin
2013-12-17 08:40:07 -08:00
17 changed files with 386 additions and 69 deletions

View File

@@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) { else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data); fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data); fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc, for (int k = 0; k < 3; k++)
// Walk every launch-count dimension: index by the loop variable k so that
// dimensions 1 and 2 are visited too, instead of re-walking dimension 0.
fce->launchCountExpr[k] = (Expr *)WalkAST(fce->launchCountExpr[k], preFunc,
postFunc, data); postFunc, data);
} }
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) { else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {

View File

@@ -1831,7 +1831,7 @@ define(`stdlib_core', `
declare i32 @__fast_masked_vload() declare i32 @__fast_masked_vload()
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind
declare void @ISPCSync(i8*) nounwind declare void @ISPCSync(i8*) nounwind
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind

View File

@@ -3522,7 +3522,7 @@ FunctionEmitContext::ReturnInst() {
llvm::Value * llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee, FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals, std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount) { llvm::Value *launchCount[3]){
if (callee == NULL) { if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0); AssertPos(currentPos, m->errorCount > 0);
return NULL; return NULL;
@@ -3583,7 +3583,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
args.push_back(launchGroupHandlePtr); args.push_back(launchGroupHandlePtr);
args.push_back(fptr); args.push_back(fptr);
args.push_back(voidmem); args.push_back(voidmem);
args.push_back(launchCount); args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, ""); return CallInst(flaunch, NULL, args, "");
} }

2
ctx.h
View File

@@ -542,7 +542,7 @@ public:
the given argument values. */ the given argument values. */
llvm::Value *LaunchInst(llvm::Value *callee, llvm::Value *LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals, std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount); llvm::Value *launchCount[3]);
void SyncInst(); void SyncInst();

View File

@@ -3015,8 +3015,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and
for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as
appropriate. Alternatively, ``ispc`` also has support for launching tasks appropriate. Alternatively, ``ispc`` also has support for launching tasks
from ``ispc`` code. The approach is similar to Intel® Cilk's task launch from ``ispc`` code. The approach is similar to Intel® Cilk's task launch
feature. (See the ``examples/mandelbrot_tasks`` example to see it used in feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.)
a small example.)
Any function that is launched as a task must be declared with the Any function that is launched as a task must be declared with the
``task`` qualifier: ``task`` qualifier:
@@ -3111,6 +3110,38 @@ executing the current task. The ``threadIndex`` can be used for accessing
data that is private to the current thread and thus doesn't require data that is private to the current thread and thus doesn't require
synchronization to access under parallel execution. synchronization to access under parallel execution.
The tasking system also supports multi-dimensional partitioning (currently up
to three dimensions). To launch a 3D grid of tasks, for example with ``N0``,
``N1`` and ``N2`` tasks in the x-, y- and z-dimensions respectively,
::
float data[N2][N1][N0];
task void foo_task()
{
data[taskIndex2][taskIndex1][taskIndex0] = taskIndex;
}
we use the following ``launch`` expressions:
::
launch [N2][N1][N0] foo_task()
or
::
launch [N0,N1,N2] foo_task()
The value of ``taskIndex`` is equal to ``taskIndex0 + taskCount0*(taskIndex1 +
taskCount1*taskIndex2)`` and it ranges from ``0`` to ``taskCount-1``, where
``taskCount = taskCount0*taskCount1*taskCount2``. If ``N1`` and/or ``N2`` are
not specified in the ``launch`` expression, a value of ``1`` is assumed.
Finally, for a one-dimensional grid of tasks, ``taskIndex`` is equivalent to
``taskIndex0`` and ``taskCount`` is equivalent to ``taskCount0``.
Task Parallelism: Runtime Requirements Task Parallelism: Runtime Requirements
-------------------------------------- --------------------------------------
@@ -3141,7 +3172,7 @@ manage tasks in ``ispc``:
:: ::
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
void ISPCLaunch(void **handlePtr, void *f, void *data, int count); void ISPCLaunch(void **handlePtr, void *f, void *data, int count0, int count1, int count2);
void ISPCSync(void *handle); void ISPCSync(void *handle);
All three of these functions take an opaque handle (or a pointer to an All three of these functions take an opaque handle (or a pointer to an
@@ -3178,16 +3209,20 @@ tasks. Each ``launch`` statement in ``ispc`` code causes a call to
after the handle pointer to the function are relatively straightforward; after the handle pointer to the function are relatively straightforward;
the ``void *f`` parameter holds a pointer to a function to call to run the the ``void *f`` parameter holds a pointer to a function to call to run the
work for this task, ``data`` holds a pointer to data to pass to this work for this task, ``data`` holds a pointer to data to pass to this
function, and ``count`` is the number of instances of this function to function, and ``count0``, ``count1`` and ``count2`` are the number of instances
enqueue for asynchronous execution. (In other words, ``count`` corresponds of this function to enqueue for asynchronous execution. (In other words,
to the value ``n`` in a multiple-task launch statement like ``launch[n]``.) ``count0``, ``count1`` and ``count2`` correspond to the value ``n0``, ``n1``
and ``n2`` in a multiple-task launch statement like ``launch[n2][n1][n0]`` or
``launch [n0,n1,n2]`` respectively.)
The signature of the provided function pointer ``f`` is The signature of the provided function pointer ``f`` is
:: ::
void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount, void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount) int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
When this function pointer is called by one of the hardware threads managed When this function pointer is called by one of the hardware threads managed
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
@@ -3197,11 +3232,14 @@ number of hardware threads that have been spawned to run tasks and
uniquely identifying the hardware thread that is running the task. (These uniquely identifying the hardware thread that is running the task. (These
values can be used to index into thread-local storage.) values can be used to index into thread-local storage.)
The value of ``taskCount`` should be the number of tasks launched in the The value of ``taskCount`` should be the total number of tasks launched in the
``launch`` statement that caused the call to ``ISPCLaunch()`` and each of ``launch`` statement (it must be equal to ``taskCount0*taskCount1*taskCount2``)
the calls to this function should be given a unique value of ``taskIndex`` that caused the call to ``ISPCLaunch()`` and each of the calls to this function
between zero and ``taskCount``, to distinguish which of the instances should be given a unique value of ``taskIndex``, ``taskIndex0``, ``taskIndex1``
of the set of launched tasks is running. and ``taskIndex2`` between zero and ``taskCount``, ``taskCount0``,
``taskCount1`` and ``taskCount2`` respectively, with ``taskIndex = taskIndex0
+ taskCount0*(taskIndex1 + taskCount1*taskIndex2)``, to distinguish which of
the instances of the set of launched tasks is running.

View File

@@ -38,7 +38,8 @@
#pragma warning (disable: 4305) #pragma warning (disable: 4305)
#endif #endif
#include <stdio.h> #include <cstdio>
#include <cstdlib>
#include <algorithm> #include <algorithm>
#include <string.h> #include <string.h>
#include "../timing.h" #include "../timing.h"

View File

@@ -57,21 +57,26 @@ task void
mandelbrot_scanline(uniform float x0, uniform float dx, mandelbrot_scanline(uniform float x0, uniform float dx,
uniform float y0, uniform float dy, uniform float y0, uniform float dy,
uniform int width, uniform int height, uniform int width, uniform int height,
uniform int span, uniform int xspan, uniform int yspan,
uniform int maxIterations, uniform int output[]) { uniform int maxIterations, uniform int output[]) {
uniform int ystart = taskIndex * span; const uniform int xstart = taskIndex0 * xspan;
uniform int yend = min((taskIndex+1) * span, (unsigned int)height); const uniform int xend = min(xstart + xspan, width);
foreach (yi = ystart ... yend, xi = 0 ... width) { const uniform int ystart = taskIndex1 * yspan;
const uniform int yend = min(ystart + yspan, height);
foreach (yi = ystart ... yend, xi = xstart ... xend) {
float x = x0 + xi * dx; float x = x0 + xi * dx;
float y = y0 + yi * dy; float y = y0 + yi * dy;
int index = yi * width + xi; int index = yi * width + xi;
output[index] = mandel(x, y, maxIterations); output[index] = mandel(x, y, maxIterations);
} }
} }
#if 1
export void export void
mandelbrot_ispc(uniform float x0, uniform float y0, mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1, uniform float x1, uniform float y1,
@@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
uniform int maxIterations, uniform int output[]) { uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width; uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height; uniform float dy = (y1 - y0) / height;
uniform int span = 4; const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
const uniform int yspan = 16;
launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
#if 1
launch [width/xspan, height/yspan]
#else
launch [height/yspan][width/xspan]
#endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output); maxIterations, output);
} }
#endif

View File

@@ -170,21 +170,41 @@
// Signature of ispc-generated 'task' functions // Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount); int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
// Small structure used to hold the data for each task // Small structure used to hold the data for each task
struct TaskInfo { struct TaskInfo {
TaskFuncType func; TaskFuncType func;
void *data; void *data;
int taskIndex, taskCount; int taskIndex;
int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS) #if defined(ISPC_IS_WINDOWS)
event taskEvent; event taskEvent;
#endif #endif
}; int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
int taskIndex0() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex1() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex2() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount0() const { return taskCount3d[0]; }
int taskCount1() const { return taskCount3d[1]; }
int taskCount2() const { return taskCount3d[2]; }
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
} __attribute__((aligned(32)));
// ispc expects these functions to have C linkage / not be mangled // ispc expects these functions to have C linkage / not be mangled
extern "C" { extern "C" {
void ISPCLaunch(void **handlePtr, void *f, void *data, int count); void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz);
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
void ISPCSync(void *handle); void ISPCSync(void *handle);
} }
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
// Actually run the task // Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount); taskInfo->taskIndex, taskInfo->taskCount(),
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
} }
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those. // will cause bugs in code that uses those.
int threadIndex = 0; int threadIndex = 0;
int threadCount = 1; int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
// Signal the event that this task is done // Signal the event that this task is done
ti->taskEvent.set(); ti->taskEvent.set();
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber); TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount); myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
// //
// Decrement the "number of unfinished tasks" counter in the task // Decrement the "number of unfinished tasks" counter in the task
@@ -871,7 +897,9 @@ TaskGroup::Sync() {
// Do work for _myTask_ // Do work for _myTask_
// //
// FIXME: bogus values for thread index/thread count here as well.. // FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
// //
// Decrement the number of unfinished tasks counter // Decrement the number of unfinished tasks counter
@@ -901,7 +929,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task. // Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1 // Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
} }
} }
@@ -930,7 +960,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task. // Actually run the task.
int threadIndex = omp_get_thread_num(); int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads(); int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
} }
} }
@@ -961,7 +993,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex; int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount; int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}); });
} }
@@ -988,7 +1022,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1 // TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex; int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount; int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}); });
} }
} }
@@ -1041,7 +1077,8 @@ FreeTaskGroup(TaskGroup *tg) {
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
void void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
const int count = count0*count1*count2;
TaskGroup *taskGroup; TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) { if (*taskGroupPtr == NULL) {
InitTaskSystem(); InitTaskSystem();
@@ -1057,7 +1094,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->func = (TaskFuncType)func; ti->func = (TaskFuncType)func;
ti->data = data; ti->data = data;
ti->taskIndex = i; ti->taskIndex = i;
ti->taskCount = count; ti->taskCount3d[0] = count0;
ti->taskCount3d[1] = count1;
ti->taskCount3d[2] = count2;
} }
taskGroup->Launch(baseIndex, count); taskGroup->Launch(baseIndex, count);
} }

View File

@@ -3551,11 +3551,18 @@ SelectExpr::Print() const {
// FunctionCallExpr // FunctionCallExpr
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
bool il, Expr *lce) bool il, Expr *lce[3])
: Expr(p), isLaunch(il) { : Expr(p), isLaunch(il) {
func = f; func = f;
args = a; args = a;
launchCountExpr = lce; if (lce != NULL)
{
launchCountExpr[0] = lce[0];
launchCountExpr[1] = lce[1];
launchCountExpr[2] = lce[2];
}
else
launchCountExpr[0] = launchCountExpr[1] = launchCountExpr[2] = NULL;
} }
@@ -3673,9 +3680,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
llvm::Value *retVal = NULL; llvm::Value *retVal = NULL;
ctx->SetDebugPos(pos); ctx->SetDebugPos(pos);
if (ft->isTask) { if (ft->isTask) {
AssertPos(pos, launchCountExpr != NULL); AssertPos(pos, launchCountExpr[0] != NULL);
llvm::Value *launchCount = launchCountExpr->GetValue(ctx); llvm::Value *launchCount[3] =
if (launchCount != NULL) { launchCountExpr[0]->GetValue(ctx),
launchCountExpr[1]->GetValue(ctx),
launchCountExpr[2]->GetValue(ctx) };
if (launchCount[0] != NULL)
ctx->LaunchInst(callee, argVals, launchCount); ctx->LaunchInst(callee, argVals, launchCount);
} }
else else
@@ -3798,22 +3809,25 @@ FunctionCallExpr::TypeCheck() {
if (!isLaunch) if (!isLaunch)
Error(pos, "\"launch\" expression needed to call function " Error(pos, "\"launch\" expression needed to call function "
"with \"task\" qualifier."); "with \"task\" qualifier.");
if (!launchCountExpr) for (int k = 0; k < 3; k++)
{
if (!launchCountExpr[k])
return NULL; return NULL;
launchCountExpr = launchCountExpr[k] =
TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32, TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32,
"task launch count"); "task launch count");
if (launchCountExpr == NULL) if (launchCountExpr[k] == NULL)
return NULL; return NULL;
} }
}
else { else {
if (isLaunch) { if (isLaunch) {
Error(pos, "\"launch\" expression illegal with non-\"task\"-" Error(pos, "\"launch\" expression illegal with non-\"task\"-"
"qualified function."); "qualified function.");
return NULL; return NULL;
} }
AssertPos(pos, launchCountExpr == NULL); AssertPos(pos, launchCountExpr[0] == NULL);
} }
} }
else { else {

5
expr.h
View File

@@ -247,7 +247,8 @@ public:
class FunctionCallExpr : public Expr { class FunctionCallExpr : public Expr {
public: public:
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
bool isLaunch = false, Expr *launchCountExpr = NULL); bool isLaunch = false,
Expr *launchCountExpr[3] = NULL);
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
@@ -262,7 +263,7 @@ public:
Expr *func; Expr *func;
ExprList *args; ExprList *args;
bool isLaunch; bool isLaunch;
Expr *launchCountExpr; Expr *launchCountExpr[3];
}; };

View File

@@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) {
Assert(taskIndexSym); Assert(taskIndexSym);
taskCountSym = m->symbolTable->LookupVariable("taskCount"); taskCountSym = m->symbolTable->LookupVariable("taskCount");
Assert(taskCountSym); Assert(taskCountSym);
taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0");
Assert(taskIndexSym0);
taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1");
Assert(taskIndexSym1);
taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2");
Assert(taskIndexSym2);
taskCountSym0 = m->symbolTable->LookupVariable("taskCount0");
Assert(taskCountSym0);
taskCountSym1 = m->symbolTable->LookupVariable("taskCount1");
Assert(taskCountSym1);
taskCountSym2 = m->symbolTable->LookupVariable("taskCount2");
Assert(taskCountSym2);
} }
else else
{
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL;
taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL;
}
} }
@@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
llvm::Value *threadCount = argIter++; llvm::Value *threadCount = argIter++;
llvm::Value *taskIndex = argIter++; llvm::Value *taskIndex = argIter++;
llvm::Value *taskCount = argIter++; llvm::Value *taskCount = argIter++;
llvm::Value *taskIndex0 = argIter++;
llvm::Value *taskIndex1 = argIter++;
llvm::Value *taskIndex2 = argIter++;
llvm::Value *taskCount0 = argIter++;
llvm::Value *taskCount1 = argIter++;
llvm::Value *taskCount2 = argIter++;
// Copy the function parameter values from the structure into local // Copy the function parameter values from the structure into local
// storage // storage
@@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
ctx->StoreInst(taskCount, taskCountSym->storagePtr); ctx->StoreInst(taskCount, taskCountSym->storagePtr);
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
} }
else { else {
// Regular, non-task function // Regular, non-task function

3
func.h
View File

@@ -61,6 +61,9 @@ private:
Symbol *maskSymbol; Symbol *maskSymbol;
Symbol *threadIndexSym, *threadCountSym; Symbol *threadIndexSym, *threadCountSym;
Symbol *taskIndexSym, *taskCountSym; Symbol *taskIndexSym, *taskCountSym;
Symbol *taskIndexSym0, *taskCountSym0;
Symbol *taskIndexSym1, *taskCountSym1;
Symbol *taskIndexSym2, *taskCountSym2;
}; };
#endif // ISPC_FUNC_H #endif // ISPC_FUNC_H

View File

@@ -353,17 +353,75 @@ launch_expression
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')' : TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
{ {
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr); Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount);
} }
| TOKEN_LAUNCH postfix_expression '(' ')' | TOKEN_LAUNCH postfix_expression '(' ')'
{ {
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr); Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount);
} }
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')'
{ $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); } | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' {
{ $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); } ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
$$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
$$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
Expr *launchCount[3] = {$3, $5, oneExpr};
$$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
Expr *launchCount[3] = {$3, $5, oneExpr};
$$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
Expr *launchCount[3] = {$6, $3, oneExpr};
$$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
Expr *launchCount[3] = {$6, $3, oneExpr};
$$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
Expr *launchCount[3] = {$3, $5, $7};
$$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
{
Expr *launchCount[3] = {$3, $5, $7};
$$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
Expr *launchCount[3] = {$9, $6, $3};
$$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
{
Expr *launchCount[3] = {$9, $6, $3};
$$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount);
}
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
{ {
@@ -377,13 +435,13 @@ launch_expression
"around function call expression."); "around function call expression.");
$$ = NULL; $$ = NULL;
} }
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
{ {
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' " Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
"around function call expression."); "around function call expression.");
$$ = NULL; $$ = NULL;
} }
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>' | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>'
{ {
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' " Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
"around function call expression."); "around function call expression.");
@@ -2217,6 +2275,21 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) {
Symbol *taskCountSym = new Symbol("taskCount", pos, type); Symbol *taskCountSym = new Symbol("taskCount", pos, type);
m->symbolTable->AddVariable(taskCountSym); m->symbolTable->AddVariable(taskCountSym);
Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type);
m->symbolTable->AddVariable(taskIndexSym0);
Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type);
m->symbolTable->AddVariable(taskIndexSym1);
Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type);
m->symbolTable->AddVariable(taskIndexSym2);
Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type);
m->symbolTable->AddVariable(taskCountSym0);
Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type);
m->symbolTable->AddVariable(taskCountSym1);
Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type);
m->symbolTable->AddVariable(taskCountSym2);
} }

View File

@@ -62,17 +62,20 @@ extern "C" {
extern void f_di(float *result, double *a, int *b); extern void f_di(float *result, double *a, int *b);
extern void result(float *val); extern void result(float *val);
void ISPCLaunch(void **handlePtr, void *f, void *d, int); void ISPCLaunch(void **handlePtr, void *f, void *d, int,int,int);
void ISPCSync(void *handle); void ISPCSync(void *handle);
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
} }
void ISPCLaunch(void **handle, void *f, void *d, int count) { void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) {
*handle = (void *)0xdeadbeef; *handle = (void *)0xdeadbeef;
typedef void (*TaskFuncType)(void *, int, int, int, int); typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int);
TaskFuncType func = (TaskFuncType)f; TaskFuncType func = (TaskFuncType)f;
for (int i = 0; i < count; ++i) int count = count0*count1*count2, idx = 0;
func(d, 0, 1, i, count); for (int k = 0; k < count2; ++k)
for (int j = 0; j < count1; ++j)
for (int i = 0; i < count0; ++i)
func(d, 0, 1, idx++, count, i,j,k,count0,count1,count2);
} }
void ISPCSync(void *) { void ISPCSync(void *) {

42
tests/launch-8.ispc Normal file
View File

@@ -0,0 +1,42 @@
// Returns programCount.
// NOTE(review): presumably queried by the test harness to size its buffers -- confirm.
export uniform int width() { return programCount; }
// Task-grid extents for dimensions 0, 1 and 2 (10 * 20 * 50 = 10000 tasks).
#define N0 10
#define N1 20
#define N2 50
// One cell per launched task; the task indexes it as
// [taskIndex2][taskIndex1][taskIndex0].
static uniform float array[N2][N1][N0];
task void x(const float f) {
uniform int j;
assert(taskCount == (int32)N0*N1*N2);
assert(taskCount0 == (int32)N0);
assert(taskCount1 == (int32)N1);
assert(taskCount2 == (int32)N2);
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
assert(taskIndex0 < (int32)N0);
assert(taskIndex1 < (int32)N1);
assert(taskIndex2 < (int32)N2);
const uniform int i0 = taskIndex0;
const uniform int i1 = taskIndex1;
const uniform int i2 = taskIndex2;
const uniform int i = taskIndex;
array[i2][i1][i0] = i / 10000.;
cfor (j = 0; j < 10000; ++j)
array[i2][i1][i0] = sin(array[i2][i1][i0]);
if (array[i2][i1][i0] < .02)
array[i2][i1][i0] = i;
}
// Test entry point: launches an N2 x N1 x N0 grid of `x` tasks using the
// bracketed launch[N2][N1][N0] form, waits for them with `sync`, and writes
// the array cell at the highest index of every dimension into RET for each
// program instance.
export void f_f(uniform float RET[], uniform float fFOO[]) {
float f = fFOO[programIndex];
launch[N2][N1][N0] x(f);
sync;
RET[programIndex] = array[N2-1][N1-1][N0-1];
}
// Expected output for every program instance.
// 9999 equals N0*N1*N2 - 1, i.e. the flat taskIndex of the task at
// [N2-1][N1-1][N0-1]; presumably that task's iterated sin() value falls
// below .02 so it stores its index -- confirm.
export void result(uniform float RET[]) {
RET[programIndex] = 9999.000000;
}

42
tests/launch-9.ispc Normal file
View File

@@ -0,0 +1,42 @@
// Returns programCount.
// NOTE(review): presumably queried by the test harness to size its buffers -- confirm.
export uniform int width() { return programCount; }
// Task-grid extents for dimensions 0, 1 and 2 (10 * 20 * 50 = 10000 tasks).
#define N0 10
#define N1 20
#define N2 50
// One cell per launched task; the task indexes it as
// [taskIndex2][taskIndex1][taskIndex0].
static uniform float array[N2][N1][N0];
task void x(const float f) {
uniform int j;
assert(taskCount == (int32)N0*N1*N2);
assert(taskCount0 == (int32)N0);
assert(taskCount1 == (int32)N1);
assert(taskCount2 == (int32)N2);
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
assert(taskIndex0 < (int32)N0);
assert(taskIndex1 < (int32)N1);
assert(taskIndex2 < (int32)N2);
const uniform int i0 = taskIndex0;
const uniform int i1 = taskIndex1;
const uniform int i2 = taskIndex2;
const uniform int i = taskIndex;
array[i2][i1][i0] = i / 10000.;
cfor (j = 0; j < 10000; ++j)
array[i2][i1][i0] = sin(array[i2][i1][i0]);
if (array[i2][i1][i0] < .02)
array[i2][i1][i0] = i;
}
// Test entry point: launches an N0 x N1 x N2 grid of `x` tasks using the
// comma-separated launch[N0,N1,N2] form, waits for them with `sync`, and
// writes the array cell at the highest index of every dimension into RET for
// each program instance.
export void f_f(uniform float RET[], uniform float fFOO[]) {
float f = fFOO[programIndex];
launch[N0,N1,N2] x(f);
sync;
RET[programIndex] = array[N2-1][N1-1][N0-1];
}
// Expected output for every program instance.
// 9999 equals N0*N1*N2 - 1, i.e. the flat taskIndex of the task at
// [N2-1][N1-1][N0-1]; presumably that task's iterated sin() value falls
// below .02 so it stores its index -- confirm.
export void result(uniform float RET[]) {
RET[programIndex] = 9999.000000;
}

View File

@@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // threadCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
callTypes.push_back(LLVMTypes::Int32Type); // taskCount callTypes.push_back(LLVMTypes::Int32Type); // taskCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
} }
else else
// Otherwise we already have the types of the arguments // Otherwise we already have the types of the arguments