diff --git a/ast.cpp b/ast.cpp index 83ee207d..60b20a80 100644 --- a/ast.cpp +++ b/ast.cpp @@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, else if ((fce = dynamic_cast(node)) != NULL) { fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data); fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data); - fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc, + for (int k = 0; k < 3; k++) + fce->launchCountExpr[k] = (Expr *)WalkAST(fce->launchCountExpr[k], preFunc, postFunc, data); } else if ((ie = dynamic_cast(node)) != NULL) { diff --git a/builtins/util.m4 b/builtins/util.m4 index e1c9bf97..e83e126b 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1831,7 +1831,7 @@ define(`stdlib_core', ` declare i32 @__fast_masked_vload() declare i8* @ISPCAlloc(i8**, i64, i32) nounwind -declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind +declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind diff --git a/ctx.cpp b/ctx.cpp index e5c60363..6ff26c6a 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3522,7 +3522,7 @@ FunctionEmitContext::ReturnInst() { llvm::Value * FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount) { + llvm::Value *launchCount[3]){ if (callee == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; @@ -3583,7 +3583,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, args.push_back(launchGroupHandlePtr); args.push_back(fptr); args.push_back(voidmem); - args.push_back(launchCount); + args.push_back(launchCount[0]); + args.push_back(launchCount[1]); + args.push_back(launchCount[2]); return CallInst(flaunch, NULL, args, ""); } diff --git a/ctx.h b/ctx.h index 58f9aae3..4dd30053 100644 --- a/ctx.h +++ b/ctx.h @@ -542,7 +542,7 @@ public: he given argument values. 
*/ llvm::Value *LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount); + llvm::Value *launchCount[3]); void SyncInst(); diff --git a/docs/ispc.rst b/docs/ispc.rst index 93b6ac9b..7e76f433 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3015,8 +3015,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as appropriate. Alternatively, ``ispc`` also has support for launching tasks from ``ispc`` code. The approach is similar to Intel® Cilk's task launch -feature. (See the ``examples/mandelbrot_tasks`` example to see it used in -a small example.) +feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.) Any function that is launched as a task must be declared with the ``task`` qualifier: @@ -3111,6 +3110,38 @@ executing the current task. The ``threadIndex`` can be used for accessing data that is private to the current thread and thus doesn't require synchronization to access under parallel execution. +The tasking system also supports multi-dimensional partitioning (currently up +to three dimensions). To launch a 3D grid of tasks, for example with ``N0``, +``N1`` and ``N2`` tasks in the x-, y- and z-dimensions respectively + +:: + + float data[N2][N1][N0]; + task void foo_task() + { + data[taskIndex2][taskIndex1][taskIndex0] = taskIndex; + } + +we use the following ``launch`` expressions: + +:: + + launch [N2][N1][N0] foo_task(); + +or + +:: + + launch [N0,N1,N2] foo_task(); + +The value of ``taskIndex`` is equal to ``taskIndex0 + taskCount0*(taskIndex1 + +taskCount1*taskIndex2)`` and it ranges from ``0`` to ``taskCount-1``, where +``taskCount = taskCount0*taskCount1*taskCount2``. If ``N1`` and/or ``N2`` are +not specified in the ``launch`` expression, a value of ``1`` is assumed. +Finally, for a one-dimensional grid of tasks, ``taskIndex`` is equivalent to +``taskIndex0`` and ``taskCount`` is equivalent to ``taskCount0``. 
+ + Task Parallelism: Runtime Requirements -------------------------------------- @@ -3141,7 +3172,7 @@ manage tasks in ``ispc``: :: void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int count0, int count1, int count2); void ISPCSync(void *handle); All three of these functions take an opaque handle (or a pointer to an @@ -3178,16 +3209,20 @@ tasks. Each ``launch`` statement in ``ispc`` code causes a call to after the handle pointer to the function are relatively straightforward; the ``void *f`` parameter holds a pointer to a function to call to run the work for this task, ``data`` holds a pointer to data to pass to this -function, and ``count`` is the number of instances of this function to -enqueue for asynchronous execution. (In other words, ``count`` corresponds -to the value ``n`` in a multiple-task launch statement like ``launch[n]``.) +function, and ``count0``, ``count1`` and ``count2`` are the numbers of instances +of this function to enqueue along each launch dimension, ``count0*count1*count2`` +in total, for asynchronous execution. (In other words, ``count0``, ``count1`` and +``count2`` correspond to the values ``n0``, ``n1`` and ``n2`` in a multiple-task +launch statement like ``launch[n2][n1][n0]`` or ``launch [n0,n1,n2]``.) The signature of the provided function pointer ``f`` is :: void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount) + int taskIndex, int taskCount, + int taskIndex0, int taskIndex1, int taskIndex2, + int taskCount0, int taskCount1, int taskCount2); When this function pointer is called by one of the hardware threads managed by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should @@ -3197,11 +3232,14 @@ number of hardware threads that have been spawned to run tasks and uniquely identifying the hardware thread that is running the task. 
(These values can be used to index into thread-local storage.) -The value of ``taskCount`` should be the number of tasks launched in the -``launch`` statement that caused the call to ``ISPCLaunch()`` and each of -the calls to this function should be given a unique value of ``taskIndex`` -between zero and ``taskCount``, to distinguish which of the instances -of the set of launched tasks is running. +The value of ``taskCount`` should be the total number of tasks launched in the +``launch`` statement that caused the call to ``ISPCLaunch()`` (it must be equal +to ``taskCount0*taskCount1*taskCount2``). Each of the calls to this function +should be given a unique value of ``taskIndex`` between ``0`` and ``taskCount-1``, +with ``taskIndex0``, ``taskIndex1`` and ``taskIndex2`` ranging from ``0`` to +``taskCount0-1``, ``taskCount1-1`` and ``taskCount2-1`` respectively and +``taskIndex = taskIndex0 + taskCount0*(taskIndex1 + taskCount1*taskIndex2)``, +to distinguish which of the instances of the set of launched tasks is running. 
diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp index 32db45bc..682987ae 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -38,7 +38,8 @@ #pragma warning (disable: 4305) #endif -#include +#include +#include #include #include #include "../timing.h" diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc index 84d4ccd4..f9b0be4c 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc @@ -57,21 +57,26 @@ task void mandelbrot_scanline(uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int height, - uniform int span, + uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { - uniform int ystart = taskIndex * span; - uniform int yend = min((taskIndex+1) * span, (unsigned int)height); + const uniform int xstart = taskIndex0 * xspan; + const uniform int xend = min(xstart + xspan, width); - foreach (yi = ystart ... yend, xi = 0 ... width) { + const uniform int ystart = taskIndex1 * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... 
xend) { float x = x0 + xi * dx; float y = y0 + yi * dy; int index = yi * width + xi; output[index] = mandel(x, y, maxIterations); } + } - +#if 1 export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, @@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; - uniform int span = 4; + const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */ + const uniform int yspan = 16; - launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span, - maxIterations, output); + +#if 1 + launch [width/xspan, height/yspan] +#else + launch [height/yspan][width/xspan] +#endif + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); } +#endif diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index b97c4bba..cfe0b17b 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -170,21 +170,41 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount); + int taskIndex, int taskCount, + int taskIndex0, int taskIndex1, int taskIndex2, + int taskCount0, int taskCount1, int taskCount2); // Small structure used to hold the data for each task struct TaskInfo { TaskFuncType func; void *data; - int taskIndex, taskCount; + int taskIndex; + int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif -}; + int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } + int taskIndex0() const + { + return taskIndex % taskCount3d[0]; + } + int taskIndex1() const + { + return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; + } + int taskIndex2() const + { + return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); + } + int taskCount0() const { return taskCount3d[0]; } + int taskCount1() 
const { return taskCount3d[1]; } + int taskCount2() const { return taskCount3d[2]; } + TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); } +} __attribute__((aligned(32))); // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } @@ -518,7 +538,9 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount); + taskInfo->taskIndex, taskInfo->taskCount(), + taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(), + taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2()); } @@ -559,7 +581,9 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); // Signal the event that this task is done ti->taskEvent.set(); @@ -660,7 +684,9 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount); + myTask->taskCount(), + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the "number of unfinished tasks" counter in the task @@ -871,7 +897,9 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. 
- myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(), + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the number of unfinished tasks counter @@ -901,7 +929,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, ti->taskIndex, ti->taskCount(), ti->taskIndex, ti->taskCount(), + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -930,7 +960,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -961,7 +993,9 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; - int threadCount = ti->taskCount; + int threadCount = ti->taskCount(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } @@ -988,7 +1022,9 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; - int threadCount = ti->taskCount; + int threadCount = ti->taskCount(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), + ti->taskIndex0(), 
ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } } @@ -1041,7 +1077,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) { + const int count = count0*count1*count2; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1057,7 +1094,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ti->func = (TaskFuncType)func; ti->data = data; ti->taskIndex = i; - ti->taskCount = count; + ti->taskCount3d[0] = count0; + ti->taskCount3d[1] = count1; + ti->taskCount3d[2] = count2; } taskGroup->Launch(baseIndex, count); } diff --git a/expr.cpp b/expr.cpp index 1cbebad5..5be578eb 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3551,11 +3551,18 @@ SelectExpr::Print() const { // FunctionCallExpr FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, - bool il, Expr *lce) + bool il, Expr *lce[3]) : Expr(p), isLaunch(il) { func = f; args = a; - launchCountExpr = lce; + if (lce != NULL) + { + launchCountExpr[0] = lce[0]; + launchCountExpr[1] = lce[1]; + launchCountExpr[2] = lce[2]; + } + else + launchCountExpr[0] = launchCountExpr[1] = launchCountExpr[2] = NULL; } @@ -3673,9 +3680,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *retVal = NULL; ctx->SetDebugPos(pos); if (ft->isTask) { - AssertPos(pos, launchCountExpr != NULL); - llvm::Value *launchCount = launchCountExpr->GetValue(ctx); - if (launchCount != NULL) + AssertPos(pos, launchCountExpr[0] != NULL && launchCountExpr[1] != NULL && launchCountExpr[2] != NULL); + llvm::Value *launchCount[3] = + { launchCountExpr[0]->GetValue(ctx), + launchCountExpr[1]->GetValue(ctx), + launchCountExpr[2]->GetValue(ctx) }; + + if (launchCount[0] != NULL && launchCount[1] != NULL && launchCount[2] != NULL) ctx->LaunchInst(callee, argVals, launchCount); } else @@ -3798,14 +3809,17 @@ FunctionCallExpr::TypeCheck() 
{ if (!isLaunch) Error(pos, "\"launch\" expression needed to call function " "with \"task\" qualifier."); - if (!launchCountExpr) + for (int k = 0; k < 3; k++) + { + if (!launchCountExpr[k]) return NULL; - launchCountExpr = - TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32, - "task launch count"); - if (launchCountExpr == NULL) + launchCountExpr[k] = + TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32, + "task launch count"); + if (launchCountExpr[k] == NULL) return NULL; + } } else { if (isLaunch) { @@ -3813,7 +3827,7 @@ FunctionCallExpr::TypeCheck() { "qualified function."); return NULL; } - AssertPos(pos, launchCountExpr == NULL); + AssertPos(pos, launchCountExpr[0] == NULL); } } else { diff --git a/expr.h b/expr.h index 45780414..38617e8e 100644 --- a/expr.h +++ b/expr.h @@ -247,7 +247,8 @@ public: class FunctionCallExpr : public Expr { public: FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, - bool isLaunch = false, Expr *launchCountExpr = NULL); + bool isLaunch = false, + Expr *launchCountExpr[3] = NULL); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; @@ -262,7 +263,7 @@ public: Expr *func; ExprList *args; bool isLaunch; - Expr *launchCountExpr; + Expr *launchCountExpr[3]; }; diff --git a/func.cpp b/func.cpp index b975049b..af2cc05a 100644 --- a/func.cpp +++ b/func.cpp @@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) { Assert(taskIndexSym); taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); + + taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0"); + Assert(taskIndexSym0); + taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1"); + Assert(taskIndexSym1); + taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2"); + Assert(taskIndexSym2); + + + taskCountSym0 = m->symbolTable->LookupVariable("taskCount0"); + Assert(taskCountSym0); + taskCountSym1 = m->symbolTable->LookupVariable("taskCount1"); + 
Assert(taskCountSym1); + taskCountSym2 = m->symbolTable->LookupVariable("taskCount2"); + Assert(taskCountSym2); } else + { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; + taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL; + taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL; + } } @@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; + llvm::Value *taskIndex0 = argIter++; + llvm::Value *taskIndex1 = argIter++; + llvm::Value *taskIndex2 = argIter++; + llvm::Value *taskCount0 = argIter++; + llvm::Value *taskCount1 = argIter++; + llvm::Value *taskCount2 = argIter++; // Copy the function parameter values from the structure into local // storage @@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); + + taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0"); + ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr); + taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); + ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); + taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); + ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); + + taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0"); + ctx->StoreInst(taskCount0, taskCountSym0->storagePtr); + taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); + ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); + taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); + ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index ac3e1447..88a96dbc 100644 --- 
a/func.h +++ b/func.h @@ -60,7 +60,10 @@ private: Stmt *code; Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; - Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym0, *taskCountSym0; + Symbol *taskIndexSym1, *taskCountSym1; + Symbol *taskIndexSym2, *taskCountSym2; }; #endif // ISPC_FUNC_H diff --git a/parse.yy b/parse.yy index 38c5ba77..9a0377c5 100644 --- a/parse.yy +++ b/parse.yy @@ -353,17 +353,75 @@ launch_expression : TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount); } | TOKEN_LAUNCH postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount); } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')' - { $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' - { $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); } + + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, 
oneExpr}; + $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); + } + + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount); + } + + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount); + } + | TOKEN_LAUNCH '[' 
assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount); + } + | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' { @@ -377,13 +435,13 @@ launch_expression "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' { Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>' { Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); @@ -2214,9 +2272,24 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskIndexSym = new Symbol("taskIndex", pos, type); m->symbolTable->AddVariable(taskIndexSym); - + Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); + + Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type); + m->symbolTable->AddVariable(taskIndexSym0); + Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type); + m->symbolTable->AddVariable(taskIndexSym1); + Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type); + m->symbolTable->AddVariable(taskIndexSym2); + + + Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type); + 
m->symbolTable->AddVariable(taskCountSym0); + Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type); + m->symbolTable->AddVariable(taskCountSym1); + Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type); + m->symbolTable->AddVariable(taskCountSym2); } diff --git a/test_static.cpp b/test_static.cpp index 8985fdb3..27a5b136 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -62,17 +62,20 @@ extern "C" { extern void f_di(float *result, double *a, int *b); extern void result(float *val); - void ISPCLaunch(void **handlePtr, void *f, void *d, int); + void ISPCLaunch(void **handlePtr, void *f, void *d, int,int,int); void ISPCSync(void *handle); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); } - -void ISPCLaunch(void **handle, void *f, void *d, int count) { + +void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) { *handle = (void *)0xdeadbeef; - typedef void (*TaskFuncType)(void *, int, int, int, int); + typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int); TaskFuncType func = (TaskFuncType)f; - for (int i = 0; i < count; ++i) - func(d, 0, 1, i, count); + int count = count0*count1*count2, idx = 0; + for (int k = 0; k < count2; ++k) + for (int j = 0; j < count1; ++j) + for (int i = 0; i < count0; ++i) + func(d, 0, 1, idx++, count, i,j,k,count0,count1,count2); } void ISPCSync(void *) { diff --git a/tests/launch-8.ispc b/tests/launch-8.ispc new file mode 100644 index 00000000..eacba673 --- /dev/null +++ b/tests/launch-8.ispc @@ -0,0 +1,42 @@ + +export uniform int width() { return programCount; } + + +#define N0 10 +#define N1 20 +#define N2 50 +static uniform float array[N2][N1][N0]; + +task void x(const float f) { + uniform int j; + + assert(taskCount == (int32)N0*N1*N2); + assert(taskCount0 == (int32)N0); + assert(taskCount1 == (int32)N1); + assert(taskCount2 == (int32)N2); + assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2)); + 
assert(taskIndex0 < (int32)N0); + assert(taskIndex1 < (int32)N1); + assert(taskIndex2 < (int32)N2); + + const uniform int i0 = taskIndex0; + const uniform int i1 = taskIndex1; + const uniform int i2 = taskIndex2; + const uniform int i = taskIndex; + array[i2][i1][i0] = i / 10000.; + cfor (j = 0; j < 10000; ++j) + array[i2][i1][i0] = sin(array[i2][i1][i0]); + if (array[i2][i1][i0] < .02) + array[i2][i1][i0] = i; +} +export void f_f(uniform float RET[], uniform float fFOO[]) { + float f = fFOO[programIndex]; + launch[N2][N1][N0] x(f); + sync; + RET[programIndex] = array[N2-1][N1-1][N0-1]; +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 9999.000000; +} diff --git a/tests/launch-9.ispc b/tests/launch-9.ispc new file mode 100644 index 00000000..1952e8e7 --- /dev/null +++ b/tests/launch-9.ispc @@ -0,0 +1,42 @@ + +export uniform int width() { return programCount; } + + +#define N0 10 +#define N1 20 +#define N2 50 +static uniform float array[N2][N1][N0]; + +task void x(const float f) { + uniform int j; + + assert(taskCount == (int32)N0*N1*N2); + assert(taskCount0 == (int32)N0); + assert(taskCount1 == (int32)N1); + assert(taskCount2 == (int32)N2); + assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2)); + assert(taskIndex0 < (int32)N0); + assert(taskIndex1 < (int32)N1); + assert(taskIndex2 < (int32)N2); + + const uniform int i0 = taskIndex0; + const uniform int i1 = taskIndex1; + const uniform int i2 = taskIndex2; + const uniform int i = taskIndex; + array[i2][i1][i0] = i / 10000.; + cfor (j = 0; j < 10000; ++j) + array[i2][i1][i0] = sin(array[i2][i1][i0]); + if (array[i2][i1][i0] < .02) + array[i2][i1][i0] = i; +} +export void f_f(uniform float RET[], uniform float fFOO[]) { + float f = fFOO[programIndex]; + launch[N0,N1,N2] x(f); + sync; + RET[programIndex] = array[N2-1][N1-1][N0-1]; +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 9999.000000; +} diff --git a/type.cpp b/type.cpp index 
08013af0..e0234108 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount0 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 } else // Otherwise we already have the types of the arguments