3
ast.cpp
3
ast.cpp
@@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
||||
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
||||
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
|
||||
for (int k = 0; k < 3; k++)
|
||||
// Walk every launch-count dimension: index with the loop variable k
// (indexing [0] here would leave dimensions 1 and 2 unvisited).
fce->launchCountExpr[k] = (Expr *)WalkAST(fce->launchCountExpr[k], preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
||||
|
||||
@@ -1831,7 +1831,7 @@ define(`stdlib_core', `
|
||||
declare i32 @__fast_masked_vload()
|
||||
|
||||
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
|
||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
|
||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind
|
||||
declare void @ISPCSync(i8*) nounwind
|
||||
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
||||
|
||||
|
||||
6
ctx.cpp
6
ctx.cpp
@@ -3522,7 +3522,7 @@ FunctionEmitContext::ReturnInst() {
|
||||
llvm::Value *
|
||||
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
std::vector<llvm::Value *> &argVals,
|
||||
llvm::Value *launchCount) {
|
||||
llvm::Value *launchCount[3]){
|
||||
if (callee == NULL) {
|
||||
AssertPos(currentPos, m->errorCount > 0);
|
||||
return NULL;
|
||||
@@ -3583,7 +3583,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
args.push_back(launchGroupHandlePtr);
|
||||
args.push_back(fptr);
|
||||
args.push_back(voidmem);
|
||||
args.push_back(launchCount);
|
||||
args.push_back(launchCount[0]);
|
||||
args.push_back(launchCount[1]);
|
||||
args.push_back(launchCount[2]);
|
||||
return CallInst(flaunch, NULL, args, "");
|
||||
}
|
||||
|
||||
|
||||
2
ctx.h
2
ctx.h
@@ -542,7 +542,7 @@ public:
|
||||
the given argument values. */
|
||||
llvm::Value *LaunchInst(llvm::Value *callee,
|
||||
std::vector<llvm::Value *> &argVals,
|
||||
llvm::Value *launchCount);
|
||||
llvm::Value *launchCount[3]);
|
||||
|
||||
void SyncInst();
|
||||
|
||||
|
||||
@@ -3015,8 +3015,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and
|
||||
for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as
|
||||
appropriate. Alternatively, ``ispc`` also has support for launching tasks
|
||||
from ``ispc`` code. The approach is similar to Intel® Cilk's task launch
|
||||
feature. (See the ``examples/mandelbrot_tasks`` example to see it used in
|
||||
a small example.)
|
||||
feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.)
|
||||
|
||||
Any function that is launched as a task must be declared with the
|
||||
``task`` qualifier:
|
||||
@@ -3111,6 +3110,38 @@ executing the current task. The ``threadIndex`` can be used for accessing
|
||||
data that is private to the current thread and thus doesn't require
|
||||
synchronization to access under parallel execution.
|
||||
|
||||
The tasking system also supports multi-dimensional partitioning (currently up
|
||||
to three dimensions). To launch a 3D grid of tasks, for example with ``N0``,
|
||||
``N1`` and ``N2`` tasks in x-, y- and z-dimension respectively
|
||||
|
||||
::
|
||||
|
||||
float data[N2][N1][N0];
|
||||
task void foo_task()
|
||||
{
|
||||
data[taskIndex2][taskIndex1][taskIndex0] = taskIndex;
|
||||
}
|
||||
|
||||
we use the following ``launch`` expressions:
|
||||
|
||||
::
|
||||
|
||||
launch [N2][N1][N0] foo_task();
|
||||
|
||||
or
|
||||
|
||||
::
|
||||
|
||||
launch [N0,N1,N2] foo_task();
|
||||
|
||||
The value of ``taskIndex`` is equal to ``taskIndex0 + taskCount0*(taskIndex1 +
|
||||
taskCount1*taskIndex2)`` and it ranges from ``0`` to ``taskCount-1``, where
|
||||
``taskCount = taskCount0*taskCount1*taskCount2``. If ``N1`` and/or ``N2`` are
|
||||
not specified in the ``launch`` expression, a value of ``1`` is assumed.
|
||||
Finally, for a one-dimensional grid of tasks, ``taskIndex`` is equivalent to
|
||||
``taskIndex0`` and ``taskCount`` is equivalent to ``taskCount0``.
|
||||
|
||||
|
||||
Task Parallelism: Runtime Requirements
|
||||
--------------------------------------
|
||||
|
||||
@@ -3141,7 +3172,7 @@ manage tasks in ``ispc``:
|
||||
::
|
||||
|
||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count0, int count1, int count2);
|
||||
void ISPCSync(void *handle);
|
||||
|
||||
All three of these functions take an opaque handle (or a pointer to an
|
||||
@@ -3178,16 +3209,20 @@ tasks. Each ``launch`` statement in ``ispc`` code causes a call to
|
||||
after the handle pointer to the function are relatively straightforward;
|
||||
the ``void *f`` parameter holds a pointer to a function to call to run the
|
||||
work for this task, ``data`` holds a pointer to data to pass to this
|
||||
function, and ``count`` is the number of instances of this function to
|
||||
enqueue for asynchronous execution. (In other words, ``count`` corresponds
|
||||
to the value ``n`` in a multiple-task launch statement like ``launch[n]``.)
|
||||
function, and ``count0``, ``count1`` and ``count2`` are the number of instances
|
||||
of this function to enqueue for asynchronous execution. (In other words,
|
||||
``count0``, ``count1`` and ``count2`` correspond to the values ``n0``, ``n1``
|
||||
and ``n2`` in a multiple-task launch statement like ``launch[n2][n1][n0]`` or
|
||||
``launch [n0,n1,n2]`` respectively.)
|
||||
|
||||
The signature of the provided function pointer ``f`` is
|
||||
|
||||
::
|
||||
|
||||
void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount,
|
||||
int taskIndex, int taskCount)
|
||||
int taskIndex, int taskCount,
|
||||
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||
int taskCount0, int taskCount1, int taskCount2);
|
||||
|
||||
When this function pointer is called by one of the hardware threads managed
|
||||
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||
@@ -3197,11 +3232,14 @@ number of hardware threads that have been spawned to run tasks and
|
||||
uniquely identifying the hardware thread that is running the task. (These
|
||||
values can be used to index into thread-local storage.)
|
||||
|
||||
The value of ``taskCount`` should be the number of tasks launched in the
|
||||
``launch`` statement that caused the call to ``ISPCLaunch()`` and each of
|
||||
the calls to this function should be given a unique value of ``taskIndex``
|
||||
between zero and ``taskCount``, to distinguish which of the instances
|
||||
of the set of launched tasks is running.
|
||||
The value of ``taskCount`` should be the total number of tasks launched in the
|
||||
``launch`` statement (it must be equal to ``taskCount0*taskCount1*taskCount2``)
|
||||
that caused the call to ``ISPCLaunch()`` and each of the calls to this function
|
||||
should be given a unique value of ``taskIndex``, ``taskIndex0``, ``taskIndex1``
|
||||
and ``taskIndex2`` between zero and ``taskCount``, ``taskCount0``,
|
||||
``taskCount1`` and ``taskCount2`` respectively, with ``taskIndex = taskIndex0
|
||||
+ taskCount0*(taskIndex1 + taskCount1*taskIndex2)``, to distinguish which of
|
||||
the instances of the set of launched tasks is running.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -38,7 +38,8 @@
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include "../timing.h"
|
||||
|
||||
@@ -57,21 +57,26 @@ task void
|
||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int span,
|
||||
uniform int xspan, uniform int yspan,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * span;
|
||||
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
|
||||
const uniform int xstart = taskIndex0 * xspan;
|
||||
const uniform int xend = min(xstart + xspan, width);
|
||||
|
||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||
const uniform int ystart = taskIndex1 * yspan;
|
||||
const uniform int yend = min(ystart + yspan, height);
|
||||
|
||||
|
||||
foreach (yi = ystart ... yend, xi = xstart ... xend) {
|
||||
float x = x0 + xi * dx;
|
||||
float y = y0 + yi * dy;
|
||||
|
||||
int index = yi * width + xi;
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#if 1
|
||||
export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
@@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
uniform int span = 4;
|
||||
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
|
||||
const uniform int yspan = 16;
|
||||
|
||||
launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
|
||||
maxIterations, output);
|
||||
|
||||
#if 1
|
||||
launch [width/xspan, height/yspan]
|
||||
#else
|
||||
launch [height/yspan][width/xspan]
|
||||
#endif
|
||||
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
||||
maxIterations, output);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -170,21 +170,41 @@
|
||||
|
||||
// Signature of ispc-generated 'task' functions
|
||||
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
||||
int taskIndex, int taskCount);
|
||||
int taskIndex, int taskCount,
|
||||
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||
int taskCount0, int taskCount1, int taskCount2);
|
||||
|
||||
// Small structure used to hold the data for each task
|
||||
struct TaskInfo {
|
||||
TaskFuncType func;
|
||||
void *data;
|
||||
int taskIndex, taskCount;
|
||||
int taskIndex;
|
||||
int taskCount3d[3];
|
||||
#if defined(ISPC_IS_WINDOWS)
|
||||
event taskEvent;
|
||||
#endif
|
||||
};
|
||||
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||
int taskIndex0() const
|
||||
{
|
||||
return taskIndex % taskCount3d[0];
|
||||
}
|
||||
int taskIndex1() const
|
||||
{
|
||||
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
|
||||
}
|
||||
int taskIndex2() const
|
||||
{
|
||||
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
|
||||
}
|
||||
int taskCount0() const { return taskCount3d[0]; }
|
||||
int taskCount1() const { return taskCount3d[1]; }
|
||||
int taskCount2() const { return taskCount3d[2]; }
|
||||
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
|
||||
} __attribute__((aligned(32)));
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz);
|
||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||
void ISPCSync(void *handle);
|
||||
}
|
||||
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
|
||||
|
||||
// Actually run the task
|
||||
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
||||
taskInfo->taskIndex, taskInfo->taskCount);
|
||||
taskInfo->taskIndex, taskInfo->taskCount(),
|
||||
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
|
||||
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
|
||||
}
|
||||
|
||||
|
||||
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
|
||||
// will cause bugs in code that uses those.
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
|
||||
// Signal the event that this task is done
|
||||
ti->taskEvent.set();
|
||||
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
|
||||
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
||||
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
||||
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
||||
myTask->taskCount);
|
||||
myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the "number of unfinished tasks" counter in the task
|
||||
@@ -871,7 +897,9 @@ TaskGroup::Sync() {
|
||||
// Do work for _myTask_
|
||||
//
|
||||
// FIXME: bogus values for thread index/thread count here as well..
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the number of unfinished tasks counter
|
||||
@@ -901,7 +929,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
|
||||
// Actually run the task.
|
||||
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
|
||||
// The first index/count pair stands in for threadIndex/threadCount (the
// pretended 1:1 task->thread mapping); the second pair is the real
// taskIndex/taskCount expected by the 11-parameter TaskFuncType.
ti->func(ti->data, ti->taskIndex, ti->taskCount(), ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -930,7 +960,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// Actually run the task.
|
||||
int threadIndex = omp_get_thread_num();
|
||||
int threadCount = omp_get_num_threads();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -961,7 +993,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
int threadIndex = ti->taskIndex;
|
||||
// taskCount is a member function (product of the 3D extents), so call it.
int threadCount = ti->taskCount();
|
||||
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -988,7 +1022,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
int threadIndex = ti->taskIndex;
|
||||
// taskCount is a member function (product of the 3D extents), so call it.
int threadCount = ti->taskCount();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1041,7 +1077,8 @@ FreeTaskGroup(TaskGroup *tg) {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
|
||||
const int count = count0*count1*count2;
|
||||
TaskGroup *taskGroup;
|
||||
if (*taskGroupPtr == NULL) {
|
||||
InitTaskSystem();
|
||||
@@ -1057,7 +1094,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ti->func = (TaskFuncType)func;
|
||||
ti->data = data;
|
||||
ti->taskIndex = i;
|
||||
ti->taskCount = count;
|
||||
ti->taskCount3d[0] = count0;
|
||||
ti->taskCount3d[1] = count1;
|
||||
ti->taskCount3d[2] = count2;
|
||||
}
|
||||
taskGroup->Launch(baseIndex, count);
|
||||
}
|
||||
|
||||
36
expr.cpp
36
expr.cpp
@@ -3551,11 +3551,18 @@ SelectExpr::Print() const {
|
||||
// FunctionCallExpr
|
||||
|
||||
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
|
||||
bool il, Expr *lce)
|
||||
bool il, Expr *lce[3])
|
||||
: Expr(p), isLaunch(il) {
|
||||
func = f;
|
||||
args = a;
|
||||
launchCountExpr = lce;
|
||||
if (lce != NULL)
|
||||
{
|
||||
launchCountExpr[0] = lce[0];
|
||||
launchCountExpr[1] = lce[1];
|
||||
launchCountExpr[2] = lce[2];
|
||||
}
|
||||
else
|
||||
launchCountExpr[0] = launchCountExpr[1] = launchCountExpr[2] = NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -3673,9 +3680,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
llvm::Value *retVal = NULL;
|
||||
ctx->SetDebugPos(pos);
|
||||
if (ft->isTask) {
|
||||
AssertPos(pos, launchCountExpr != NULL);
|
||||
llvm::Value *launchCount = launchCountExpr->GetValue(ctx);
|
||||
if (launchCount != NULL)
|
||||
AssertPos(pos, launchCountExpr[0] != NULL);
|
||||
llvm::Value *launchCount[3] =
|
||||
{ launchCountExpr[0]->GetValue(ctx),
|
||||
launchCountExpr[1]->GetValue(ctx),
|
||||
launchCountExpr[2]->GetValue(ctx) };
|
||||
|
||||
if (launchCount[0] != NULL)
|
||||
ctx->LaunchInst(callee, argVals, launchCount);
|
||||
}
|
||||
else
|
||||
@@ -3798,14 +3809,17 @@ FunctionCallExpr::TypeCheck() {
|
||||
if (!isLaunch)
|
||||
Error(pos, "\"launch\" expression needed to call function "
|
||||
"with \"task\" qualifier.");
|
||||
if (!launchCountExpr)
|
||||
for (int k = 0; k < 3; k++)
|
||||
{
|
||||
if (!launchCountExpr[k])
|
||||
return NULL;
|
||||
|
||||
launchCountExpr =
|
||||
TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32,
|
||||
"task launch count");
|
||||
if (launchCountExpr == NULL)
|
||||
launchCountExpr[k] =
|
||||
TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32,
|
||||
"task launch count");
|
||||
if (launchCountExpr[k] == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (isLaunch) {
|
||||
@@ -3813,7 +3827,7 @@ FunctionCallExpr::TypeCheck() {
|
||||
"qualified function.");
|
||||
return NULL;
|
||||
}
|
||||
AssertPos(pos, launchCountExpr == NULL);
|
||||
AssertPos(pos, launchCountExpr[0] == NULL);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
5
expr.h
5
expr.h
@@ -247,7 +247,8 @@ public:
|
||||
class FunctionCallExpr : public Expr {
|
||||
public:
|
||||
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
|
||||
bool isLaunch = false, Expr *launchCountExpr = NULL);
|
||||
bool isLaunch = false,
|
||||
Expr *launchCountExpr[3] = NULL);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
@@ -262,7 +263,7 @@ public:
|
||||
Expr *func;
|
||||
ExprList *args;
|
||||
bool isLaunch;
|
||||
Expr *launchCountExpr;
|
||||
Expr *launchCountExpr[3];
|
||||
};
|
||||
|
||||
|
||||
|
||||
39
func.cpp
39
func.cpp
@@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) {
|
||||
Assert(taskIndexSym);
|
||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||
Assert(taskCountSym);
|
||||
|
||||
taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0");
|
||||
Assert(taskIndexSym0);
|
||||
taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1");
|
||||
Assert(taskIndexSym1);
|
||||
taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2");
|
||||
Assert(taskIndexSym2);
|
||||
|
||||
|
||||
taskCountSym0 = m->symbolTable->LookupVariable("taskCount0");
|
||||
Assert(taskCountSym0);
|
||||
taskCountSym1 = m->symbolTable->LookupVariable("taskCount1");
|
||||
Assert(taskCountSym1);
|
||||
taskCountSym2 = m->symbolTable->LookupVariable("taskCount2");
|
||||
Assert(taskCountSym2);
|
||||
}
|
||||
else
|
||||
{
|
||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||
taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL;
|
||||
taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::Value *threadCount = argIter++;
|
||||
llvm::Value *taskIndex = argIter++;
|
||||
llvm::Value *taskCount = argIter++;
|
||||
llvm::Value *taskIndex0 = argIter++;
|
||||
llvm::Value *taskIndex1 = argIter++;
|
||||
llvm::Value *taskIndex2 = argIter++;
|
||||
llvm::Value *taskCount0 = argIter++;
|
||||
llvm::Value *taskCount1 = argIter++;
|
||||
llvm::Value *taskCount2 = argIter++;
|
||||
|
||||
// Copy the function parameter values from the structure into local
|
||||
// storage
|
||||
@@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
|
||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||
|
||||
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||
|
||||
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||
}
|
||||
else {
|
||||
// Regular, non-task function
|
||||
|
||||
5
func.h
5
func.h
@@ -60,7 +60,10 @@ private:
|
||||
Stmt *code;
|
||||
Symbol *maskSymbol;
|
||||
Symbol *threadIndexSym, *threadCountSym;
|
||||
Symbol *taskIndexSym, *taskCountSym;
|
||||
Symbol *taskIndexSym, *taskCountSym;
|
||||
Symbol *taskIndexSym0, *taskCountSym0;
|
||||
Symbol *taskIndexSym1, *taskCountSym1;
|
||||
Symbol *taskIndexSym2, *taskCountSym2;
|
||||
};
|
||||
|
||||
#endif // ISPC_FUNC_H
|
||||
|
||||
91
parse.yy
91
parse.yy
@@ -353,17 +353,75 @@ launch_expression
|
||||
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr);
|
||||
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr);
|
||||
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{ $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); }
|
||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')'
|
||||
{ $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); }
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount);
|
||||
}
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||
$$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||
$$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||
$$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||
$$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount);
|
||||
}
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$3, $5, $7};
|
||||
$$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$3, $5, $7};
|
||||
$$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$9, $6, $3};
|
||||
$$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$9, $6, $3};
|
||||
$$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount);
|
||||
}
|
||||
|
||||
|
||||
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
{
|
||||
@@ -377,13 +435,13 @@ launch_expression
|
||||
"around function call expression.");
|
||||
$$ = NULL;
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
{
|
||||
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
|
||||
"around function call expression.");
|
||||
$$ = NULL;
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>'
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>'
|
||||
{
|
||||
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
|
||||
"around function call expression.");
|
||||
@@ -2214,9 +2272,24 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) {
|
||||
|
||||
Symbol *taskIndexSym = new Symbol("taskIndex", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym);
|
||||
|
||||
|
||||
Symbol *taskCountSym = new Symbol("taskCount", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym);
|
||||
|
||||
Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym0);
|
||||
Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym1);
|
||||
Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym2);
|
||||
|
||||
|
||||
Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym0);
|
||||
Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym1);
|
||||
Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -62,17 +62,20 @@ extern "C" {
|
||||
extern void f_di(float *result, double *a, int *b);
|
||||
extern void result(float *val);
|
||||
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *d, int);
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *d, int,int,int);
|
||||
void ISPCSync(void *handle);
|
||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||
}
|
||||
|
||||
void ISPCLaunch(void **handle, void *f, void *d, int count) {
|
||||
|
||||
void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) {
|
||||
*handle = (void *)0xdeadbeef;
|
||||
typedef void (*TaskFuncType)(void *, int, int, int, int);
|
||||
typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int);
|
||||
TaskFuncType func = (TaskFuncType)f;
|
||||
for (int i = 0; i < count; ++i)
|
||||
func(d, 0, 1, i, count);
|
||||
int count = count0*count1*count2, idx = 0;
|
||||
for (int k = 0; k < count2; ++k)
|
||||
for (int j = 0; j < count1; ++j)
|
||||
for (int i = 0; i < count0; ++i)
|
||||
func(d, 0, 1, idx++, count, i,j,k,count0,count1,count2);
|
||||
}
|
||||
|
||||
void ISPCSync(void *) {
|
||||
|
||||
42
tests/launch-8.ispc
Normal file
42
tests/launch-8.ispc
Normal file
@@ -0,0 +1,42 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
#define N0 10
|
||||
#define N1 20
|
||||
#define N2 50
|
||||
static uniform float array[N2][N1][N0];
|
||||
|
||||
task void x(const float f) {
|
||||
uniform int j;
|
||||
|
||||
assert(taskCount == (int32)N0*N1*N2);
|
||||
assert(taskCount0 == (int32)N0);
|
||||
assert(taskCount1 == (int32)N1);
|
||||
assert(taskCount2 == (int32)N2);
|
||||
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
|
||||
assert(taskIndex0 < (int32)N0);
|
||||
assert(taskIndex1 < (int32)N1);
|
||||
assert(taskIndex2 < (int32)N2);
|
||||
|
||||
const uniform int i0 = taskIndex0;
|
||||
const uniform int i1 = taskIndex1;
|
||||
const uniform int i2 = taskIndex2;
|
||||
const uniform int i = taskIndex;
|
||||
array[i2][i1][i0] = i / 10000.;
|
||||
cfor (j = 0; j < 10000; ++j)
|
||||
array[i2][i1][i0] = sin(array[i2][i1][i0]);
|
||||
if (array[i2][i1][i0] < .02)
|
||||
array[i2][i1][i0] = i;
|
||||
}
|
||||
export void f_f(uniform float RET[], uniform float fFOO[]) {
|
||||
float f = fFOO[programIndex];
|
||||
launch[N2][N1][N0] x(f);
|
||||
sync;
|
||||
RET[programIndex] = array[N2-1][N1-1][N0-1];
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 9999.000000;
|
||||
}
|
||||
42
tests/launch-9.ispc
Normal file
42
tests/launch-9.ispc
Normal file
@@ -0,0 +1,42 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
#define N0 10
|
||||
#define N1 20
|
||||
#define N2 50
|
||||
static uniform float array[N2][N1][N0];
|
||||
|
||||
task void x(const float f) {
|
||||
uniform int j;
|
||||
|
||||
assert(taskCount == (int32)N0*N1*N2);
|
||||
assert(taskCount0 == (int32)N0);
|
||||
assert(taskCount1 == (int32)N1);
|
||||
assert(taskCount2 == (int32)N2);
|
||||
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
|
||||
assert(taskIndex0 < (int32)N0);
|
||||
assert(taskIndex1 < (int32)N1);
|
||||
assert(taskIndex2 < (int32)N2);
|
||||
|
||||
const uniform int i0 = taskIndex0;
|
||||
const uniform int i1 = taskIndex1;
|
||||
const uniform int i2 = taskIndex2;
|
||||
const uniform int i = taskIndex;
|
||||
array[i2][i1][i0] = i / 10000.;
|
||||
cfor (j = 0; j < 10000; ++j)
|
||||
array[i2][i1][i0] = sin(array[i2][i1][i0]);
|
||||
if (array[i2][i1][i0] < .02)
|
||||
array[i2][i1][i0] = i;
|
||||
}
|
||||
export void f_f(uniform float RET[], uniform float fFOO[]) {
|
||||
float f = fFOO[programIndex];
|
||||
launch[N0,N1,N2] x(f);
|
||||
sync;
|
||||
RET[programIndex] = array[N2-1][N1-1][N0-1];
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 9999.000000;
|
||||
}
|
||||
6
type.cpp
6
type.cpp
@@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
||||
}
|
||||
else
|
||||
// Otherwise we already have the types of the arguments
|
||||
|
||||
Reference in New Issue
Block a user