3
ast.cpp
3
ast.cpp
@@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
|||||||
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||||
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
||||||
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
||||||
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
|
for (int k = 0; k < 3; k++)
|
||||||
|
fce->launchCountExpr[0] = (Expr *)WalkAST(fce->launchCountExpr[0], preFunc,
|
||||||
postFunc, data);
|
postFunc, data);
|
||||||
}
|
}
|
||||||
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
||||||
|
|||||||
@@ -1831,7 +1831,7 @@ define(`stdlib_core', `
|
|||||||
declare i32 @__fast_masked_vload()
|
declare i32 @__fast_masked_vload()
|
||||||
|
|
||||||
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
|
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
|
||||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
|
declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind
|
||||||
declare void @ISPCSync(i8*) nounwind
|
declare void @ISPCSync(i8*) nounwind
|
||||||
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
||||||
|
|
||||||
|
|||||||
6
ctx.cpp
6
ctx.cpp
@@ -3522,7 +3522,7 @@ FunctionEmitContext::ReturnInst() {
|
|||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||||
std::vector<llvm::Value *> &argVals,
|
std::vector<llvm::Value *> &argVals,
|
||||||
llvm::Value *launchCount) {
|
llvm::Value *launchCount[3]){
|
||||||
if (callee == NULL) {
|
if (callee == NULL) {
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -3583,7 +3583,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
args.push_back(launchGroupHandlePtr);
|
args.push_back(launchGroupHandlePtr);
|
||||||
args.push_back(fptr);
|
args.push_back(fptr);
|
||||||
args.push_back(voidmem);
|
args.push_back(voidmem);
|
||||||
args.push_back(launchCount);
|
args.push_back(launchCount[0]);
|
||||||
|
args.push_back(launchCount[1]);
|
||||||
|
args.push_back(launchCount[2]);
|
||||||
return CallInst(flaunch, NULL, args, "");
|
return CallInst(flaunch, NULL, args, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
2
ctx.h
2
ctx.h
@@ -542,7 +542,7 @@ public:
|
|||||||
he given argument values. */
|
he given argument values. */
|
||||||
llvm::Value *LaunchInst(llvm::Value *callee,
|
llvm::Value *LaunchInst(llvm::Value *callee,
|
||||||
std::vector<llvm::Value *> &argVals,
|
std::vector<llvm::Value *> &argVals,
|
||||||
llvm::Value *launchCount);
|
llvm::Value *launchCount[3]);
|
||||||
|
|
||||||
void SyncInst();
|
void SyncInst();
|
||||||
|
|
||||||
|
|||||||
@@ -3015,8 +3015,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and
|
|||||||
for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as
|
for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as
|
||||||
appropriate. Alternatively, ``ispc`` also has support for launching tasks
|
appropriate. Alternatively, ``ispc`` also has support for launching tasks
|
||||||
from ``ispc`` code. The approach is similar to Intel® Cilk's task launch
|
from ``ispc`` code. The approach is similar to Intel® Cilk's task launch
|
||||||
feature. (See the ``examples/mandelbrot_tasks`` example to see it used in
|
feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.)
|
||||||
a small example.)
|
|
||||||
|
|
||||||
Any function that is launched as a task must be declared with the
|
Any function that is launched as a task must be declared with the
|
||||||
``task`` qualifier:
|
``task`` qualifier:
|
||||||
@@ -3111,6 +3110,38 @@ executing the current task. The ``threadIndex`` can be used for accessing
|
|||||||
data that is private to the current thread and thus doesn't require
|
data that is private to the current thread and thus doesn't require
|
||||||
synchronization to access under parallel execution.
|
synchronization to access under parallel execution.
|
||||||
|
|
||||||
|
The tasking system also supports multi-dimensional partitioning (currently up
|
||||||
|
to three dimensions). To launch a 3D grid of tasks, for example with ``N0``,
|
||||||
|
``N1`` and ``N2`` tasks in the x-, y- and z-dimensions respectively
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
float data[N2][N1][N0]
|
||||||
|
task void foo_task()
|
||||||
|
{
|
||||||
|
data[taskIndex2][taskIndex1][taskIndex0] = taskIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
we use the following ``launch`` expressions:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
launch [N2][N1][N0] foo_task()
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
launch [N0,N1,N2] foo_task()
|
||||||
|
|
||||||
|
The value of ``taskIndex`` is equal to ``taskIndex0 + taskCount0*(taskIndex1 +
|
||||||
|
taskCount1*taskIndex2)`` and it ranges from ``0`` to ``taskCount-1``, where
|
||||||
|
``taskCount = taskCount0*taskCount1*taskCount2``. If ``N1`` and/or ``N2`` are
|
||||||
|
not specified in the ``launch`` expression, a value of ``1`` is assumed.
|
||||||
|
Finally, for a one-dimensional grid of tasks, ``taskIndex`` is equivalent to
|
||||||
|
``taskIndex0`` and ``taskCount`` is equivalent to ``taskCount0``.
|
||||||
|
|
||||||
|
|
||||||
Task Parallelism: Runtime Requirements
|
Task Parallelism: Runtime Requirements
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
@@ -3141,7 +3172,7 @@ manage tasks in ``ispc``:
|
|||||||
::
|
::
|
||||||
|
|
||||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
|
void ISPCLaunch(void **handlePtr, void *f, void *data, int count0, int count1, int count2);
|
||||||
void ISPCSync(void *handle);
|
void ISPCSync(void *handle);
|
||||||
|
|
||||||
All three of these functions take an opaque handle (or a pointer to an
|
All three of these functions take an opaque handle (or a pointer to an
|
||||||
@@ -3178,16 +3209,20 @@ tasks. Each ``launch`` statement in ``ispc`` code causes a call to
|
|||||||
after the handle pointer to the function are relatively straightforward;
|
after the handle pointer to the function are relatively straightforward;
|
||||||
the ``void *f`` parameter holds a pointer to a function to call to run the
|
the ``void *f`` parameter holds a pointer to a function to call to run the
|
||||||
work for this task, ``data`` holds a pointer to data to pass to this
|
work for this task, ``data`` holds a pointer to data to pass to this
|
||||||
function, and ``count`` is the number of instances of this function to
|
function, and ``count0``, ``count1`` and ``count2`` are the numbers of instances
|
||||||
enqueue for asynchronous execution. (In other words, ``count`` corresponds
|
of this function to enqueue for asynchronous execution. (In other words,
|
||||||
to the value ``n`` in a multiple-task launch statement like ``launch[n]``.)
|
``count0``, ``count1`` and ``count2`` correspond to the values ``n0``, ``n1``
|
||||||
|
and ``n2`` in a multiple-task launch statement like ``launch[n2][n1][n0]`` or
|
||||||
|
``launch [n0,n1,n2]`` respectively.)
|
||||||
|
|
||||||
The signature of the provided function pointer ``f`` is
|
The signature of the provided function pointer ``f`` is
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount,
|
void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount,
|
||||||
int taskIndex, int taskCount)
|
int taskIndex, int taskCount,
|
||||||
|
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||||
|
int taskCount0, int taskCount1, int taskCount2);
|
||||||
|
|
||||||
When this function pointer is called by one of the hardware threads managed
|
When this function pointer is called by one of the hardware threads managed
|
||||||
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||||
@@ -3197,11 +3232,14 @@ number of hardware threads that have been spawned to run tasks and
|
|||||||
uniquely identifying the hardware thread that is running the task. (These
|
uniquely identifying the hardware thread that is running the task. (These
|
||||||
values can be used to index into thread-local storage.)
|
values can be used to index into thread-local storage.)
|
||||||
|
|
||||||
The value of ``taskCount`` should be the number of tasks launched in the
|
The value of ``taskCount`` should be the total number of tasks launched in the
|
||||||
``launch`` statement that caused the call to ``ISPCLaunch()`` and each of
|
``launch`` statement (it must be equal to ``taskCount0*taskCount1*taskCount2``)
|
||||||
the calls to this function should be given a unique value of ``taskIndex``
|
that caused the call to ``ISPCLaunch()`` and each of the calls to this function
|
||||||
between zero and ``taskCount``, to distinguish which of the instances
|
should be given a unique value of ``taskIndex``, ``taskIndex0``, ``taskIndex1``
|
||||||
of the set of launched tasks is running.
|
and ``taskIndex2`` between zero and ``taskCount``, ``taskCount0``,
|
||||||
|
``taskCount1`` and ``taskCount2`` respectively, with ``taskIndex = taskIndex0
|
||||||
|
+ taskCount0*(taskIndex1 + taskCount1*taskIndex2)``, to distinguish which of
|
||||||
|
the instances of the set of launched tasks is running.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,8 @@
|
|||||||
#pragma warning (disable: 4305)
|
#pragma warning (disable: 4305)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
|
|||||||
@@ -57,21 +57,26 @@ task void
|
|||||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||||
uniform float y0, uniform float dy,
|
uniform float y0, uniform float dy,
|
||||||
uniform int width, uniform int height,
|
uniform int width, uniform int height,
|
||||||
uniform int span,
|
uniform int xspan, uniform int yspan,
|
||||||
uniform int maxIterations, uniform int output[]) {
|
uniform int maxIterations, uniform int output[]) {
|
||||||
uniform int ystart = taskIndex * span;
|
const uniform int xstart = taskIndex0 * xspan;
|
||||||
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
|
const uniform int xend = min(xstart + xspan, width);
|
||||||
|
|
||||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
const uniform int ystart = taskIndex1 * yspan;
|
||||||
|
const uniform int yend = min(ystart + yspan, height);
|
||||||
|
|
||||||
|
|
||||||
|
foreach (yi = ystart ... yend, xi = xstart ... xend) {
|
||||||
float x = x0 + xi * dx;
|
float x = x0 + xi * dx;
|
||||||
float y = y0 + yi * dy;
|
float y = y0 + yi * dy;
|
||||||
|
|
||||||
int index = yi * width + xi;
|
int index = yi * width + xi;
|
||||||
output[index] = mandel(x, y, maxIterations);
|
output[index] = mandel(x, y, maxIterations);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
export void
|
export void
|
||||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||||
uniform float x1, uniform float y1,
|
uniform float x1, uniform float y1,
|
||||||
@@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
|||||||
uniform int maxIterations, uniform int output[]) {
|
uniform int maxIterations, uniform int output[]) {
|
||||||
uniform float dx = (x1 - x0) / width;
|
uniform float dx = (x1 - x0) / width;
|
||||||
uniform float dy = (y1 - y0) / height;
|
uniform float dy = (y1 - y0) / height;
|
||||||
uniform int span = 4;
|
const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */
|
||||||
|
const uniform int yspan = 16;
|
||||||
|
|
||||||
launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
|
|
||||||
maxIterations, output);
|
#if 1
|
||||||
|
launch [width/xspan, height/yspan]
|
||||||
|
#else
|
||||||
|
launch [height/yspan][width/xspan]
|
||||||
|
#endif
|
||||||
|
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
||||||
|
maxIterations, output);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -170,21 +170,41 @@
|
|||||||
|
|
||||||
// Signature of ispc-generated 'task' functions
|
// Signature of ispc-generated 'task' functions
|
||||||
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
||||||
int taskIndex, int taskCount);
|
int taskIndex, int taskCount,
|
||||||
|
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||||
|
int taskCount0, int taskCount1, int taskCount2);
|
||||||
|
|
||||||
// Small structure used to hold the data for each task
|
// Small structure used to hold the data for each task
|
||||||
struct TaskInfo {
|
struct TaskInfo {
|
||||||
TaskFuncType func;
|
TaskFuncType func;
|
||||||
void *data;
|
void *data;
|
||||||
int taskIndex, taskCount;
|
int taskIndex;
|
||||||
|
int taskCount3d[3];
|
||||||
#if defined(ISPC_IS_WINDOWS)
|
#if defined(ISPC_IS_WINDOWS)
|
||||||
event taskEvent;
|
event taskEvent;
|
||||||
#endif
|
#endif
|
||||||
};
|
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||||
|
int taskIndex0() const
|
||||||
|
{
|
||||||
|
return taskIndex % taskCount3d[0];
|
||||||
|
}
|
||||||
|
int taskIndex1() const
|
||||||
|
{
|
||||||
|
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
|
||||||
|
}
|
||||||
|
int taskIndex2() const
|
||||||
|
{
|
||||||
|
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
|
||||||
|
}
|
||||||
|
int taskCount0() const { return taskCount3d[0]; }
|
||||||
|
int taskCount1() const { return taskCount3d[1]; }
|
||||||
|
int taskCount2() const { return taskCount3d[2]; }
|
||||||
|
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
|
||||||
|
} __attribute__((aligned(32)));
|
||||||
|
|
||||||
// ispc expects these functions to have C linkage / not be mangled
|
// ispc expects these functions to have C linkage / not be mangled
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
|
void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz);
|
||||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||||
void ISPCSync(void *handle);
|
void ISPCSync(void *handle);
|
||||||
}
|
}
|
||||||
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
|
|||||||
|
|
||||||
// Actually run the task
|
// Actually run the task
|
||||||
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
||||||
taskInfo->taskIndex, taskInfo->taskCount);
|
taskInfo->taskIndex, taskInfo->taskCount(),
|
||||||
|
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
|
||||||
|
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
|
|||||||
// will cause bugs in code that uses those.
|
// will cause bugs in code that uses those.
|
||||||
int threadIndex = 0;
|
int threadIndex = 0;
|
||||||
int threadCount = 1;
|
int threadCount = 1;
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
|
|
||||||
// Signal the event that this task is done
|
// Signal the event that this task is done
|
||||||
ti->taskEvent.set();
|
ti->taskEvent.set();
|
||||||
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
|
|||||||
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
||||||
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
||||||
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
||||||
myTask->taskCount);
|
myTask->taskCount(),
|
||||||
|
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||||
|
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Decrement the "number of unfinished tasks" counter in the task
|
// Decrement the "number of unfinished tasks" counter in the task
|
||||||
@@ -871,7 +897,9 @@ TaskGroup::Sync() {
|
|||||||
// Do work for _myTask_
|
// Do work for _myTask_
|
||||||
//
|
//
|
||||||
// FIXME: bogus values for thread index/thread count here as well..
|
// FIXME: bogus values for thread index/thread count here as well..
|
||||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
|
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
|
||||||
|
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||||
|
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Decrement the number of unfinished tasks counter
|
// Decrement the number of unfinished tasks counter
|
||||||
@@ -901,7 +929,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
|
|
||||||
// Actually run the task.
|
// Actually run the task.
|
||||||
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -930,7 +960,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
// Actually run the task.
|
// Actually run the task.
|
||||||
int threadIndex = omp_get_thread_num();
|
int threadIndex = omp_get_thread_num();
|
||||||
int threadCount = omp_get_num_threads();
|
int threadCount = omp_get_num_threads();
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -961,7 +993,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount;
|
||||||
|
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -988,7 +1022,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount;
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1041,7 +1077,8 @@ FreeTaskGroup(TaskGroup *tg) {
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void
|
void
|
||||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
|
||||||
|
const int count = count0*count1*count2;
|
||||||
TaskGroup *taskGroup;
|
TaskGroup *taskGroup;
|
||||||
if (*taskGroupPtr == NULL) {
|
if (*taskGroupPtr == NULL) {
|
||||||
InitTaskSystem();
|
InitTaskSystem();
|
||||||
@@ -1057,7 +1094,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
|||||||
ti->func = (TaskFuncType)func;
|
ti->func = (TaskFuncType)func;
|
||||||
ti->data = data;
|
ti->data = data;
|
||||||
ti->taskIndex = i;
|
ti->taskIndex = i;
|
||||||
ti->taskCount = count;
|
ti->taskCount3d[0] = count0;
|
||||||
|
ti->taskCount3d[1] = count1;
|
||||||
|
ti->taskCount3d[2] = count2;
|
||||||
}
|
}
|
||||||
taskGroup->Launch(baseIndex, count);
|
taskGroup->Launch(baseIndex, count);
|
||||||
}
|
}
|
||||||
|
|||||||
36
expr.cpp
36
expr.cpp
@@ -3551,11 +3551,18 @@ SelectExpr::Print() const {
|
|||||||
// FunctionCallExpr
|
// FunctionCallExpr
|
||||||
|
|
||||||
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
|
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
|
||||||
bool il, Expr *lce)
|
bool il, Expr *lce[3])
|
||||||
: Expr(p), isLaunch(il) {
|
: Expr(p), isLaunch(il) {
|
||||||
func = f;
|
func = f;
|
||||||
args = a;
|
args = a;
|
||||||
launchCountExpr = lce;
|
if (lce != NULL)
|
||||||
|
{
|
||||||
|
launchCountExpr[0] = lce[0];
|
||||||
|
launchCountExpr[1] = lce[1];
|
||||||
|
launchCountExpr[2] = lce[2];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
launchCountExpr[0] = launchCountExpr[1] = launchCountExpr[2] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -3673,9 +3680,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
llvm::Value *retVal = NULL;
|
llvm::Value *retVal = NULL;
|
||||||
ctx->SetDebugPos(pos);
|
ctx->SetDebugPos(pos);
|
||||||
if (ft->isTask) {
|
if (ft->isTask) {
|
||||||
AssertPos(pos, launchCountExpr != NULL);
|
AssertPos(pos, launchCountExpr[0] != NULL);
|
||||||
llvm::Value *launchCount = launchCountExpr->GetValue(ctx);
|
llvm::Value *launchCount[3] =
|
||||||
if (launchCount != NULL)
|
{ launchCountExpr[0]->GetValue(ctx),
|
||||||
|
launchCountExpr[1]->GetValue(ctx),
|
||||||
|
launchCountExpr[2]->GetValue(ctx) };
|
||||||
|
|
||||||
|
if (launchCount[0] != NULL)
|
||||||
ctx->LaunchInst(callee, argVals, launchCount);
|
ctx->LaunchInst(callee, argVals, launchCount);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -3798,14 +3809,17 @@ FunctionCallExpr::TypeCheck() {
|
|||||||
if (!isLaunch)
|
if (!isLaunch)
|
||||||
Error(pos, "\"launch\" expression needed to call function "
|
Error(pos, "\"launch\" expression needed to call function "
|
||||||
"with \"task\" qualifier.");
|
"with \"task\" qualifier.");
|
||||||
if (!launchCountExpr)
|
for (int k = 0; k < 3; k++)
|
||||||
|
{
|
||||||
|
if (!launchCountExpr[k])
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
launchCountExpr =
|
launchCountExpr[k] =
|
||||||
TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32,
|
TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32,
|
||||||
"task launch count");
|
"task launch count");
|
||||||
if (launchCountExpr == NULL)
|
if (launchCountExpr[k] == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (isLaunch) {
|
if (isLaunch) {
|
||||||
@@ -3813,7 +3827,7 @@ FunctionCallExpr::TypeCheck() {
|
|||||||
"qualified function.");
|
"qualified function.");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
AssertPos(pos, launchCountExpr == NULL);
|
AssertPos(pos, launchCountExpr[0] == NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|||||||
5
expr.h
5
expr.h
@@ -247,7 +247,8 @@ public:
|
|||||||
class FunctionCallExpr : public Expr {
|
class FunctionCallExpr : public Expr {
|
||||||
public:
|
public:
|
||||||
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
|
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
|
||||||
bool isLaunch = false, Expr *launchCountExpr = NULL);
|
bool isLaunch = false,
|
||||||
|
Expr *launchCountExpr[3] = NULL);
|
||||||
|
|
||||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||||
@@ -262,7 +263,7 @@ public:
|
|||||||
Expr *func;
|
Expr *func;
|
||||||
ExprList *args;
|
ExprList *args;
|
||||||
bool isLaunch;
|
bool isLaunch;
|
||||||
Expr *launchCountExpr;
|
Expr *launchCountExpr[3];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
39
func.cpp
39
func.cpp
@@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) {
|
|||||||
Assert(taskIndexSym);
|
Assert(taskIndexSym);
|
||||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||||
Assert(taskCountSym);
|
Assert(taskCountSym);
|
||||||
|
|
||||||
|
taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0");
|
||||||
|
Assert(taskIndexSym0);
|
||||||
|
taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1");
|
||||||
|
Assert(taskIndexSym1);
|
||||||
|
taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2");
|
||||||
|
Assert(taskIndexSym2);
|
||||||
|
|
||||||
|
|
||||||
|
taskCountSym0 = m->symbolTable->LookupVariable("taskCount0");
|
||||||
|
Assert(taskCountSym0);
|
||||||
|
taskCountSym1 = m->symbolTable->LookupVariable("taskCount1");
|
||||||
|
Assert(taskCountSym1);
|
||||||
|
taskCountSym2 = m->symbolTable->LookupVariable("taskCount2");
|
||||||
|
Assert(taskCountSym2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||||
|
taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL;
|
||||||
|
taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
llvm::Value *threadCount = argIter++;
|
llvm::Value *threadCount = argIter++;
|
||||||
llvm::Value *taskIndex = argIter++;
|
llvm::Value *taskIndex = argIter++;
|
||||||
llvm::Value *taskCount = argIter++;
|
llvm::Value *taskCount = argIter++;
|
||||||
|
llvm::Value *taskIndex0 = argIter++;
|
||||||
|
llvm::Value *taskIndex1 = argIter++;
|
||||||
|
llvm::Value *taskIndex2 = argIter++;
|
||||||
|
llvm::Value *taskCount0 = argIter++;
|
||||||
|
llvm::Value *taskCount1 = argIter++;
|
||||||
|
llvm::Value *taskCount2 = argIter++;
|
||||||
|
|
||||||
// Copy the function parameter values from the structure into local
|
// Copy the function parameter values from the structure into local
|
||||||
// storage
|
// storage
|
||||||
@@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
|
|
||||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||||
|
|
||||||
|
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||||
|
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||||
|
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||||
|
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||||
|
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||||
|
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||||
|
|
||||||
|
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||||
|
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||||
|
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||||
|
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||||
|
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||||
|
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Regular, non-task function
|
// Regular, non-task function
|
||||||
|
|||||||
5
func.h
5
func.h
@@ -60,7 +60,10 @@ private:
|
|||||||
Stmt *code;
|
Stmt *code;
|
||||||
Symbol *maskSymbol;
|
Symbol *maskSymbol;
|
||||||
Symbol *threadIndexSym, *threadCountSym;
|
Symbol *threadIndexSym, *threadCountSym;
|
||||||
Symbol *taskIndexSym, *taskCountSym;
|
Symbol *taskIndexSym, *taskCountSym;
|
||||||
|
Symbol *taskIndexSym0, *taskCountSym0;
|
||||||
|
Symbol *taskIndexSym1, *taskCountSym1;
|
||||||
|
Symbol *taskIndexSym2, *taskCountSym2;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // ISPC_FUNC_H
|
#endif // ISPC_FUNC_H
|
||||||
|
|||||||
89
parse.yy
89
parse.yy
@@ -353,17 +353,75 @@ launch_expression
|
|||||||
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
|
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
|
||||||
{
|
{
|
||||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||||
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr);
|
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount);
|
||||||
}
|
}
|
||||||
| TOKEN_LAUNCH postfix_expression '(' ')'
|
| TOKEN_LAUNCH postfix_expression '(' ')'
|
||||||
{
|
{
|
||||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||||
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr);
|
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount);
|
||||||
}
|
}
|
||||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')'
|
|
||||||
{ $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); }
|
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')'
|
{
|
||||||
{ $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); }
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||||
|
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')'
|
||||||
|
{
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||||
|
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||||
|
{
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||||
|
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||||
|
{
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||||
|
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||||
|
{
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||||
|
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||||
|
{
|
||||||
|
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||||
|
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||||
|
$$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||||
|
{
|
||||||
|
Expr *launchCount[3] = {$3, $5, $7};
|
||||||
|
$$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||||
|
{
|
||||||
|
Expr *launchCount[3] = {$3, $5, $7};
|
||||||
|
$$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||||
|
{
|
||||||
|
Expr *launchCount[3] = {$9, $6, $3};
|
||||||
|
$$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount);
|
||||||
|
}
|
||||||
|
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||||
|
{
|
||||||
|
Expr *launchCount[3] = {$9, $6, $3};
|
||||||
|
$$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
|
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||||
{
|
{
|
||||||
@@ -377,13 +435,13 @@ launch_expression
|
|||||||
"around function call expression.");
|
"around function call expression.");
|
||||||
$$ = NULL;
|
$$ = NULL;
|
||||||
}
|
}
|
||||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||||
{
|
{
|
||||||
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
|
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
|
||||||
"around function call expression.");
|
"around function call expression.");
|
||||||
$$ = NULL;
|
$$ = NULL;
|
||||||
}
|
}
|
||||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>'
|
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>'
|
||||||
{
|
{
|
||||||
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
|
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
|
||||||
"around function call expression.");
|
"around function call expression.");
|
||||||
@@ -2217,6 +2275,21 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) {
|
|||||||
|
|
||||||
Symbol *taskCountSym = new Symbol("taskCount", pos, type);
|
Symbol *taskCountSym = new Symbol("taskCount", pos, type);
|
||||||
m->symbolTable->AddVariable(taskCountSym);
|
m->symbolTable->AddVariable(taskCountSym);
|
||||||
|
|
||||||
|
Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskIndexSym0);
|
||||||
|
Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskIndexSym1);
|
||||||
|
Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskIndexSym2);
|
||||||
|
|
||||||
|
|
||||||
|
Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskCountSym0);
|
||||||
|
Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskCountSym1);
|
||||||
|
Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type);
|
||||||
|
m->symbolTable->AddVariable(taskCountSym2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -62,17 +62,20 @@ extern "C" {
|
|||||||
extern void f_di(float *result, double *a, int *b);
|
extern void f_di(float *result, double *a, int *b);
|
||||||
extern void result(float *val);
|
extern void result(float *val);
|
||||||
|
|
||||||
void ISPCLaunch(void **handlePtr, void *f, void *d, int);
|
void ISPCLaunch(void **handlePtr, void *f, void *d, int,int,int);
|
||||||
void ISPCSync(void *handle);
|
void ISPCSync(void *handle);
|
||||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ISPCLaunch(void **handle, void *f, void *d, int count) {
|
void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) {
|
||||||
*handle = (void *)0xdeadbeef;
|
*handle = (void *)0xdeadbeef;
|
||||||
typedef void (*TaskFuncType)(void *, int, int, int, int);
|
typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int);
|
||||||
TaskFuncType func = (TaskFuncType)f;
|
TaskFuncType func = (TaskFuncType)f;
|
||||||
for (int i = 0; i < count; ++i)
|
int count = count0*count1*count2, idx = 0;
|
||||||
func(d, 0, 1, i, count);
|
for (int k = 0; k < count2; ++k)
|
||||||
|
for (int j = 0; j < count1; ++j)
|
||||||
|
for (int i = 0; i < count0; ++i)
|
||||||
|
func(d, 0, 1, idx++, count, i,j,k,count0,count1,count2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ISPCSync(void *) {
|
void ISPCSync(void *) {
|
||||||
|
|||||||
42
tests/launch-8.ispc
Normal file
42
tests/launch-8.ispc
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
#define N0 10
|
||||||
|
#define N1 20
|
||||||
|
#define N2 50
|
||||||
|
static uniform float array[N2][N1][N0];
|
||||||
|
|
||||||
|
task void x(const float f) {
|
||||||
|
uniform int j;
|
||||||
|
|
||||||
|
assert(taskCount == (int32)N0*N1*N2);
|
||||||
|
assert(taskCount0 == (int32)N0);
|
||||||
|
assert(taskCount1 == (int32)N1);
|
||||||
|
assert(taskCount2 == (int32)N2);
|
||||||
|
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
|
||||||
|
assert(taskIndex0 < (int32)N0);
|
||||||
|
assert(taskIndex1 < (int32)N1);
|
||||||
|
assert(taskIndex2 < (int32)N2);
|
||||||
|
|
||||||
|
const uniform int i0 = taskIndex0;
|
||||||
|
const uniform int i1 = taskIndex1;
|
||||||
|
const uniform int i2 = taskIndex2;
|
||||||
|
const uniform int i = taskIndex;
|
||||||
|
array[i2][i1][i0] = i / 10000.;
|
||||||
|
cfor (j = 0; j < 10000; ++j)
|
||||||
|
array[i2][i1][i0] = sin(array[i2][i1][i0]);
|
||||||
|
if (array[i2][i1][i0] < .02)
|
||||||
|
array[i2][i1][i0] = i;
|
||||||
|
}
|
||||||
|
export void f_f(uniform float RET[], uniform float fFOO[]) {
|
||||||
|
float f = fFOO[programIndex];
|
||||||
|
launch[N2][N1][N0] x(f);
|
||||||
|
sync;
|
||||||
|
RET[programIndex] = array[N2-1][N1-1][N0-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 9999.000000;
|
||||||
|
}
|
||||||
42
tests/launch-9.ispc
Normal file
42
tests/launch-9.ispc
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
#define N0 10
|
||||||
|
#define N1 20
|
||||||
|
#define N2 50
|
||||||
|
static uniform float array[N2][N1][N0];
|
||||||
|
|
||||||
|
task void x(const float f) {
|
||||||
|
uniform int j;
|
||||||
|
|
||||||
|
assert(taskCount == (int32)N0*N1*N2);
|
||||||
|
assert(taskCount0 == (int32)N0);
|
||||||
|
assert(taskCount1 == (int32)N1);
|
||||||
|
assert(taskCount2 == (int32)N2);
|
||||||
|
assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2));
|
||||||
|
assert(taskIndex0 < (int32)N0);
|
||||||
|
assert(taskIndex1 < (int32)N1);
|
||||||
|
assert(taskIndex2 < (int32)N2);
|
||||||
|
|
||||||
|
const uniform int i0 = taskIndex0;
|
||||||
|
const uniform int i1 = taskIndex1;
|
||||||
|
const uniform int i2 = taskIndex2;
|
||||||
|
const uniform int i = taskIndex;
|
||||||
|
array[i2][i1][i0] = i / 10000.;
|
||||||
|
cfor (j = 0; j < 10000; ++j)
|
||||||
|
array[i2][i1][i0] = sin(array[i2][i1][i0]);
|
||||||
|
if (array[i2][i1][i0] < .02)
|
||||||
|
array[i2][i1][i0] = i;
|
||||||
|
}
|
||||||
|
export void f_f(uniform float RET[], uniform float fFOO[]) {
|
||||||
|
float f = fFOO[programIndex];
|
||||||
|
launch[N0,N1,N2] x(f);
|
||||||
|
sync;
|
||||||
|
RET[programIndex] = array[N2-1][N1-1][N0-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 9999.000000;
|
||||||
|
}
|
||||||
6
type.cpp
6
type.cpp
@@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// Otherwise we already have the types of the arguments
|
// Otherwise we already have the types of the arguments
|
||||||
|
|||||||
Reference in New Issue
Block a user