diff --git a/examples/common.mk b/examples/common.mk
index 252c1196..db7b8eee 100644
--- a/examples/common.mk
+++ b/examples/common.mk
@@ -1,11 +1,11 @@
-TASK_CXX=../tasksys3d.cpp
+TASK_CXX=../tasksys.cpp
TASK_LIB=-lpthread
-TASK_OBJ=objs/tasksys3d.o
+TASK_OBJ=objs/tasksys.o
-CXX=g++ -fopenmp
+CXX=clang++
CXXFLAGS+=-Iobjs/ -O2
-CC=gcc -fopenmp
+CC=clang
CCFLAGS+=-Iobjs/ -O2
LIBS=-lm $(TASK_LIB) -lstdc++
diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore
new file mode 100644
index 00000000..c2471c27
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/.gitignore
@@ -0,0 +1,2 @@
+mandelbrot
+*.ppm
diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile
new file mode 100644
index 00000000..3dd44d65
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/Makefile
@@ -0,0 +1,8 @@
+
+EXAMPLE=mandelbrot_tasks3d
+CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
+ISPC_SRC=mandelbrot_tasks3d.ispc
+ISPC_IA_TARGETS=avx,sse2,sse4
+ISPC_ARM_TARGETS=neon
+
+include ../common.mk
diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj
new file mode 100644
index 00000000..3a8fca79
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj
@@ -0,0 +1,180 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ Win32
+
+
+ Release
+ x64
+
+
+
+ {E80DA7D4-AB22-4648-A068-327307156BE6}
+ Win32Proj
+ mandelbrot_tasks
+
+
+
+ Application
+ true
+ Unicode
+
+
+ Application
+ true
+ Unicode
+
+
+ Application
+ false
+ true
+ Unicode
+
+
+ Application
+ false
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ $(ProjectDir)..\..;$(ExecutablePath)
+ mandelbrot_tasks
+
+
+ true
+ $(ProjectDir)..\..;$(ExecutablePath)
+ mandelbrot_tasks
+
+
+ false
+ $(ProjectDir)..\..;$(ExecutablePath)
+ mandelbrot_tasks
+
+
+ false
+ $(ProjectDir)..\..;$(ExecutablePath)
+ mandelbrot_tasks
+
+
+
+
+
+ Level3
+ Disabled
+ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
+ true
+ Fast
+
+
+ Console
+ true
+
+
+
+
+
+
+ Level3
+ Disabled
+ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
+ true
+ Fast
+
+
+ Console
+ true
+
+
+
+
+ Level3
+
+
+ MaxSpeed
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
+ Fast
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+ Level3
+
+
+ MaxSpeed
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
+ Fast
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
+
+
+ Document
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
+
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
+
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
+
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
+
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+
+
+
+
+
+
diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp
new file mode 100644
index 00000000..9cbb966a
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2010-2011, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#define NOMINMAX
+#pragma warning (disable: 4244)
+#pragma warning (disable: 4305)
+#endif
+
+#include <stdio.h>
+#include <algorithm>
+#include <string.h>
+#include "../timing.h"
+#include "mandelbrot_tasks3d_ispc.h"
+using namespace ispc;
+
+extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
+ int width, int height, int maxIterations,
+ int output[]);
+
+/* Write buf as a PPM image to fn; if fn cannot be opened, report it and return. */
+static void
+writePPM(int *buf, int width, int height, const char *fn) {
+    FILE *fp = fopen(fn, "wb");
+    if (fp == NULL) {
+        perror(fn);
+        return;
+    }
+    fprintf(fp, "P6\n%d %d\n255\n", width, height);
+    for (int i = 0; i < width*height; ++i) {
+        // Low bit of the iteration count picks one of two greys.
+        unsigned char c = (buf[i] & 0x1) ? 240 : 20;
+        for (int j = 0; j < 3; ++j)
+            fputc(c, fp);
+    }
+    fclose(fp);
+    printf("Wrote image file %s\n", fn);
+}
+
+/* Print the command-line synopsis to stderr and terminate with exit status 1. */
+static void usage() {
+    fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
+    exit(1);
+}
+
+int main(int argc, char *argv[]) {
+    unsigned int width = 1536;
+    unsigned int height = 1024;
+    float x0 = -2;
+    float x1 = 1;
+    float y0 = -1;
+    float y1 = 1;
+
+    // The only accepted argument is an optional --scale= prefix followed by
+    // a positive factor; anything else prints the usage text and exits.
+    if (argc > 2)
+        usage();
+    if (argc == 2) {
+        if (strncmp(argv[1], "--scale=", 8) != 0)
+            usage();
+        float scale = atof(argv[1] + 8);
+        // Written as !(scale > 0) so zero, negative and NaN factors are all
+        // rejected before the float -> unsigned conversions below.
+        if (!(scale > 0.f))
+            usage();
+        width *= scale;
+        height *= scale;
+        // round up to multiples of 16
+        width = (width + 0xf) & ~0xf;
+        height = (height + 0xf) & ~0xf;
+    }
+
+    int maxIterations = 512;
+    int *buf = new int[width*height];
+
+    //
+    // Compute the image using the ispc implementation; report the minimum
+    // time of three runs.
+    //
+    double minISPC = 1e30;
+    for (int i = 0; i < 3; ++i) {
+        // Clear out the buffer
+        for (unsigned int p = 0; p < width * height; ++p)
+            buf[p] = 0;
+        reset_and_start_timer();
+        mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
+        double dt = get_elapsed_mcycles();
+        minISPC = std::min(minISPC, dt);
+    }
+
+    printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
+    writePPM(buf, width, height, "mandelbrot-ispc.ppm");
+
+    //
+    // And run the serial implementation 3 times, again reporting the
+    // minimum time.
+    //
+    double minSerial = 1e30;
+    for (int i = 0; i < 3; ++i) {
+        // Clear out the buffer
+        for (unsigned int p = 0; p < width * height; ++p)
+            buf[p] = 0;
+        reset_and_start_timer();
+        mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
+        double dt = get_elapsed_mcycles();
+        minSerial = std::min(minSerial, dt);
+    }
+
+    printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
+    writePPM(buf, width, height, "mandelbrot-serial.ppm");
+
+    printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
+
+    delete[] buf;
+    return 0;
+}
diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc
new file mode 100644
index 00000000..60473a7f
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc
@@ -0,0 +1,100 @@
+/*
+ Copyright (c) 2010-2012, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Iterate z -> z^2 + c from z = c; return the steps done before |z|^2 > 4, at most count. */
+static inline int
+mandel(float c_re, float c_im, int count) {
+    float z_re = c_re, z_im = c_im;
+    int iter = 0;
+    while (iter < count) {
+        if (z_re * z_re + z_im * z_im > 4.)
+            break;
+        float sq_diff = z_re*z_re - z_im*z_im;
+        float cross = 2.f * z_re * z_im;
+        // NOTE(review): unmasked presumably updates every lane, including ones that already broke out -- confirm.
+        unmasked {
+            z_re = c_re + sq_diff;
+            z_im = c_im + cross;
+        }
+        ++iter;
+    }
+    return iter;
+}
+
+
+/* Task to compute the Mandelbrot iterations for one tile of the image: the
+   xspan-by-yspan block of pixels selected by (taskIndex_x, taskIndex_y). */
+task void
+mandelbrot_scanline(uniform float x0, uniform float dx,
+                    uniform float y0, uniform float dy,
+                    uniform int width, uniform int height,
+                    uniform int xspan, uniform int yspan,
+                    uniform int maxIterations, uniform int output[]) {
+#if 0
+    print("taskIndex = % : % \n", taskIndex);
+    print("taskIndex_x= % : % \n", taskIndex_x);
+    print("taskIndex_y= % : % \n", taskIndex_y);
+    print(" --- \n");
+#endif
+    // Pixel bounds of this tile, clipped to the image so that tiles on the
+    // right and bottom edges never index past width or height.
+    const uniform int colBegin = taskIndex_x * xspan;
+    const uniform int colEnd = min(colBegin + xspan, width);
+    const uniform int rowBegin = taskIndex_y * yspan;
+    const uniform int rowEnd = min(rowBegin + yspan, height);
+
+    foreach (yi = rowBegin ... rowEnd, xi = colBegin ... colEnd) {
+        float x = x0 + xi * dx;
+        float y = y0 + yi * dy;
+
+        // Row-major pixel index into the output image.
+        output[yi * width + xi] = mandel(x, y, maxIterations);
+    }
+
+}
+/* Exported entry point: launches one mandelbrot_scanline task per xspan-by-yspan tile. */
+#if 1
+export void
+mandelbrot_ispc(uniform float x0, uniform float y0,
+                uniform float x1, uniform float y1,
+                uniform int width, uniform int height,
+                uniform int maxIterations, uniform int output[]) {
+    uniform float dx = (x1 - x0) / width;
+    uniform float dy = (y1 - y0) / height;
+    const uniform int xspan = 16;
+    const uniform int yspan = 16;
+    // NOTE(review): launch extents were garbled in this patch; restored from the tile indexing -- confirm.
+    launch <<<width/xspan, height/yspan>>>
+    mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
+                        maxIterations, output);
+}
+#endif
diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp
new file mode 100644
index 00000000..a76fb5ca
--- /dev/null
+++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2010-2011, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// Escape-time iteration count for the point c = (c_re, c_im): applies
+// z -> z^2 + c starting from z = c and returns how many steps complete
+// before |z|^2 exceeds 4, never more than count.
+static int mandel(float c_re, float c_im, int count) {
+    float z_re = c_re, z_im = c_im;
+    int iter = 0;
+    while (iter < count && !(z_re * z_re + z_im * z_im > 4.f)) {
+        float sq_diff = z_re*z_re - z_im*z_im;
+        float cross = 2.f * z_re * z_im;
+        z_re = c_re + sq_diff;
+        z_im = c_im + cross;
+        ++iter;
+    }
+    return iter;
+}
+// Fill output (row-major, width*height entries) with per-pixel iteration
+// counts; pixel (col, row) samples the point (x0 + col*dx, y0 + row*dy).
+void mandelbrot_serial(float x0, float y0, float x1, float y1,
+                       int width, int height, int maxIterations,
+                       int output[])
+{
+    const float dx = (x1 - x0) / width;
+    const float dy = (y1 - y0) / height;
+
+    for (int row = 0; row < height; ++row) {
+        const float y = y0 + row * dy;
+        const int rowBase = row * width;
+        for (int col = 0; col < width; ++col) {
+            const float x = x0 + col * dx;
+            output[rowBase + col] = mandel(x, y, maxIterations);
+        }
+    }
+}
+
diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp
index d7b524a8..fed368dc 100644
--- a/examples/tasksys.cpp
+++ b/examples/tasksys.cpp
@@ -59,9 +59,7 @@
#define ISPC_USE_PTHREADS
#define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED
#define ISPC_USE_CILK
-*/
#define ISPC_USE_OMP
-/*
#define ISPC_USE_TBB_TASK_GROUP
#define ISPC_USE_TBB_PARALLEL_FOR
@@ -172,21 +170,39 @@
// Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
- int taskIndex, int taskCount);
+ int taskIndex, int taskCount,
+ int taskIndex_x, int taskIndex_y, int taskIndex_z,
+ int taskCount_x, int taskCount_y, int taskCount_z);
// Small structure used to hold the data for each task
struct TaskInfo {
TaskFuncType func;
void *data;
int taskIndex, taskCount;
+    int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS)
event taskEvent;
#endif
+    // The linear taskIndex enumerates the 3D launch grid with x varying
+    // fastest, then y, then z; the accessors below recover the per-axis
+    // coordinates from it.
+    int taskIndex_x() const { return taskIndex % taskCount3d[0]; }
+    int taskIndex_y() const {
+        return (taskIndex / taskCount3d[0]) % taskCount3d[1];
+    }
+    int taskIndex_z() const {
+        return taskIndex / (taskCount3d[0] * taskCount3d[1]);
+    }
+    // Extent of the launch grid along each axis, read back from
+    // taskCount3d[0..2].
+    int taskCount_x() const { return taskCount3d[0]; }
+    int taskCount_y() const { return taskCount3d[1]; }
+    int taskCount_z() const { return taskCount3d[2]; }
};
// ispc expects these functions to have C linkage / not be mangled
extern "C" {
- void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
+ void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz);
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
void ISPCSync(void *handle);
}
@@ -520,7 +536,9 @@ lRunTask(void *ti) {
// Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount,
- taskInfo->taskIndex, taskInfo->taskCount);
+ taskInfo->taskIndex, taskInfo->taskCount,
+ taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(),
+ taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z());
}
@@ -561,7 +579,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those.
int threadIndex = 0;
int threadCount = 1;
- ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
+ ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
+ ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
+ ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
// Signal the event that this task is done
ti->taskEvent.set();
@@ -662,7 +682,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
- myTask->taskCount);
+ myTask->taskCount,
+ myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(),
+ myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z());
//
// Decrement the "number of unfinished tasks" counter in the task
@@ -863,7 +885,9 @@ TaskGroup::Sync() {
// Do work for _myTask_
//
// FIXME: bogus values for thread index/thread count here as well..
- myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
+ myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount,
+ myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(),
+ myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z());
//
// Decrement the number of unfinished tasks counter
@@ -893,7 +917,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
- ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
+ ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount,
+ ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
+ ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
}
}
@@ -922,7 +948,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
- ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
+ ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
+ ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
+ ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
}
}
@@ -953,7 +981,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
- ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
+ ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
+ ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
+ ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
});
}
@@ -980,7 +1010,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
- ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
+ ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount,
+ ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(),
+ ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z());
});
}
}
@@ -1033,7 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) {
///////////////////////////////////////////////////////////////////////////
void
-ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
+ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) {
+ const int count = countx*county*countz;
TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) {
InitTaskSystem();
@@ -1050,6 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->data = data;
ti->taskIndex = i;
ti->taskCount = count;
+ ti->taskCount3d[0] = countx;
+ ti->taskCount3d[1] = county;
+ ti->taskCount3d[2] = countz;
}
taskGroup->Launch(baseIndex, count);
}