diff --git a/examples/common.mk b/examples/common.mk index 252c1196..db7b8eee 100644 --- a/examples/common.mk +++ b/examples/common.mk @@ -1,11 +1,11 @@ -TASK_CXX=../tasksys3d.cpp +TASK_CXX=../tasksys.cpp TASK_LIB=-lpthread -TASK_OBJ=objs/tasksys3d.o +TASK_OBJ=objs/tasksys.o -CXX=g++ -fopenmp +CXX=clang++ CXXFLAGS+=-Iobjs/ -O2 -CC=gcc -fopenmp +CC=clang CCFLAGS+=-Iobjs/ -O2 LIBS=-lm $(TASK_LIB) -lstdc++ diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore new file mode 100644 index 00000000..c2471c27 --- /dev/null +++ b/examples/mandelbrot_tasks3d/.gitignore @@ -0,0 +1,2 @@ +mandelbrot +*.ppm diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile new file mode 100644 index 00000000..3dd44d65 --- /dev/null +++ b/examples/mandelbrot_tasks3d/Makefile @@ -0,0 +1,8 @@ + +EXAMPLE=mandelbrot_tasks3d +CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp +ISPC_SRC=mandelbrot_tasks3d.ispc +ISPC_IA_TARGETS=avx,sse2,sse4 +ISPC_ARM_TARGETS=neon + +include ../common.mk diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj new file mode 100644 index 00000000..3a8fca79 --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj @@ -0,0 +1,180 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {E80DA7D4-AB22-4648-A068-327307156BE6} + Win32Proj + mandelbrot_tasks + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + + + + + Document + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp new file mode 100644 index 00000000..9cbb966a --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp @@ -0,0 +1,146 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#define NOMINMAX +#pragma warning (disable: 4244) +#pragma warning (disable: 4305) +#endif + +#include +#include +#include +#include "../timing.h" +#include "mandelbrot_tasks3d_ispc.h" +using namespace ispc; + +extern void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]); + +/* Write a PPM image file with the image of the Mandelbrot set */ +static void +writePPM(int *buf, int width, int height, const char *fn) { + FILE *fp = fopen(fn, "wb"); + fprintf(fp, "P6\n"); + fprintf(fp, "%d %d\n", width, height); + fprintf(fp, "255\n"); + for (int i = 0; i < width*height; ++i) { + // Map the iteration count to colors by just alternating between + // two greys. + char c = (buf[i] & 0x1) ? 240 : 20; + for (int j = 0; j < 3; ++j) + fputc(c, fp); + } + fclose(fp); + printf("Wrote image file %s\n", fn); +} + + +static void usage() { + fprintf(stderr, "usage: mandelbrot [--scale=]\n"); + exit(1); +} + +int main(int argc, char *argv[]) { + unsigned int width = 1536; + unsigned int height = 1024; + float x0 = -2; + float x1 = 1; + float y0 = -1; + float y1 = 1; + + if (argc == 1) + ; + else if (argc == 2) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + if (scale == 0.f) + usage(); + width *= scale; + height *= scale; + // round up to multiples of 16 + width = (width + 0xf) & ~0xf; + height = (height + 0xf) & ~0xf; + } + else + usage(); + } + else + usage(); + + int maxIterations = 512; + int *buf = new int[width*height]; + + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } + + printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); + writePPM(buf, width, height, "mandelbrot-ispc.ppm"); + + + // + // And run the serial implementation 3 times, again reporting the + // minimum time. + // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } + + printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); + writePPM(buf, width, height, "mandelbrot-serial.ppm"); + + printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); + + return 0; +} diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc new file mode 100644 index 00000000..60473a7f --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -0,0 +1,100 @@ +/* + Copyright (c) 2010-2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +static inline int +mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + unmasked { + z_re = c_re + new_re; + z_im = c_im + new_im; + } + } + + return i; +} + + +/* Task to compute the Mandelbrot iterations for a single scanline. + */ +task void +mandelbrot_scanline(uniform float x0, uniform float dx, + uniform float y0, uniform float dy, + uniform int width, uniform int height, + uniform int xspan, uniform int yspan, + uniform int maxIterations, uniform int output[]) { +#if 0 + print("taskIndex = % : % \n", taskIndex); + print("taskIndex_x= % : % \n", taskIndex_x); + print("taskIndex_y= % : % \n", taskIndex_y); + print(" --- \n"); +#endif + const uniform int xstart = taskIndex_x * xspan; + const uniform int xend = min(xstart + xspan, width); + + const uniform int ystart = taskIndex_y * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... xend) { + float x = x0 + xi * dx; + float y = y0 + yi * dy; + + int index = yi * width + xi; + output[index] = mandel(x, y, maxIterations); + } + +} + +#if 1 +export void +mandelbrot_ispc(uniform float x0, uniform float y0, + uniform float x1, uniform float y1, + uniform int width, uniform int height, + uniform int maxIterations, uniform int output[]) { + uniform float dx = (x1 - x0) / width; + uniform float dy = (y1 - y0) / height; + const uniform int xspan = 16; + const uniform int yspan = 16; + + launch <<>> + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); +} +#endif diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp new file mode 100644 index 00000000..a76fb5ca --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp @@ -0,0 +1,68 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +static int mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.f) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + z_re = c_re + new_re; + z_im = c_im + new_im; + } + + return i; +} + +void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]) +{ + float dx = (x1 - x0) / width; + float dy = (y1 - y0) / height; + + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; ++i) { + float x = x0 + i * dx; + float y = y0 + j * dy; + + int index = (j * width + i); + output[index] = mandel(x, y, maxIterations); + } + } +} + diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index d7b524a8..fed368dc 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -59,9 +59,7 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK -*/ #define ISPC_USE_OMP -/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR @@ -172,21 +170,39 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount); + int taskIndex, int taskCount, + int taskIndex_x, int taskIndex_y, int taskIndex_z, + int taskCount_x, int taskCount_y, int taskCount_z); // Small structure used to hold the data for each task struct TaskInfo { TaskFuncType func; void *data; int taskIndex, taskCount; + int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif + int taskIndex_x() const + { + return taskIndex % taskCount3d[0]; + } + int taskIndex_y() const + { + return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; + } + int taskIndex_z() const + { + return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); + } + int taskCount_x() const { return taskCount3d[0]; } + int taskCount_y() const { return taskCount3d[1]; } + int taskCount_z() const { return taskCount3d[2]; } }; // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } @@ -520,7 +536,9 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount); + taskInfo->taskIndex, taskInfo->taskCount, + taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(), + taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z()); } @@ -561,7 +579,9 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); // Signal the event that this task is done ti->taskEvent.set(); @@ -662,7 +682,9 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount); + myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the "number of unfinished tasks" counter in the task @@ -863,7 +885,9 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. - myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the number of unfinished tasks counter @@ -893,7 +917,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -922,7 +948,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -953,7 +981,9 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } @@ -980,7 +1010,9 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } } @@ -1033,7 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) { + const int count = countx*county*countz; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1050,6 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ti->data = data; ti->taskIndex = i; ti->taskCount = count; + ti->taskCount3d[0] = countx; + ti->taskCount3d[1] = county; + ti->taskCount3d[2] = countz; } taskGroup->Launch(baseIndex, count); }