diff --git a/builtins/util.m4 b/builtins/util.m4 index c90e8adc..1580dc08 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1813,7 +1813,7 @@ define(`stdlib_core', ` declare i32 @__fast_masked_vload() declare i8* @ISPCAlloc(i8**, i64, i32) nounwind -declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind +declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind diff --git a/docs/ispc.rst b/docs/ispc.rst index 3aab730b..04f478dc 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3012,8 +3012,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as appropriate. Alternatively, ``ispc`` also has support for launching tasks from ``ispc`` code. The approach is similar to Intel® Cilk's task launch -feature. (See the ``examples/mandelbrot_tasks`` and -``examples/mandelbrot_tasks3d`` examples to see it used in a small example.) +feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.) Any function that is launched as a task must be declared with the ``task`` qualifier: diff --git a/examples/mandelbrot_tasks/Makefile b/examples/mandelbrot_tasks/Makefile index 1a565ffd..a50631ab 100644 --- a/examples/mandelbrot_tasks/Makefile +++ b/examples/mandelbrot_tasks/Makefile @@ -2,7 +2,7 @@ EXAMPLE=mandelbrot_tasks CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp ISPC_SRC=mandelbrot_tasks.ispc -ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2 +ISPC_IA_TARGETS=sse2,sse4-x2,avx ISPC_ARM_TARGETS=neon include ../common.mk diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp index 698daf0f..802afde0 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -38,7 +38,8 @@ #pragma warning (disable: 4305) #endif -#include +#include +#include #include #include #include "../timing.h" diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc index 84d4ccd4..f9b0be4c 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc @@ -57,21 +57,26 @@ task void mandelbrot_scanline(uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int height, - uniform int span, + uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { - uniform int ystart = taskIndex * span; - uniform int yend = min((taskIndex+1) * span, (unsigned int)height); + const uniform int xstart = taskIndex0 * xspan; + const uniform int xend = min(xstart + xspan, width); - foreach (yi = ystart ... yend, xi = 0 ... width) { + const uniform int ystart = taskIndex1 * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... xend) { float x = x0 + xi * dx; float y = y0 + yi * dy; int index = yi * width + xi; output[index] = mandel(x, y, maxIterations); } + } - +#if 1 export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, @@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; - uniform int span = 4; + const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */ + const uniform int yspan = 16; - launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span, - maxIterations, output); + +#if 1 + launch [width/xspan, height/yspan] +#else + launch [height/yspan][width/xspan] +#endif + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); } +#endif diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore deleted file mode 100644 index c2471c27..00000000 --- a/examples/mandelbrot_tasks3d/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -mandelbrot -*.ppm diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile deleted file mode 100644 index 3dd44d65..00000000 --- a/examples/mandelbrot_tasks3d/Makefile +++ /dev/null @@ -1,8 +0,0 @@ - -EXAMPLE=mandelbrot_tasks3d -CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp -ISPC_SRC=mandelbrot_tasks3d.ispc -ISPC_IA_TARGETS=avx,sse2,sse4 -ISPC_ARM_TARGETS=neon - -include ../common.mk diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj deleted file mode 100644 index 3a8fca79..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj +++ /dev/null @@ -1,180 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {E80DA7D4-AB22-4648-A068-327307156BE6} - Win32Proj - mandelbrot_tasks - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - - - - Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - - diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp deleted file mode 100644 index 9cbb966a..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - Copyright (c) 2010-2011, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef _MSC_VER -#define _CRT_SECURE_NO_WARNINGS -#define NOMINMAX -#pragma warning (disable: 4244) -#pragma warning (disable: 4305) -#endif - -#include -#include -#include -#include "../timing.h" -#include "mandelbrot_tasks3d_ispc.h" -using namespace ispc; - -extern void mandelbrot_serial(float x0, float y0, float x1, float y1, - int width, int height, int maxIterations, - int output[]); - -/* Write a PPM image file with the image of the Mandelbrot set */ -static void -writePPM(int *buf, int width, int height, const char *fn) { - FILE *fp = fopen(fn, "wb"); - fprintf(fp, "P6\n"); - fprintf(fp, "%d %d\n", width, height); - fprintf(fp, "255\n"); - for (int i = 0; i < width*height; ++i) { - // Map the iteration count to colors by just alternating between - // two greys. - char c = (buf[i] & 0x1) ? 240 : 20; - for (int j = 0; j < 3; ++j) - fputc(c, fp); - } - fclose(fp); - printf("Wrote image file %s\n", fn); -} - - -static void usage() { - fprintf(stderr, "usage: mandelbrot [--scale=]\n"); - exit(1); -} - -int main(int argc, char *argv[]) { - unsigned int width = 1536; - unsigned int height = 1024; - float x0 = -2; - float x1 = 1; - float y0 = -1; - float y1 = 1; - - if (argc == 1) - ; - else if (argc == 2) { - if (strncmp(argv[1], "--scale=", 8) == 0) { - float scale = atof(argv[1] + 8); - if (scale == 0.f) - usage(); - width *= scale; - height *= scale; - // round up to multiples of 16 - width = (width + 0xf) & ~0xf; - height = (height + 0xf) & ~0xf; - } - else - usage(); - } - else - usage(); - - int maxIterations = 512; - int *buf = new int[width*height]; - - // - // Compute the image using the ispc implementation; report the minimum - // time of three runs. - // - double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); - minISPC = std::min(minISPC, dt); - } - - printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); - writePPM(buf, width, height, "mandelbrot-ispc.ppm"); - - - // - // And run the serial implementation 3 times, again reporting the - // minimum time. - // - double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); - minSerial = std::min(minSerial, dt); - } - - printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); - writePPM(buf, width, height, "mandelbrot-serial.ppm"); - - printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); - - return 0; -} diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc deleted file mode 100644 index 395bdca4..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2010-2012, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -static inline int -mandel(float c_re, float c_im, int count) { - float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { - if (z_re * z_re + z_im * z_im > 4.) - break; - - float new_re = z_re*z_re - z_im*z_im; - float new_im = 2.f * z_re * z_im; - unmasked { - z_re = c_re + new_re; - z_im = c_im + new_im; - } - } - - return i; -} - - -/* Task to compute the Mandelbrot iterations for a single scanline. - */ -task void -mandelbrot_scanline(uniform float x0, uniform float dx, - uniform float y0, uniform float dy, - uniform int width, uniform int height, - uniform int xspan, uniform int yspan, - uniform int maxIterations, uniform int output[]) { - const uniform int xstart = taskIndex0 * xspan; - const uniform int xend = min(xstart + xspan, width); - - const uniform int ystart = taskIndex1 * yspan; - const uniform int yend = min(ystart + yspan, height); - - - foreach (yi = ystart ... yend, xi = xstart ... xend) { - float x = x0 + xi * dx; - float y = y0 + yi * dy; - - int index = yi * width + xi; - output[index] = mandel(x, y, maxIterations); - } - -} - -#if 1 -export void -mandelbrot_ispc(uniform float x0, uniform float y0, - uniform float x1, uniform float y1, - uniform int width, uniform int height, - uniform int maxIterations, uniform int output[]) { - uniform float dx = (x1 - x0) / width; - uniform float dy = (y1 - y0) / height; - const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */ - const uniform int yspan = 16; - - -#if 1 - launch [width/xspan, height/yspan] -#else - launch [height/yspan][width/xspan] -#endif - mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, - maxIterations, output); -} -#endif diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp deleted file mode 100644 index a76fb5ca..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (c) 2010-2011, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -static int mandel(float c_re, float c_im, int count) { - float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { - if (z_re * z_re + z_im * z_im > 4.f) - break; - - float new_re = z_re*z_re - z_im*z_im; - float new_im = 2.f * z_re * z_im; - z_re = c_re + new_re; - z_im = c_im + new_im; - } - - return i; -} - -void mandelbrot_serial(float x0, float y0, float x1, float y1, - int width, int height, int maxIterations, - int output[]) -{ - float dx = (x1 - x0) / width; - float dy = (y1 - y0) / height; - - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; ++i) { - float x = x0 + i * dx; - float y = y0 + j * dy; - - int index = (j * width + i); - output[index] = mandel(x, y, maxIterations); - } - } -} - diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 6bc60129..b914068e 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -204,7 +204,7 @@ struct TaskInfo { // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } diff --git a/test_static.cpp b/test_static.cpp index fceeb64e..27a5b136 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -69,7 +69,7 @@ extern "C" { void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) { *handle = (void *)0xdeadbeef; - typedef void (*TaskFuncType)(void *, int, int, int, int, int,int,int, int,int,int); + typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int); TaskFuncType func = (TaskFuncType)f; int count = count0*count1*count2, idx = 0; for (int k = 0; k < count2; ++k)