diff --git a/examples/stencil/.gitignore b/examples/stencil/.gitignore new file mode 100644 index 00000000..e043735b --- /dev/null +++ b/examples/stencil/.gitignore @@ -0,0 +1,2 @@ +stencil +objs diff --git a/examples/stencil/Makefile b/examples/stencil/Makefile new file mode 100644 index 00000000..6d6a9f62 --- /dev/null +++ b/examples/stencil/Makefile @@ -0,0 +1,38 @@ + +ARCH = $(shell uname) + +TASK_CXX=tasks_pthreads.cpp +TASK_LIB=-lpthread + +ifeq ($(ARCH), Darwin) + TASK_CXX=tasks_gcd.cpp + TASK_LIB= +endif + +TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o)) + +CXX=g++ -m64 +CXXFLAGS=-Iobjs/ -O3 -Wall +ISPC=ispc +ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 + +default: stencil + +.PHONY: dirs clean + +dirs: + /bin/mkdir -p objs/ + +clean: + /bin/rm -rf objs *~ stencil + +stencil: dirs objs/stencil.o objs/stencil_serial.o objs/stencil_ispc.o $(TASK_OBJ) + $(CXX) $(CXXFLAGS) -o $@ objs/stencil.o objs/stencil_ispc.o objs/stencil_serial.o $(TASK_OBJ) -lm $(TASK_LIB) + +objs/%.o: %.cpp + $(CXX) $< $(CXXFLAGS) -c -o $@ + +objs/stencil.o: objs/stencil_ispc.h + +objs/%_ispc.h objs/%_ispc.o: %.ispc + $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/stencil/stencil.cpp b/examples/stencil/stencil.cpp new file mode 100644 index 00000000..9eb258af --- /dev/null +++ b/examples/stencil/stencil.cpp @@ -0,0 +1,178 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#define NOMINMAX +#pragma warning (disable: 4244) +#pragma warning (disable: 4305) +#endif + +#include +#include +#include +#include "../timing.h" +#include "../cpuid.h" +#include "stencil_ispc.h" +using namespace ispc; + + +// Make sure that the vector ISA used during compilation is supported by +// the processor. The ISPC_TARGET_* macro is set in the ispc-generated +// header file that we include above. 
+static void +ensureTargetISAIsSupported() { +#if defined(ISPC_TARGET_SSE2) + bool isaSupported = CPUSupportsSSE2(); + const char *target = "SSE2"; +#elif defined(ISPC_TARGET_SSE4) + bool isaSupported = CPUSupportsSSE4(); + const char *target = "SSE4"; +#elif defined(ISPC_TARGET_AVX) + bool isaSupported = CPUSupportsAVX(); + const char *target = "AVX"; +#else +#error "Unknown ISPC_TARGET_* value" +#endif + if (!isaSupported) { + fprintf(stderr, "***\n*** Error: the ispc-compiled code uses the %s instruction " + "set, which isn't\n*** supported by this computer's CPU!\n", target); + fprintf(stderr, "***\n*** Please modify the " +#ifdef _MSC_VER + "MSVC project file " +#else + "Makefile " +#endif + "to select another target (e.g. sse2)\n***\n"); + exit(1); + } +} + + +extern void loop_stencil_serial(int t0, int t1, int x0, int x1, + int y0, int y1, int z0, int z1, + int Nx, int Ny, int Nz, + const float coef[5], + const float vsq[], + float Aeven[], float Aodd[]); +extern "C" { +extern void loop_stencil_ispc(int t0, int t1, int x0, int x1, + int y0, int y1, int z0, int z1, + int Nx, int Ny, int Nz, + const float coef[5], + const float vsq[], + float Aeven[], float Aodd[]); +} + + +void InitData(int Nx, int Ny, int Nz, float *A[2], float *vsq) { + int offset = 0; + for (int z = 0; z < Nz; ++z) + for (int y = 0; y < Ny; ++y) + for (int x = 0; x < Nx; ++x, ++offset) { + A[0][offset] = (x < Nx / 2) ? 
x / float(Nx) : y / float(Ny); + A[1][offset] = 0; + vsq[offset] = x*y*z / float(Nx * Ny * Nz); + } +} + + +int main() { + ensureTargetISAIsSupported(); + + extern void TasksInit(); + TasksInit(); + + int Nx = 256, Ny = 256, Nz = 256; + int width = 4; + float *Aserial[2], *Aispc[2]; + Aserial[0] = new float [Nx * Ny * Nz]; + Aserial[1] = new float [Nx * Ny * Nz]; + Aispc[0] = new float [Nx * Ny * Nz]; + Aispc[1] = new float [Nx * Ny * Nz]; + float *vsq = new float [Nx * Ny * Nz]; + + float coeff[4] = { 0.5, -.25, .125, -.0625 }; + + InitData(Nx, Ny, Nz, Aispc, vsq); + + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width, + width, Nz - width, Nx, Ny, Nz, coeff, vsq, + Aispc[0], Aispc[1]); + double dt = get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } + + printf("[stencil ispc]:\t\t\t[%.3f] million cycles\n", minISPC); + + InitData(Nx, Ny, Nz, Aserial, vsq); + + // + // And run the serial implementation 3 times, again reporting the + // minimum time. 
+ // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width, + width, Nz - width, Nx, Ny, Nz, coeff, vsq, + Aserial[0], Aserial[1]); + double dt = get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } + + printf("[stencil serial]:\t\t[%.3f] millon cycles\n", minSerial); + + printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minSerial/minISPC); + + // Check for agreement + int offset = 0; + for (int z = 0; z < Nz; ++z) + for (int y = 0; y < Ny; ++y) + for (int x = 0; x < Nx; ++x, ++offset) { + float error = fabsf((Aserial[1][offset] - Aispc[1][offset]) / + Aserial[1][offset]); + if (error > 1e-4) + printf("Error @ (%d,%d,%d): ispc = %f, serial = %f\n", + x, y, z, Aispc[1][offset], Aserial[1][offset]); + } + + return 0; +} diff --git a/examples/stencil/stencil.ispc b/examples/stencil/stencil.ispc new file mode 100644 index 00000000..bb618c2b --- /dev/null +++ b/examples/stencil/stencil.ispc @@ -0,0 +1,96 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
// Neighbor accessors used inside stencil_step(): the arguments are signed
// offsets in x, y and z from the grid point whose linear index is "idx".
#define A_cur(dx, dy, dz)  Ain[idx + (dx) + ((dy) * Nx) + ((dz) * sliceStride)]
#define A_next(dx, dy, dz) Aout[idx + (dx) + ((dy) * Nx) + ((dz) * sliceStride)]

// Task that advances the points in [x0,x1) x [y0,y1) x [z0,z1) by one time
// step.  Ain is read at the point and at its neighbors up to three cells
// away along each axis; Aout is read at the point (it supplies the term
// that is subtracted) and then overwritten with the new value.
static task void
stencil_step(uniform int x0, uniform int x1,
             uniform int y0, uniform int y1,
             uniform int z0, uniform int z1,
             uniform int Nx, uniform int Ny, uniform int Nz,
             uniform const float coef[4], uniform const float vsq[],
             uniform const float Ain[], uniform float Aout[]) {
    // Number of elements in one z slice of the volume.
    const uniform int sliceStride = Nx * Ny;

    for (uniform int z = z0; z < z1; ++z) {
        for (uniform int y = y0; y < y1; ++y) {
            // Linear index of (0, y, z); shared by every point of this row.
            uniform int rowBase = (z * sliceStride) + (y * Nx);

            // Each iteration handles programCount consecutive x values,
            // one per program instance.  This assumes that
            // (x1-x0) % programCount == 0.
            for (uniform int x = x0; x < x1; x += programCount) {
                int idx = rowBase + x + programIndex;

                float div = coef[0] * A_cur(0, 0, 0) +
                            coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
                                       A_cur(0, +1, 0) + A_cur(0, -1, 0) +
                                       A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
                            coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
                                       A_cur(0, +2, 0) + A_cur(0, -2, 0) +
                                       A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
                            coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
                                       A_cur(0, +3, 0) + A_cur(0, -3, 0) +
                                       A_cur(0, 0, +3) + A_cur(0, 0, -3));

                A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
                    vsq[idx] * div;
            }
        }
    }
}


// Entry point called from the C++ driver: runs time steps t0..t1-1 over
// the given box.  Even steps read Aeven and write Aodd; odd steps read
// Aodd and write Aeven.
export void loop_stencil_ispc(uniform int t0, uniform int t1,
                              uniform int x0, uniform int x1,
                              uniform int y0, uniform int y1,
                              uniform int z0, uniform int z1,
                              uniform int Nx, uniform int Ny, uniform int Nz,
                              uniform const float coef[4],
                              uniform const float vsq[],
                              uniform float Aeven[], uniform float Aodd[])
{
    // Work is also spread across cores: every task gets a slab that is
    // "dz" slices thick in z.  (dz=1 seems to work better than any larger
    // values.)
    const uniform int dz = 1;

    for (uniform int t = t0; t < t1; ++t) {
        uniform bool oddStep = ((t & 1) != 0);

        for (uniform int z = z0; z < z1; z += dz) {
            if (oddStep)
                launch < stencil_step(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz, coef, vsq,
                                      Aodd, Aeven) >;
            else
                launch < stencil_step(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz, coef, vsq,
                                      Aeven, Aodd) >;
        }

        // The next time step reads what this one writes, so every task
        // launched above has to finish before we go on.
        sync;
    }
}
// Advance the points in [x0,x1) x [y0,y1) x [z0,z1) of an Nx*Ny*Nz grid
// (x varying fastest) by one time step.  Ain is read at each point and at
// its neighbors up to three cells away along every axis; Aout is read at
// the point itself and then overwritten with the new value.
static void
stencil_step(int x0, int x1,
             int y0, int y1,
             int z0, int z1,
             int Nx, int Ny, int Nz,
             const float coef[4], const float vsq[],
             const float Ain[], float Aout[]) {
    // Distance, in elements, between neighbors along y and along z.
    const int rowStride = Nx;
    const int sliceStride = Nx * Ny;

    for (int z = z0; z < z1; ++z) {
        for (int y = y0; y < y1; ++y) {
            const int rowStart = (z * sliceStride) + (y * rowStride);

            for (int x = x0; x < x1; ++x) {
                const int index = rowStart + x;
                // "cur" points at the current grid point, so neighbors are
                // plain signed offsets from it.
                const float *cur = Ain + index;

                float div = coef[0] * cur[0] +
                            coef[1] * (cur[+1] + cur[-1] +
                                       cur[+rowStride] + cur[-rowStride] +
                                       cur[+sliceStride] + cur[-sliceStride]) +
                            coef[2] * (cur[+2] + cur[-2] +
                                       cur[+2 * rowStride] + cur[-2 * rowStride] +
                                       cur[+2 * sliceStride] + cur[-2 * sliceStride]) +
                            coef[3] * (cur[+3] + cur[-3] +
                                       cur[+3 * rowStride] + cur[-3 * rowStride] +
                                       cur[+3 * sliceStride] + cur[-3 * sliceStride]);

                Aout[index] = 2 * cur[0] - Aout[index] +
                    vsq[index] * div;
            }
        }
    }
}


// Run time steps t0..t1-1 of the stencil over the given box.  Even steps
// read Aeven and write Aodd; odd steps read Aodd and write Aeven.
void loop_stencil_serial(int t0, int t1,
                         int x0, int x1,
                         int y0, int y1,
                         int z0, int z1,
                         int Nx, int Ny, int Nz,
                         const float coef[4],
                         const float vsq[],
                         float Aeven[], float Aodd[])
{
    for (int t = t0; t < t1; ++t) {
        const bool evenStep = ((t & 1) == 0);
        const float *source = evenStep ? Aeven : Aodd;
        float *dest = evenStep ? Aodd : Aeven;

        stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq,
                     source, dest);
    }
}
+*/ + +/* Simple task system implementation for ispc based on Microsoft's + Concurrency Runtime. */ + +#include +#include +using namespace Concurrency; +#include +#include +#include +#include + +// ispc expects these functions to have C linkage / not be mangled +extern "C" { + void ISPCLaunch(void *f, void *data); + void ISPCSync(); + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); +} + +typedef void (*TaskFuncType)(void *, int, int); + +struct TaskInfo { + TaskFuncType ispcFunc; + void *ispcData; +}; + +// This is a simple implementation that just aborts if more than MAX_TASKS +// are launched. It could easily be extended to be more general... + +#define MAX_TASKS 4096 +static int taskOffset; +static TaskInfo taskInfo[MAX_TASKS]; +static event *events[MAX_TASKS]; +static CRITICAL_SECTION criticalSection; +static bool initialized = false; + +void +TasksInit() { + InitializeCriticalSection(&criticalSection); + for (int i = 0; i < MAX_TASKS; ++i) + events[i] = new event; + initialized = true; +} + + +void __cdecl +lRunTask(LPVOID param) { + TaskInfo *ti = (TaskInfo *)param; + + // Actually run the task. + // FIXME: like the tasks_gcd.cpp implementation, this is passing bogus + // values for the threadIndex and threadCount builtins, which in turn + // will cause bugs in code that uses those. FWIW this example doesn't + // use them... 
+ int threadIndex = 0; + int threadCount = 1; + ti->ispcFunc(ti->ispcData, threadIndex, threadCount); + + // Signal the event that this task is done + int taskNum = ti - &taskInfo[0]; + events[taskNum]->set(); +} + + +void +ISPCLaunch(void *func, void *data) { + if (!initialized) { + fprintf(stderr, "You must call TasksInit() before launching tasks.\n"); + exit(1); + } + + // Get a TaskInfo struct for this task + EnterCriticalSection(&criticalSection); + TaskInfo *ti = &taskInfo[taskOffset++]; + assert(taskOffset < MAX_TASKS); + LeaveCriticalSection(&criticalSection); + + // And pass it on to the Concurrency Runtime... + ti->ispcFunc = (TaskFuncType)func; + ti->ispcData = data; + CurrentScheduler::ScheduleTask(lRunTask, ti); +} + + +void ISPCSync() { + if (!initialized) { + fprintf(stderr, "You must call TasksInit() before launching tasks.\n"); + exit(1); + } + + event::wait_for_multiple(&events[0], taskOffset, true, + COOPERATIVE_TIMEOUT_INFINITE); + + for (int i = 0; i < taskOffset; ++i) + events[i]->reset(); + + taskOffset = 0; +} + + +void *ISPCMalloc(int64_t size, int32_t alignment) { + return _aligned_malloc(size, alignment); +} + + +void ISPCFree(void *ptr) { + _aligned_free(ptr); +} diff --git a/examples/stencil/tasks_gcd.cpp b/examples/stencil/tasks_gcd.cpp new file mode 100644 index 00000000..f759cc37 --- /dev/null +++ b/examples/stencil/tasks_gcd.cpp @@ -0,0 +1,103 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* A simple task system for ispc programs based on Apple's Grand Central + Dispatch. */ + +#include +#include +#include + +static bool initialized = false; +static dispatch_queue_t gcdQueue; +static dispatch_group_t gcdGroup; + +// ispc expects these functions to have C linkage / not be mangled +extern "C" { + void ISPCLaunch(void *f, void *data); + void ISPCSync(); +} + +struct TaskInfo { + void *func; + void *data; +}; + + +void +TasksInit() { + gcdQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + gcdGroup = dispatch_group_create(); + initialized = true; +} + + +static void +lRunTask(void *ti) { + typedef void (*TaskFuncType)(void *, int, int); + TaskInfo *taskInfo = (TaskInfo *)ti; + + TaskFuncType func = (TaskFuncType)(taskInfo->func); + + // FIXME: these are bogus values; may cause bugs in code that depends + // on them having unique values in different threads. 
+ int threadIndex = 0; + int threadCount = 1; + // Actually run the task + func(taskInfo->data, threadIndex, threadCount); + + // FIXME: taskInfo leaks... +} + + +void ISPCLaunch(void *func, void *data) { + if (!initialized) { + fprintf(stderr, "You must call TasksInit() before launching tasks.\n"); + exit(1); + } + TaskInfo *ti = new TaskInfo; + ti->func = func; + ti->data = data; + dispatch_group_async_f(gcdGroup, gcdQueue, ti, lRunTask); +} + + +void ISPCSync() { + if (!initialized) { + fprintf(stderr, "You must call TasksInit() before launching tasks.\n"); + exit(1); + } + + // Wait for all of the tasks in the group to complete before returning + dispatch_group_wait(gcdGroup, DISPATCH_TIME_FOREVER); +} diff --git a/examples/stencil/tasks_pthreads.cpp b/examples/stencil/tasks_pthreads.cpp new file mode 100644 index 00000000..7ec35e04 --- /dev/null +++ b/examples/stencil/tasks_pthreads.cpp @@ -0,0 +1,295 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. 
/* A simple task system for ispc programs built on pthreads: a fixed pool
   of worker threads pulls (function, data) pairs off a shared queue. */

#include <pthread.h>
#include <semaphore.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#if !defined(__linux__)
// Only needed for the sysctl()-based core count below.
#include <sys/param.h>
#include <sys/sysctl.h>
#endif
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <vector>
#include <utility>

// ispc expects these functions to have C linkage / not be mangled
extern "C" {
    void ISPCLaunch(void *f, void *data);
    void ISPCSync();
}


static int nThreads;
static pthread_t *threads;                  // non-NULL once TasksInit() has run
static pthread_mutex_t taskQueueMutex;      // protects taskQueue
static std::vector<std::pair<void *, void *> > taskQueue;   // (function, data)
static sem_t *workerSemaphore;              // posted once per launched task
static uint32_t numUnfinishedTasks;         // protected by tasksRunningConditionMutex
static pthread_mutex_t tasksRunningConditionMutex;
static pthread_cond_t tasksRunningCondition;    // signaled when the count reaches zero

static void *lTaskEntry(void *arg);

/** Exit with a diagnostic if a pthread_* call failed.  The pthread
    functions return the error number directly (they don't set errno), so
    "err" is that return value; "what" is the message prefix to print.
 */
static void
lCheckPthread(int err, const char *what) {
    if (err != 0) {
        fprintf(stderr, "%s: %s\n", what, strerror(err));
        exit(1);
    }
}

/** Figure out how many CPU cores there are in the system
 */
static int
lNumCPUCores() {
#if defined(__linux__)
    long nCores = sysconf(_SC_NPROCESSORS_ONLN);
    if (nCores < 1) {
        fprintf(stderr, "sysconf() failed to report the number of cores.  Guessing 2.\n");
        return 2;
    }
    return (int)nCores;
#else
    // Mac
    int mib[2];
    mib[0] = CTL_HW;
    size_t length = 2;
    if (sysctlnametomib("hw.logicalcpu", mib, &length) == -1) {
        fprintf(stderr, "sysctlnametomib() failed.  Guessing 2 cores.\n");
        return 2;
    }
    assert(length == 2);

    int nCores = 0;
    size_t size = sizeof(nCores);

    if (sysctl(mib, 2, &nCores, &size, NULL, 0) == -1 || nCores < 1) {
        fprintf(stderr, "sysctl() to find number of cores present failed.  Guessing 2.\n");
        return 2;
    }
    return nCores;
#endif
}

/** Create the synchronization objects and start one worker thread per CPU
    core.  Must be called once before the first ISPCLaunch()/ISPCSync().
 */
void
TasksInit() {
    nThreads = lNumCPUCores();

    threads = new pthread_t[nThreads];

    lCheckPthread(pthread_mutex_init(&taskQueueMutex, NULL),
                  "Error creating mutex");

#if defined(__APPLE__)
    // Unnamed semaphores aren't available here, so use a named one that is
    // unique to this process.  The name is unlinked as soon as the
    // semaphore is open so that it doesn't outlive the process.  Note that
    // sem_open() reports failure with SEM_FAILED and errno.
    char name[32];
    snprintf(name, sizeof(name), "/stencil.%d", (int)getpid());
    sem_unlink(name);   // discard anything stale left under this name
    workerSemaphore = sem_open(name, O_CREAT | O_EXCL, S_IRUSR|S_IWUSR, 0);
    if (workerSemaphore == SEM_FAILED) {
        fprintf(stderr, "Error creating semaphore: %s\n", strerror(errno));
        exit(1);
    }
    sem_unlink(name);
#else
    // A process-private unnamed semaphore needs no name in any global
    // namespace and so leaves nothing behind when the process exits.
    static sem_t semaphoreStorage;
    if (sem_init(&semaphoreStorage, 0, 0) != 0) {
        fprintf(stderr, "Error creating semaphore: %s\n", strerror(errno));
        exit(1);
    }
    workerSemaphore = &semaphoreStorage;
#endif

    lCheckPthread(pthread_cond_init(&tasksRunningCondition, NULL),
                  "Error creating condition variable");
    lCheckPthread(pthread_mutex_init(&tasksRunningConditionMutex, NULL),
                  "Error creating mutex");

    for (int i = 0; i < nThreads; ++i) {
        // The worker's index is passed in the pointer-sized argument.
        void *arg = reinterpret_cast<void *>(static_cast<intptr_t>(i));
        int err = pthread_create(&threads[i], NULL, &lTaskEntry, arg);
        if (err != 0) {
            fprintf(stderr, "Error creating pthread %d: %s\n", i, strerror(err));
            exit(1);
        }
    }
}


/** Called by ispc-generated code for each "launch": queue the task
    function f with its data d and wake a worker to run it.
 */
void
ISPCLaunch(void *f, void *d) {
    if (threads == NULL) {
        fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
        exit(1);
    }

    //
    // Acquire mutex, add task
    //
    lCheckPthread(pthread_mutex_lock(&taskQueueMutex),
                  "Error from pthread_mutex_lock");
    taskQueue.push_back(std::make_pair(f, d));
    lCheckPthread(pthread_mutex_unlock(&taskQueueMutex),
                  "Error from pthread_mutex_unlock");

    //
    // Update count of number of tasks left to run.  This happens before
    // the semaphore is posted, so the count has always been incremented
    // for a task by the time a worker can be woken on its behalf.
    //
    lCheckPthread(pthread_mutex_lock(&tasksRunningConditionMutex),
                  "Error from pthread_mutex_lock");
    ++numUnfinishedTasks;
    lCheckPthread(pthread_mutex_unlock(&tasksRunningConditionMutex),
                  "Error from pthread_mutex_unlock");

    //
    // Post to the worker semaphore to wake up worker threads that are
    // sleeping waiting for tasks to show up.  sem_post() returns -1 and
    // sets errno on failure.
    //
    if (sem_post(workerSemaphore) != 0) {
        fprintf(stderr, "Error from sem_post: %s\n", strerror(errno));
        exit(1);
    }
}


/** Body of each worker thread; arg carries the thread's index.  Loops
    forever: wait for a task to be posted, run it, and update the count of
    unfinished tasks.
 */
static void *
lTaskEntry(void *arg) {
    int threadIndex = (int)reinterpret_cast<intptr_t>(arg);
    int threadCount = nThreads;

    while (true) {
        // sem_wait() returns -1 and sets errno on failure; being
        // interrupted by a signal isn't an error, so just wait again.
        if (sem_wait(workerSemaphore) != 0) {
            if (errno == EINTR)
                continue;
            fprintf(stderr, "Error from sem_wait: %s\n", strerror(errno));
            exit(1);
        }

        //
        // Acquire mutex, get task
        //
        lCheckPthread(pthread_mutex_lock(&taskQueueMutex),
                      "Error from pthread_mutex_lock");
        if (taskQueue.size() == 0) {
            //
            // Task queue is empty, go back and wait on the semaphore
            //
            lCheckPthread(pthread_mutex_unlock(&taskQueueMutex),
                          "Error from pthread_mutex_unlock");
            continue;
        }

        std::pair<void *, void *> myTask = taskQueue.back();
        taskQueue.pop_back();

        lCheckPthread(pthread_mutex_unlock(&taskQueueMutex),
                      "Error from pthread_mutex_unlock");

        //
        // Do work for _myTask_
        //
        typedef void (*TaskFunType)(void *, int, int);
        TaskFunType func = (TaskFunType)myTask.first;
        func(myTask.second, threadIndex, threadCount);

        //
        // Decrement the number of unfinished tasks counter
        //
        lCheckPthread(pthread_mutex_lock(&tasksRunningConditionMutex),
                      "Error from pthread_mutex_lock");

        if (--numUnfinishedTasks == 0) {
            //
            // Signal the "no more tasks are running" condition if all of
            // them are done.
            //
            lCheckPthread(pthread_cond_signal(&tasksRunningCondition),
                          "Error from pthread_cond_signal");
        }

        lCheckPthread(pthread_mutex_unlock(&tasksRunningConditionMutex),
                      "Error from pthread_mutex_unlock");
    }

    // Not reached; the loop above never exits.
    return NULL;
}


/** Called by ispc-generated code for "sync": block until every task that
    has been launched has finished running.
 */
void ISPCSync() {
    if (threads == NULL) {
        fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
        exit(1);
    }

    lCheckPthread(pthread_mutex_lock(&tasksRunningConditionMutex),
                  "Error from pthread_mutex_lock");

    // As long as there are tasks running, wait on the condition variable;
    // doing so causes this thread to go to sleep until someone signals on
    // the tasksRunningCondition condition variable.
    while (numUnfinishedTasks > 0) {
        lCheckPthread(pthread_cond_wait(&tasksRunningCondition,
                                        &tasksRunningConditionMutex),
                      "Error from pthread_cond_wait");
    }

    // The mutex is held here on every path: it was locked above, and
    // pthread_cond_wait() re-acquires it before returning.  So the unlock
    // is valid even when the loop body never ran because all of the tasks
    // had already finished.
    lCheckPthread(pthread_mutex_unlock(&tasksRunningConditionMutex),
                  "Error from pthread_mutex_unlock");
}