Add stencil example
This commit is contained in:
2
examples/stencil/.gitignore
vendored
Normal file
2
examples/stencil/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
stencil
|
||||||
|
objs
|
||||||
38
examples/stencil/Makefile
Normal file
38
examples/stencil/Makefile
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
# Build rules for the ispc "stencil" example.

ARCH = $(shell uname)

# Task-system implementation linked into the example: pthreads by default,
# Grand Central Dispatch on Mac OS X.
TASK_CXX=tasks_pthreads.cpp
TASK_LIB=-lpthread

ifeq ($(ARCH), Darwin)
  TASK_CXX=tasks_gcd.cpp
  TASK_LIB=
endif

TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o))

CXX=g++ -m64
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64

default: stencil

.PHONY: default dirs clean

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ stencil

# "dirs" is phony, so it is listed only as an order-only prerequisite (after
# the "|") of the rules that write into objs/.  That guarantees the directory
# exists before anything is compiled, even with "make -j", without forcing
# the executable to be relinked on every invocation.
stencil: objs/stencil.o objs/stencil_serial.o objs/stencil_ispc.o $(TASK_OBJ)
	$(CXX) $(CXXFLAGS) -o $@ objs/stencil.o objs/stencil_ispc.o objs/stencil_serial.o $(TASK_OBJ) -lm $(TASK_LIB)

objs/%.o: %.cpp | dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

# stencil.cpp includes the header that ispc generates.
objs/stencil.o: objs/stencil_ispc.h

objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||||
178
examples/stencil/stencil.cpp
Normal file
178
examples/stencil/stencil.cpp
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define NOMINMAX
|
||||||
|
#pragma warning (disable: 4244)
|
||||||
|
#pragma warning (disable: 4305)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <math.h>
|
||||||
|
#include "../timing.h"
|
||||||
|
#include "../cpuid.h"
|
||||||
|
#include "stencil_ispc.h"
|
||||||
|
using namespace ispc;
|
||||||
|
|
||||||
|
|
||||||
|
// Make sure that the vector ISA used during compilation is supported by
|
||||||
|
// the processor. The ISPC_TARGET_* macro is set in the ispc-generated
|
||||||
|
// header file that we include above.
|
||||||
|
static void
|
||||||
|
ensureTargetISAIsSupported() {
|
||||||
|
#if defined(ISPC_TARGET_SSE2)
|
||||||
|
bool isaSupported = CPUSupportsSSE2();
|
||||||
|
const char *target = "SSE2";
|
||||||
|
#elif defined(ISPC_TARGET_SSE4)
|
||||||
|
bool isaSupported = CPUSupportsSSE4();
|
||||||
|
const char *target = "SSE4";
|
||||||
|
#elif defined(ISPC_TARGET_AVX)
|
||||||
|
bool isaSupported = CPUSupportsAVX();
|
||||||
|
const char *target = "AVX";
|
||||||
|
#else
|
||||||
|
#error "Unknown ISPC_TARGET_* value"
|
||||||
|
#endif
|
||||||
|
if (!isaSupported) {
|
||||||
|
fprintf(stderr, "***\n*** Error: the ispc-compiled code uses the %s instruction "
|
||||||
|
"set, which isn't\n*** supported by this computer's CPU!\n", target);
|
||||||
|
fprintf(stderr, "***\n*** Please modify the "
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
"MSVC project file "
|
||||||
|
#else
|
||||||
|
"Makefile "
|
||||||
|
#endif
|
||||||
|
"to select another target (e.g. sse2)\n***\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Prototypes for the two stencil drivers timed by main().  Both take the
// time-step range [t0, t1), the x/y/z sub-range to update, the full volume
// dimensions, the four stencil coefficients, the vsq volume, and the two
// buffers that are alternately read and written.  The coefficient array is
// declared with four entries to agree with the definitions and with the
// four-element array that main() passes.
extern void loop_stencil_serial(int t0, int t1, int x0, int x1,
                                int y0, int y1, int z0, int z1,
                                int Nx, int Ny, int Nz,
                                const float coef[4],
                                const float vsq[],
                                float Aeven[], float Aodd[]);
extern "C" {
extern void loop_stencil_ispc(int t0, int t1, int x0, int x1,
                              int y0, int y1, int z0, int z1,
                              int Nx, int Ny, int Nz,
                              const float coef[4],
                              const float vsq[],
                              float Aeven[], float Aodd[]);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Fills the Nx*Ny*Nz volumes with their starting values.  Cells are stored
// with x varying fastest, then y, then z.
//   A[0]: x/Nx in the lower half of the x range, y/Ny in the upper half.
//   A[1]: all zeros.
//   vsq : x*y*z divided by the total number of cells.
void InitData(int Nx, int Ny, int Nz, float *A[2], float *vsq) {
    const float fNx = float(Nx);
    const float fNy = float(Ny);
    const int halfNx = Nx / 2;

    int cell = 0;
    for (int z = 0; z < Nz; ++z) {
        for (int y = 0; y < Ny; ++y) {
            for (int x = 0; x < Nx; ++x) {
                float start;
                if (x < halfNx)
                    start = x / fNx;
                else
                    start = y / fNy;

                A[0][cell] = start;
                A[1][cell] = 0;
                vsq[cell] = x*y*z / float(Nx * Ny * Nz);
                ++cell;
            }
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
ensureTargetISAIsSupported();
|
||||||
|
|
||||||
|
extern void TasksInit();
|
||||||
|
TasksInit();
|
||||||
|
|
||||||
|
int Nx = 256, Ny = 256, Nz = 256;
|
||||||
|
int width = 4;
|
||||||
|
float *Aserial[2], *Aispc[2];
|
||||||
|
Aserial[0] = new float [Nx * Ny * Nz];
|
||||||
|
Aserial[1] = new float [Nx * Ny * Nz];
|
||||||
|
Aispc[0] = new float [Nx * Ny * Nz];
|
||||||
|
Aispc[1] = new float [Nx * Ny * Nz];
|
||||||
|
float *vsq = new float [Nx * Ny * Nz];
|
||||||
|
|
||||||
|
float coeff[4] = { 0.5, -.25, .125, -.0625 };
|
||||||
|
|
||||||
|
InitData(Nx, Ny, Nz, Aispc, vsq);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Compute the image using the ispc implementation; report the minimum
|
||||||
|
// time of three runs.
|
||||||
|
//
|
||||||
|
double minISPC = 1e30;
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
|
||||||
|
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
||||||
|
Aispc[0], Aispc[1]);
|
||||||
|
double dt = get_elapsed_mcycles();
|
||||||
|
minISPC = std::min(minISPC, dt);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("[stencil ispc]:\t\t\t[%.3f] million cycles\n", minISPC);
|
||||||
|
|
||||||
|
InitData(Nx, Ny, Nz, Aserial, vsq);
|
||||||
|
|
||||||
|
//
|
||||||
|
// And run the serial implementation 3 times, again reporting the
|
||||||
|
// minimum time.
|
||||||
|
//
|
||||||
|
double minSerial = 1e30;
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width,
|
||||||
|
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
||||||
|
Aserial[0], Aserial[1]);
|
||||||
|
double dt = get_elapsed_mcycles();
|
||||||
|
minSerial = std::min(minSerial, dt);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("[stencil serial]:\t\t[%.3f] millon cycles\n", minSerial);
|
||||||
|
|
||||||
|
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minSerial/minISPC);
|
||||||
|
|
||||||
|
// Check for agreement
|
||||||
|
int offset = 0;
|
||||||
|
for (int z = 0; z < Nz; ++z)
|
||||||
|
for (int y = 0; y < Ny; ++y)
|
||||||
|
for (int x = 0; x < Nx; ++x, ++offset) {
|
||||||
|
float error = fabsf((Aserial[1][offset] - Aispc[1][offset]) /
|
||||||
|
Aserial[1][offset]);
|
||||||
|
if (error > 1e-4)
|
||||||
|
printf("Error @ (%d,%d,%d): ispc = %f, serial = %f\n",
|
||||||
|
x, y, z, Aispc[1][offset], Aserial[1][offset]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
96
examples/stencil/stencil.ispc
Normal file
96
examples/stencil/stencil.ispc
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// One time step of the stencil over the sub-volume [x0,x1) x [y0,y1) x
// [z0,z1).  Each output cell combines the centre value of Ain with its
// neighbours at distance 1, 2 and 3 along every axis, weighted by coef[0..3],
// and the result is scaled by vsq and folded into Aout.
static task void
stencil_step(uniform int x0, uniform int x1,
             uniform int y0, uniform int y1,
             uniform int z0, uniform int z1,
             uniform int Nx, uniform int Ny, uniform int Nz,
             uniform const float coef[4], uniform const float vsq[],
             uniform const float Ain[], uniform float Aout[]) {
    const uniform int Nxy = Nx * Ny;

// Accessors for the cell at offset (x, y, z) from the current cell `index`.
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]

    for (uniform int z = z0; z < z1; ++z) {
        for (uniform int y = y0; y < y1; ++y) {
            // Offset of the first cell of this row.
            const uniform int rowBase = (z * Nxy) + (y * Nx);

            // Assumes that (x1-x0) % programCount == 0
            for (uniform int x = x0; x < x1; x += programCount) {
                int index = rowBase + x + programIndex;

                // Sums of the six neighbours at distance 1, 2 and 3.
                float ring1 = A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
                              A_cur(0, +1, 0) + A_cur(0, -1, 0) +
                              A_cur(0, 0, +1) + A_cur(0, 0, -1);
                float ring2 = A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
                              A_cur(0, +2, 0) + A_cur(0, -2, 0) +
                              A_cur(0, 0, +2) + A_cur(0, 0, -2);
                float ring3 = A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
                              A_cur(0, +3, 0) + A_cur(0, -3, 0) +
                              A_cur(0, 0, +3) + A_cur(0, 0, -3);

                float div = coef[0] * A_cur(0, 0, 0) +
                            coef[1] * ring1 +
                            coef[2] * ring2 +
                            coef[3] * ring3;

                A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
                    vsq[index] * div;
            }
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Runs time steps t0 .. t1-1 of the stencil.  Even steps read Aeven and write
// Aodd; odd steps do the reverse.  Every step is split into one task per
// z slice.
export void loop_stencil_ispc(uniform int t0, uniform int t1,
                              uniform int x0, uniform int x1,
                              uniform int y0, uniform int y1,
                              uniform int z0, uniform int z1,
                              uniform int Nx, uniform int Ny, uniform int Nz,
                              uniform const float coef[4],
                              uniform const float vsq[],
                              uniform float Aeven[], uniform float Aodd[])
{
    // Parallelize across cores as well: each task will work on a slice
    // of "dz" in the z extent of the volume.  (dz=1 seems to work
    // better than any larger values.)
    const uniform int dz = 1;

    for (uniform int t = t0; t < t1; ++t) {
        const uniform bool evenStep = ((t & 1) == 0);

        for (uniform int z = z0; z < z1; z += dz) {
            if (evenStep) {
                launch < stencil_step(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz, coef, vsq,
                                      Aeven, Aodd) >;
            }
            else {
                launch < stencil_step(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz, coef, vsq,
                                      Aodd, Aeven) >;
            }
        }

        // We need to wait for all of the launched tasks to finish before
        // starting the next iteration.
        sync;
    }
}
|
||||||
86
examples/stencil/stencil_serial.cpp
Normal file
86
examples/stencil/stencil_serial.cpp
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// One time step of the stencil over the sub-volume [x0,x1) x [y0,y1) x
// [z0,z1) of an Nx*Ny*Nz volume stored with x varying fastest.  Each output
// cell combines the centre value of Ain with its neighbours at distance 1, 2
// and 3 along every axis, weighted by coef[0..3]; the result is scaled by vsq
// and folded into Aout.
static void
stencil_step(int x0, int x1,
             int y0, int y1,
             int z0, int z1,
             int Nx, int Ny, int Nz,
             const float coef[4], const float vsq[],
             const float Ain[], float Aout[]) {
    const int rowStride = Nx;
    const int planeStride = Nx * Ny;

    for (int z = z0; z < z1; ++z) {
        for (int y = y0; y < y1; ++y) {
            const int rowBase = (z * planeStride) + (y * rowStride);

            for (int x = x0; x < x1; ++x) {
                const int cell = rowBase + x;
                // Input volume addressed relative to the current cell.
                const float *c = &Ain[cell];
                const float center = c[0];

                // Sums of the six neighbours at distance 1, 2 and 3, added
                // in the order +x, -x, +y, -y, +z, -z.
                const float ring1 = c[+1] + c[-1] +
                                    c[+rowStride] + c[-rowStride] +
                                    c[+planeStride] + c[-planeStride];
                const float ring2 = c[+2] + c[-2] +
                                    c[+2 * rowStride] + c[-2 * rowStride] +
                                    c[+2 * planeStride] + c[-2 * planeStride];
                const float ring3 = c[+3] + c[-3] +
                                    c[+3 * rowStride] + c[-3 * rowStride] +
                                    c[+3 * planeStride] + c[-3 * planeStride];

                const float div = coef[0] * center +
                                  coef[1] * ring1 +
                                  coef[2] * ring2 +
                                  coef[3] * ring3;

                Aout[cell] = 2 * center - Aout[cell] + vsq[cell] * div;
            }
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
void loop_stencil_serial(int t0, int t1,
|
||||||
|
int x0, int x1,
|
||||||
|
int y0, int y1,
|
||||||
|
int z0, int z1,
|
||||||
|
int Nx, int Ny, int Nz,
|
||||||
|
const float coef[4],
|
||||||
|
const float vsq[],
|
||||||
|
float Aeven[], float Aodd[])
|
||||||
|
{
|
||||||
|
for (int t = t0; t < t1; ++t) {
|
||||||
|
if ((t & 1) == 0)
|
||||||
|
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq,
|
||||||
|
Aeven, Aodd);
|
||||||
|
else
|
||||||
|
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq,
|
||||||
|
Aodd, Aeven);
|
||||||
|
}
|
||||||
|
}
|
||||||
141
examples/stencil/tasks_concrt.cpp
Normal file
141
examples/stencil/tasks_concrt.cpp
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Simple task system implementation for ispc based on Microsoft's
|
||||||
|
Concurrency Runtime. */
|
||||||
|
|
||||||
|
#include <windows.h>
|
||||||
|
#include <concrt.h>
|
||||||
|
using namespace Concurrency;
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// ispc expects these functions to have C linkage / not be mangled
|
||||||
|
extern "C" {
|
||||||
|
void ISPCLaunch(void *f, void *data);
|
||||||
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef void (*TaskFuncType)(void *, int, int);
|
||||||
|
|
||||||
|
struct TaskInfo {
|
||||||
|
TaskFuncType ispcFunc;
|
||||||
|
void *ispcData;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This is a simple implementation that just aborts if more than MAX_TASKS
|
||||||
|
// are launched. It could easily be extended to be more general...
|
||||||
|
|
||||||
|
#define MAX_TASKS 4096
|
||||||
|
static int taskOffset;
|
||||||
|
static TaskInfo taskInfo[MAX_TASKS];
|
||||||
|
static event *events[MAX_TASKS];
|
||||||
|
static CRITICAL_SECTION criticalSection;
|
||||||
|
static bool initialized = false;
|
||||||
|
|
||||||
|
void
|
||||||
|
TasksInit() {
|
||||||
|
InitializeCriticalSection(&criticalSection);
|
||||||
|
for (int i = 0; i < MAX_TASKS; ++i)
|
||||||
|
events[i] = new event;
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __cdecl
|
||||||
|
lRunTask(LPVOID param) {
|
||||||
|
TaskInfo *ti = (TaskInfo *)param;
|
||||||
|
|
||||||
|
// Actually run the task.
|
||||||
|
// FIXME: like the tasks_gcd.cpp implementation, this is passing bogus
|
||||||
|
// values for the threadIndex and threadCount builtins, which in turn
|
||||||
|
// will cause bugs in code that uses those. FWIW this example doesn't
|
||||||
|
// use them...
|
||||||
|
int threadIndex = 0;
|
||||||
|
int threadCount = 1;
|
||||||
|
ti->ispcFunc(ti->ispcData, threadIndex, threadCount);
|
||||||
|
|
||||||
|
// Signal the event that this task is done
|
||||||
|
int taskNum = ti - &taskInfo[0];
|
||||||
|
events[taskNum]->set();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
ISPCLaunch(void *func, void *data) {
|
||||||
|
if (!initialized) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a TaskInfo struct for this task
|
||||||
|
EnterCriticalSection(&criticalSection);
|
||||||
|
TaskInfo *ti = &taskInfo[taskOffset++];
|
||||||
|
assert(taskOffset < MAX_TASKS);
|
||||||
|
LeaveCriticalSection(&criticalSection);
|
||||||
|
|
||||||
|
// And pass it on to the Concurrency Runtime...
|
||||||
|
ti->ispcFunc = (TaskFuncType)func;
|
||||||
|
ti->ispcData = data;
|
||||||
|
CurrentScheduler::ScheduleTask(lRunTask, ti);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCSync() {
|
||||||
|
if (!initialized) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
event::wait_for_multiple(&events[0], taskOffset, true,
|
||||||
|
COOPERATIVE_TIMEOUT_INFINITE);
|
||||||
|
|
||||||
|
for (int i = 0; i < taskOffset; ++i)
|
||||||
|
events[i]->reset();
|
||||||
|
|
||||||
|
taskOffset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||||
|
return _aligned_malloc(size, alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Releases a block via _aligned_free(), the counterpart of the allocator
// used by ISPCMalloc().
void ISPCFree(void *ptr)
{
    _aligned_free(ptr);
}
|
||||||
103
examples/stencil/tasks_gcd.cpp
Normal file
103
examples/stencil/tasks_gcd.cpp
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* A simple task system for ispc programs based on Apple's Grand Central
|
||||||
|
Dispatch. */
|
||||||
|
|
||||||
|
#include <dispatch/dispatch.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
static bool initialized = false;
|
||||||
|
static dispatch_queue_t gcdQueue;
|
||||||
|
static dispatch_group_t gcdGroup;
|
||||||
|
|
||||||
|
// ispc expects these functions to have C linkage / not be mangled
|
||||||
|
extern "C" {
|
||||||
|
void ISPCLaunch(void *f, void *data);
|
||||||
|
void ISPCSync();
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TaskInfo {
|
||||||
|
void *func;
|
||||||
|
void *data;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
TasksInit() {
|
||||||
|
gcdQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||||
|
gcdGroup = dispatch_group_create();
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
lRunTask(void *ti) {
|
||||||
|
typedef void (*TaskFuncType)(void *, int, int);
|
||||||
|
TaskInfo *taskInfo = (TaskInfo *)ti;
|
||||||
|
|
||||||
|
TaskFuncType func = (TaskFuncType)(taskInfo->func);
|
||||||
|
|
||||||
|
// FIXME: these are bogus values; may cause bugs in code that depends
|
||||||
|
// on them having unique values in different threads.
|
||||||
|
int threadIndex = 0;
|
||||||
|
int threadCount = 1;
|
||||||
|
// Actually run the task
|
||||||
|
func(taskInfo->data, threadIndex, threadCount);
|
||||||
|
|
||||||
|
// FIXME: taskInfo leaks...
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCLaunch(void *func, void *data) {
|
||||||
|
if (!initialized) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
TaskInfo *ti = new TaskInfo;
|
||||||
|
ti->func = func;
|
||||||
|
ti->data = data;
|
||||||
|
dispatch_group_async_f(gcdGroup, gcdQueue, ti, lRunTask);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCSync() {
|
||||||
|
if (!initialized) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all of the tasks in the group to complete before returning
|
||||||
|
dispatch_group_wait(gcdGroup, DISPATCH_TIME_FOREVER);
|
||||||
|
}
|
||||||
295
examples/stencil/tasks_pthreads.cpp
Normal file
295
examples/stencil/tasks_pthreads.cpp
Normal file
@@ -0,0 +1,295 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <semaphore.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/param.h>
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// ispc expects these functions to have C linkage / not be mangled
|
||||||
|
extern "C" {
|
||||||
|
void ISPCLaunch(void *f, void *data);
|
||||||
|
void ISPCSync();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int nThreads;
|
||||||
|
static pthread_t *threads;
|
||||||
|
static pthread_mutex_t taskQueueMutex;
|
||||||
|
static std::vector<std::pair<void *, void *> > taskQueue;
|
||||||
|
static sem_t *workerSemaphore;
|
||||||
|
static uint32_t numUnfinishedTasks;
|
||||||
|
static pthread_mutex_t tasksRunningConditionMutex;
|
||||||
|
static pthread_cond_t tasksRunningCondition;
|
||||||
|
|
||||||
|
static void *lTaskEntry(void *arg);
|
||||||
|
|
||||||
|
/** Figure out how many CPU cores there are in the system.

    On Linux this is the number of processors currently online as reported
    by sysconf(); elsewhere (Mac OS X) it is the "hw.logicalcpu" sysctl
    value.  If the query fails, a message is printed to stderr and 2 is
    returned as a guess.
 */
static int
lNumCPUCores() {
#if defined(__linux__)
    // sysconf() returns -1 on error; never hand a non-positive thread count
    // back to the caller.
    long nOnline = sysconf(_SC_NPROCESSORS_ONLN);
    if (nOnline < 1) {
        fprintf(stderr, "sysconf() to find number of cores present failed. Guessing 2.\n");
        return 2;
    }
    return (int)nOnline;
#else
    // Mac
    int mib[2];
    mib[0] = CTL_HW;
    size_t length = 2;
    if (sysctlnametomib("hw.logicalcpu", mib, &length) == -1) {
        fprintf(stderr, "sysctlnametomib() failed. Guessing 2 cores.\n");
        return 2;
    }
    assert(length == 2);

    int nCores = 0;
    size_t size = sizeof(nCores);

    if (sysctl(mib, 2, &nCores, &size, NULL, 0) == -1) {
        fprintf(stderr, "sysctl() to find number of cores present failed. Guessing 2.\n");
        return 2;
    }
    return nCores;
#endif
}
|
||||||
|
|
||||||
|
void
|
||||||
|
TasksInit() {
|
||||||
|
nThreads = lNumCPUCores();
|
||||||
|
|
||||||
|
threads = new pthread_t[nThreads];
|
||||||
|
|
||||||
|
int err;
|
||||||
|
if ((err = pthread_mutex_init(&taskQueueMutex, NULL)) != 0) {
|
||||||
|
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
char name[32];
|
||||||
|
sprintf(name, "mandelbrot.%d", (int)getpid());
|
||||||
|
workerSemaphore = sem_open(name, O_CREAT, S_IRUSR|S_IWUSR, 0);
|
||||||
|
if (!workerSemaphore) {
|
||||||
|
fprintf(stderr, "Error creating semaphore: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((err = pthread_cond_init(&tasksRunningCondition, NULL)) != 0) {
|
||||||
|
fprintf(stderr, "Error creating condition variable: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((err = pthread_mutex_init(&tasksRunningConditionMutex, NULL)) != 0) {
|
||||||
|
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < nThreads; ++i) {
|
||||||
|
err = pthread_create(&threads[i], NULL, &lTaskEntry, reinterpret_cast<void *>(i));
|
||||||
|
if (err != 0) {
|
||||||
|
fprintf(stderr, "Error creating pthread %d: %s\n", i, strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
ISPCLaunch(void *f, void *d) {
|
||||||
|
if (threads == NULL) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Acquire mutex, add task
|
||||||
|
//
|
||||||
|
int err;
|
||||||
|
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
taskQueue.push_back(std::make_pair(f, d));
|
||||||
|
|
||||||
|
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Update count of number of tasks left to run
|
||||||
|
//
|
||||||
|
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
++numUnfinishedTasks;
|
||||||
|
|
||||||
|
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Post to the worker semaphore to wake up worker threads that are
|
||||||
|
// sleeping waiting for tasks to show up
|
||||||
|
//
|
||||||
|
if ((err = sem_post(workerSemaphore)) != 0) {
|
||||||
|
fprintf(stderr, "Error from sem_post: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void *
|
||||||
|
lTaskEntry(void *arg) {
|
||||||
|
int threadIndex = int(reinterpret_cast<int64_t>(arg));
|
||||||
|
int threadCount = nThreads;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int err;
|
||||||
|
if ((err = sem_wait(workerSemaphore)) != 0) {
|
||||||
|
fprintf(stderr, "Error from sem_wait: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<void *, void *> myTask;
|
||||||
|
//
|
||||||
|
// Acquire mutex, get task
|
||||||
|
//
|
||||||
|
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (taskQueue.size() == 0) {
|
||||||
|
//
|
||||||
|
// Task queue is empty, go back and wait on the semaphore
|
||||||
|
//
|
||||||
|
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
myTask = taskQueue.back();
|
||||||
|
taskQueue.pop_back();
|
||||||
|
|
||||||
|
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Do work for _myTask_
|
||||||
|
//
|
||||||
|
typedef void (*TaskFunType)(void *, int, int);
|
||||||
|
TaskFunType func = (TaskFunType)myTask.first;
|
||||||
|
func(myTask.second, threadIndex, threadCount);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Decrement the number of unfinished tasks counter
|
||||||
|
//
|
||||||
|
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int unfinished = --numUnfinishedTasks;
|
||||||
|
if (unfinished == 0) {
|
||||||
|
//
|
||||||
|
// Signal the "no more tasks are running" condition if all of
|
||||||
|
// them are done.
|
||||||
|
//
|
||||||
|
int err;
|
||||||
|
if ((err = pthread_cond_signal(&tasksRunningCondition)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_cond_signal: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_exit(NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ISPCSync() {
|
||||||
|
if (threads == NULL) {
|
||||||
|
fprintf(stderr, "You must call TasksInit() before launching tasks.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int err;
|
||||||
|
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// As long as there are tasks running, wait on the condition variable;
|
||||||
|
// doing so causes this thread to go to sleep until someone signals on
|
||||||
|
// the tasksRunningCondition condition variable.
|
||||||
|
while (numUnfinishedTasks > 0) {
|
||||||
|
if ((err = pthread_cond_wait(&tasksRunningCondition,
|
||||||
|
&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_cond_wait: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We acquire ownership of the condition variable mutex when the above
|
||||||
|
// pthread_cond_wait returns.
|
||||||
|
// FIXME: is there a lurking issue here if numUnfinishedTasks gets back
|
||||||
|
// to zero by the time we get to ISPCSync() and thence we're trying to
|
||||||
|
// unlock a mutex we don't have a lock on?
|
||||||
|
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||||
|
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user