diff --git a/examples/README.txt b/examples/README.txt index 7dcae8bf..25aa3a21 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -39,9 +39,6 @@ example implementation of this function that counts the number of times the callback is made and records some statistics about control flow coherence is provided in the instrument.cpp file. -*** Note: on Linux, this example currently hits an assertion in LLVM during -*** compilation - Deferred ======== @@ -110,6 +107,13 @@ This program implements both the Black-Scholes and Binomial options pricing models in both ispc and regular serial C++ code. +Perfbench +========= + +This runs a number of microbenchmarks to measure system performance and +code generation quality. + + RT == diff --git a/examples/perfbench/Makefile b/examples/perfbench/Makefile new file mode 100644 index 00000000..43684c71 --- /dev/null +++ b/examples/perfbench/Makefile @@ -0,0 +1,7 @@ + +EXAMPLE=perfbench +CPP_SRC=perfbench.cpp perfbench_serial.cpp +ISPC_SRC=perfbench.ispc +ISPC_TARGETS=sse2,sse4,avx + +include ../common.mk diff --git a/examples/perfbench/perfbench.cpp b/examples/perfbench/perfbench.cpp new file mode 100644 index 00000000..04e72bd9 --- /dev/null +++ b/examples/perfbench/perfbench.cpp @@ -0,0 +1,108 @@ +/* + Copyright (c) 2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#define NOMINMAX
+#pragma warning (disable: 4244)
+#pragma warning (disable: 4305)
+#endif
+
+// NOTE(review): the header names on the two system #include lines below
+// were missing (bare "#include").  <stdio.h> is required by the printf()
+// call in main(); <algorithm> is presumed -- confirm against the upstream
+// file.
+#include <stdio.h>
+#include <algorithm>
+#include "../timing.h"
+
+#include "perfbench_ispc.h"
+
+// Signature shared by every kernel in the table below.  main() calls each
+// one as (data array, element count, zero-filled array, 3-float result).
+typedef void (FuncType)(float *, int, float *, float *);
+
+// One benchmark row: two implementations ("a" and "b") that are timed
+// against each other, their display names, and a label for the row.
+struct PerfTest {
+    FuncType *aFunc;
+    const char *aName;
+    FuncType *bFunc;
+    const char *bName;
+    const char *testName;
+};
+
+extern void xyzSumAOS(float *a, int count, float *zeros, float *result);
+extern void xyzSumSOA(float *a, int count, float *zeros, float *result);
+
+
+// Fills ptr[0..count) with i / 2^20.
+static void
+lInitData(float *ptr, int count) {
+    for (int i = 0; i < count; ++i)
+        ptr[i] = float(i) / (1024.f * 1024.f);
+}
+
+static PerfTest tests[] = {
+    { xyzSumAOS, "serial", ispc::xyzSumAOS, "ispc", "AOS vector element sum (with coalescing)" },
+    { xyzSumAOS, "serial", ispc::xyzSumAOSStdlib, "ispc", "AOS vector element sum (stdlib swizzle)" },
+    { xyzSumAOS, "serial", ispc::xyzSumAOSNoCoalesce, "ispc", "AOS vector element sum (no coalescing)" },
+    { xyzSumSOA, "serial", ispc::xyzSumSOA, "ispc", "SOA vector element sum" },
+    { ispc::gathers, "gather", ispc::loads, "vector load", "Memory reads" },
+    { ispc::scatters, "scatter", ispc::stores, "vector store", "Memory writes" },
+};
+
+// For every row of tests[]: re-initialise the data, time 100 calls of the
+// "a" implementation, re-initialise, time 100 calls of the "b"
+// implementation, then print both get_elapsed_mcycles() readings and
+// their ratio.
+int main() {
+    int count = 3*64*1024;
+    float *a = new float[count];
+    float zeros[32] = { 0 };
+
+    int nTests = sizeof(tests) / sizeof(tests[0]);
+    for (int i = 0; i < nTests; ++i) {
+        // Data is reset outside the timed region so both sides start from
+        // identical input (the write benchmarks overwrite the array).
+        lInitData(a, count);
+        reset_and_start_timer();
+        float resultA[3] = { 0, 0, 0 };
+        for (int j = 0; j < 100; ++j)
+            tests[i].aFunc(a, count, zeros, resultA);
+        double aTime = get_elapsed_mcycles();
+
+        lInitData(a, count);
+        reset_and_start_timer();
+        float resultB[3] = { 0, 0, 0 };
+        for (int j = 0; j < 100; ++j)
+            tests[i].bFunc(a, count, zeros, resultB);
+        double bTime = get_elapsed_mcycles();
+
+        printf("%-40s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n",
+               tests[i].testName, aTime, tests[i].aName, bTime, tests[i].bName,
+               aTime/bTime);
+#if 0
+        // Debug aid: compare the two result vectors.  Uses the names that
+        // actually exist in this scope so the block compiles when enabled.
+        printf("\t(%f %f %f) - (%f %f %f)\n", resultA[0], resultA[1],
+               resultA[2], resultB[0], resultB[1], resultB[2]);
+#endif
+    }
+
+    // Release the buffer allocated with new[] above.
+    delete[] a;
+    return 0;
+}
+
diff --git a/examples/perfbench/perfbench.ispc b/examples/perfbench/perfbench.ispc
new file mode 100644
index 00000000..38fe6cee
--- /dev/null
+++ b/examples/perfbench/perfbench.ispc
@@ -0,0 +1,170 @@
+/*
+  Copyright (c) 2012, Intel Corporation
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// Sums the x, y and z components of count/3 interleaved (x,y,z) triples
+// and writes the three totals to result[0..2].  zeros is not used.
+export void xyzSumAOS(uniform float array[], uniform int count,
+                      uniform float zeros[], uniform float result[]) {
+    float xsum = 0, ysum = 0, zsum = 0;
+    foreach (i = 0 ... count/3) {
+        float x = array[3*i];
+        float y = array[3*i+1];
+        float z = array[3*i+2];
+
+        xsum += x;
+        ysum += y;
+        zsum += z;
+    }
+    result[0] = reduce_add(xsum);
+    result[1] = reduce_add(ysum);
+    result[2] = reduce_add(zsum);
+}
+
+// Same sums as xyzSumAOS, but each step loads programCount triples with
+// aos_to_soa3().  The trip count is derived from count (it used to be
+// hard-coded to 64*1024, which ignored the parameter and read past the
+// end of any shorter array).  Every iteration consumes 3*programCount
+// floats, so count/3 must be a multiple of programCount.
+export void xyzSumAOSStdlib(uniform float array[], uniform int count,
+                            uniform float zeros[], uniform float result[]) {
+    float xsum = 0, ysum = 0, zsum = 0;
+    for (uniform int i = 0; i < count/3; i += programCount) {
+        float x, y, z;
+        aos_to_soa3(&array[3*i], &x, &y, &z);
+
+        xsum += x;
+        ysum += y;
+        zsum += z;
+    }
+    result[0] = reduce_add(xsum);
+    result[1] = reduce_add(ysum);
+    result[2] = reduce_add(zsum);
+}
+
+// Same sums as xyzSumAOS, but every index has a per-lane offset read from
+// zerosArray added to it.  The harness passes an all-zero array, so the
+// addresses are unchanged at run time; presumably the offset is there so
+// the compiler cannot prove the accesses are regular -- confirm.
+export void xyzSumAOSNoCoalesce(uniform float array[], uniform int count,
+                                uniform float zerosArray[], uniform float result[]) {
+    int zeros = zerosArray[programIndex];
+    float xsum = 0, ysum = 0, zsum = 0;
+    foreach (i = 0 ... count/3) {
+        float x = array[3*i+zeros];
+        float y = array[3*i+1+zeros];
+        float z = array[3*i+2+zeros];
+
+        xsum += x;
+        ysum += y;
+        zsum += z;
+    }
+    result[0] = reduce_add(xsum);
+    result[1] = reduce_add(ysum);
+    result[2] = reduce_add(zsum);
+}
+
+// Sums x, y and z for data stored in blocks of 24 floats: 8 x values,
+// then 8 y values, then 8 z values.  The outer loop advances one block
+// (8 triples) per step, so count/3 must be a multiple of 8; the inner
+// loop covers the 8 entries programCount lanes at a time.  zeros is not
+// used.
+export void xyzSumSOA(uniform float array[], uniform int count,
+                      uniform float zeros[], uniform float result[]) {
+    float xsum = 0, ysum = 0, zsum = 0;
+    uniform float * uniform ap = array;
+    assert(programCount <= 8);
+
+    for (uniform int i = 0; i < count/3; i += 8, ap += 24) {
+        for (uniform int j = 0; j < 8; j += programCount) {
+            float x = ap[j + programIndex];
+            float y = ap[8 + j + programIndex];
+            float z = ap[16 + j + programIndex];
+
+            xsum += x;
+            ysum += y;
+            zsum += z;
+        }
+    }
+    result[0] = reduce_add(xsum);
+    result[1] = reduce_add(ysum);
+    result[2] = reduce_add(zsum);
+}
+
+// Sums array[0..count) through an index that includes a per-lane offset
+// read from zeros[]; only result[0] is written.
+export void gathers(uniform float array[], uniform int count,
+                    uniform float zeros[], uniform float result[]) {
+    float sum = 0;
+    int zero = zeros[programIndex];
+    foreach (i = 0 ... count)
+        sum += array[i + zero];
+    result[0] = reduce_add(sum);
+}
+
+
+// Sums array[0..count) with plain foreach indexing; only result[0] is
+// written.  zeros is not used.
+export void loads(uniform float array[], uniform int count,
+                  uniform float zeros[], uniform float result[]) {
+    float sum = 0;
+    foreach (i = 0 ... count)
+        sum += array[i];
+    result[0] = reduce_add(sum);
+}
+
+
+// Writes the per-lane value read from zeros[] to array[i + zero] for
+// i in [0, count).  result is not written.
+export void scatters(uniform float array[], uniform int count,
+                     uniform float zeros[], uniform float result[]) {
+    int zero = zeros[programIndex];
+    foreach (i = 0 ... count)
+        array[i + zero] = zero;
+}
+
+
+// Writes the per-lane value read from zeros[] to array[i] for
+// i in [0, count).  result is not written.
+export void stores(uniform float array[], uniform int count,
+                   uniform float zeros[], uniform float result[]) {
+    int zero = zeros[programIndex];
+    foreach (i = 0 ... count)
+        array[i] = zero;
+}
+
+// Divides each interleaved (x,y,z) triple by x*x + y*y + z*z.  The reads
+// add a per-lane offset from zeroArray; the writes do not.  The offset is
+// an int, matching xyzSumAOSNoCoalesce and gathers, so the index
+// arithmetic stays in integers.
+// NOTE(review): the divisor is the squared length, not the length, so
+// the result is not unit-length despite the name -- confirm intent.
+export void normalizeAOSNoCoalesce(uniform float array[], uniform int count,
+                                   uniform float zeroArray[]) {
+    int zeros = zeroArray[programIndex];
+    foreach (i = 0 ... count/3) {
+        float x = array[3*i+zeros];
+        float y = array[3*i+1+zeros];
+        float z = array[3*i+2+zeros];
+
+        float l2 = x*x + y*y + z*z;
+
+        array[3*i] /= l2;
+        array[3*i+1] /= l2;
+        array[3*i+2] /= l2;
+    }
+}
+
+// Divides each interleaved (x,y,z) triple by x*x + y*y + z*z.  zeros is
+// not used.
+// NOTE(review): the indexing is the interleaved 3*i pattern used by the
+// AOS kernels, not the blocked layout xyzSumSOA reads, and the divisor is
+// the squared length -- confirm intent.
+export void normalizeSOA(uniform float array[], uniform int count,
+                         uniform float zeros[]) {
+    foreach (i = 0 ... count/3) {
+        float x = array[3*i];
+        float y = array[3*i+1];
+        float z = array[3*i+2];
+
+        float l2 = x*x + y*y + z*z;
+
+        array[3*i] /= l2;
+        array[3*i+1] /= l2;
+        array[3*i+2] /= l2;
+    }
+}
diff --git a/examples/perfbench/perfbench.vcxproj b/examples/perfbench/perfbench.vcxproj new file mode 100644 index 00000000..31974ac7 --- /dev/null +++ b/examples/perfbench/perfbench.vcxproj @@ -0,0 +1,175 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8} + Win32Proj + perfbench + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + + + + Document + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/perfbench/perfbench_serial.cpp b/examples/perfbench/perfbench_serial.cpp new file mode 100644 index 00000000..dfd8e370 --- /dev/null +++ b/examples/perfbench/perfbench_serial.cpp @@ -0,0 +1,61 @@ +/* + Copyright (c) 2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// NOTE(review): the header name on this #include line was missing (bare
+// "#include").  Nothing in this file needs a header; <math.h> is a
+// placeholder guess -- confirm against the upstream file.
+#include <math.h>
+
+// Serial reference: sums the x, y and z components of interleaved
+// (x,y,z) triples in a[0..count) into result[0..2].  zeros is not used.
+void
+xyzSumAOS(float *a, int count, float *zeros, float *result) {
+    float xsum = 0, ysum = 0, zsum = 0;
+    // Stop once a full triple no longer fits, so a count that is not a
+    // multiple of 3 never reads past the end of the array.
+    for (int i = 0; i + 2 < count; i += 3) {
+        xsum += a[i];
+        ysum += a[i+1];
+        zsum += a[i+2];
+    }
+    result[0] = xsum;
+    result[1] = ysum;
+    result[2] = zsum;
+}
+
+// Serial reference for data stored in blocks of 24 floats: 8 x values,
+// then 8 y values, then 8 z values.  Triple i lives in block i/8 at slot
+// i%8.  count/3 must be a multiple of 8, otherwise the last partial
+// block is read beyond a[count-1].  zeros is not used.
+void
+xyzSumSOA(float *a, int count, float *zeros, float *result) {
+    float xsum = 0, ysum = 0, zsum = 0;
+    for (int i = 0; i < count/3; ++i) {
+        float *p = a + (i >> 3) * 24 + (i & 7);
+        xsum += p[0];
+        ysum += p[8];
+        zsum += p[16];
+    }
+    result[0] = xsum;
+    result[1] = ysum;
+    result[2] = zsum;
+}