Add perfbench to examples: a few small microbenchmarks.
This commit is contained in:
@@ -39,9 +39,6 @@ example implementation of this function that counts the number of times the
|
|||||||
callback is made and records some statistics about control flow coherence
|
callback is made and records some statistics about control flow coherence
|
||||||
is provided in the instrument.cpp file.
|
is provided in the instrument.cpp file.
|
||||||
|
|
||||||
*** Note: on Linux, this example currently hits an assertion in LLVM during
|
|
||||||
*** compilation
|
|
||||||
|
|
||||||
|
|
||||||
Deferred
|
Deferred
|
||||||
========
|
========
|
||||||
@@ -110,6 +107,13 @@ This program implements both the Black-Scholes and Binomial options pricing
|
|||||||
models in both ispc and regular serial C++ code.
|
models in both ispc and regular serial C++ code.
|
||||||
|
|
||||||
|
|
||||||
|
Perfbench
|
||||||
|
=========
|
||||||
|
|
||||||
|
This runs a number of microbenchmarks to measure system performance and
|
||||||
|
code generation quality.
|
||||||
|
|
||||||
|
|
||||||
RT
|
RT
|
||||||
==
|
==
|
||||||
|
|
||||||
|
|||||||
7
examples/perfbench/Makefile
Normal file
7
examples/perfbench/Makefile
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
# Build settings for the perfbench example.
# EXAMPLE is spelled to match the directory and source-file prefix
# ("perfbench"); the original value "perbench" was a typo.
# NOTE(review): presumably ../common.mk uses EXAMPLE as the output binary
# name -- confirm against common.mk.
EXAMPLE=perfbench
CPP_SRC=perfbench.cpp perfbench_serial.cpp
ISPC_SRC=perfbench.ispc
ISPC_TARGETS=sse2,sse4,avx

include ../common.mk
|
||||||
108
examples/perfbench/perfbench.cpp
Normal file
108
examples/perfbench/perfbench.cpp
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2012, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
#define NOMINMAX
|
||||||
|
#pragma warning (disable: 4244)
|
||||||
|
#pragma warning (disable: 4305)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "../timing.h"
|
||||||
|
|
||||||
|
#include "perfbench_ispc.h"
|
||||||
|
|
||||||
|
// Signature shared by every benchmarked kernel, serial or ispc.
typedef void FuncType(float *a, int count, float *zeros, float *result);

// One row of the benchmark table: two implementations ("a" and "b") that
// are timed against each other, plus the labels used when reporting.
struct PerfTest {
    FuncType *aFunc;        // first implementation to time
    const char *aName;      // label printed after aFunc's cycle count
    FuncType *bFunc;        // second implementation to time
    const char *bName;      // label printed after bFunc's cycle count
    const char *testName;   // description printed at the start of the line
};

// Serial reference kernels (defined in perfbench_serial.cpp).
void xyzSumAOS(float *a, int count, float *zeros, float *result);
void xyzSumSOA(float *a, int count, float *zeros, float *result);
|
||||||
|
|
||||||
|
|
||||||
|
// Fills ptr[0 .. count-1] with a deterministic ramp: element i receives
// i / (1024 * 1024). A non-positive count leaves the buffer untouched.
static void
lInitData(float *ptr, int count) {
    const float scale = 1024.f * 1024.f;
    int idx = 0;
    while (idx < count) {
        ptr[idx] = (float)idx / scale;
        ++idx;
    }
}
|
||||||
|
|
||||||
|
// Benchmark table consumed by main(). Each row pairs an "a" implementation
// with a "b" implementation; main() times both and prints aTime/bTime as
// the speedup.
static PerfTest tests[] = {
    // Serial C++ versus the ispc variants of the x/y/z component sum.
    { xyzSumAOS, "serial",
      ispc::xyzSumAOS, "ispc",
      "AOS vector element sum (with coalescing)" },
    { xyzSumAOS, "serial",
      ispc::xyzSumAOSStdlib, "ispc",
      "AOS vector element sum (stdlib swizzle)" },
    { xyzSumAOS, "serial",
      ispc::xyzSumAOSNoCoalesce, "ispc",
      "AOS vector element sum (no coalescing)" },
    { xyzSumSOA, "serial",
      ispc::xyzSumSOA, "ispc",
      "SOA vector element sum" },

    // ispc versus ispc: the two memory access flavours named in the labels.
    { ispc::gathers, "gather",
      ispc::loads, "vector load",
      "Memory reads" },
    { ispc::scatters, "scatter",
      ispc::stores, "vector store",
      "Memory writes" },
};
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int count = 3*64*1024;
|
||||||
|
float *a = new float[count];
|
||||||
|
float zeros[32] = { 0 };
|
||||||
|
|
||||||
|
int nTests = sizeof(tests) / sizeof(tests[0]);
|
||||||
|
for (int i = 0; i < nTests; ++i) {
|
||||||
|
lInitData(a, count);
|
||||||
|
reset_and_start_timer();
|
||||||
|
float resultA[3] = { 0, 0, 0 };
|
||||||
|
for (int j = 0; j < 100; ++j)
|
||||||
|
tests[i].aFunc(a, count, zeros, resultA);
|
||||||
|
double aTime = get_elapsed_mcycles();
|
||||||
|
|
||||||
|
lInitData(a, count);
|
||||||
|
reset_and_start_timer();
|
||||||
|
float resultB[3] = { 0, 0, 0 };
|
||||||
|
for (int j = 0; j < 100; ++j)
|
||||||
|
tests[i].bFunc(a, count, zeros, resultB);
|
||||||
|
double bTime = get_elapsed_mcycles();
|
||||||
|
|
||||||
|
printf("%-40s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n",
|
||||||
|
tests[i].testName, aTime, tests[i].aName, bTime, tests[i].bName,
|
||||||
|
aTime/bTime);
|
||||||
|
#if 0
|
||||||
|
printf("\t(%f %f %f) - (%f %f %f)\n", resultSerial[0], resultSerial[1],
|
||||||
|
resultSerial[2], resultISPC[0], resultISPC[1], resultISPC[2]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
170
examples/perfbench/perfbench.ispc
Normal file
170
examples/perfbench/perfbench.ispc
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2012, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Sums the x, y and z components of count/3 interleaved (x,y,z) triples in
// array and writes the three totals to result[0..2]. zeros is not used.
export void xyzSumAOS(uniform float array[], uniform int count,
                      uniform float zeros[], uniform float result[]) {
    // Per-program-instance partial sums, combined by reduce_add() below.
    float sumX = 0, sumY = 0, sumZ = 0;

    foreach (pt = 0 ... count/3) {
        float px = array[3*pt];
        float py = array[3*pt + 1];
        float pz = array[3*pt + 2];

        sumX += px;
        sumY += py;
        sumZ += pz;
    }

    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||||
|
|
||||||
|
// Same component sum as xyzSumAOS, but each step de-interleaves
// programCount triples with the stdlib aos_to_soa3() routine.
// Totals are written to result[0..2]. zeros is not used.
// NOTE(review): the trip count is hard-coded to 64*1024 and the count
// parameter is ignored (count/3 is left commented out) -- confirm this is
// intentional before calling with a different buffer size.
export void xyzSumAOSStdlib(uniform float array[], uniform int count,
                            uniform float zeros[], uniform float result[]) {
    float sumX = 0, sumY = 0, sumZ = 0;

    for (uniform int base = 0; base < 64*1024 /*count/3*/; base += programCount) {
        float px, py, pz;
        aos_to_soa3(&array[3*base], &px, &py, &pz);

        sumX += px;
        sumY += py;
        sumZ += pz;
    }

    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||||
|
|
||||||
|
// Component sum over count/3 interleaved triples where every index has a
// per-lane offset, loaded from zerosArray[programIndex], added to it.
// Totals are written to result[0..2].
// NOTE(review): the benchmark table labels this "no coalescing"; presumably
// the run-time offset keeps the compiler from merging the three reads --
// confirm against the ispc documentation.
export void xyzSumAOSNoCoalesce(uniform float array[], uniform int count,
                                uniform float zerosArray[], uniform float result[]) {
    int offset = zerosArray[programIndex];
    float sumX = 0, sumY = 0, sumZ = 0;

    foreach (pt = 0 ... count/3) {
        float px = array[3*pt + offset];
        float py = array[3*pt + 1 + offset];
        float pz = array[3*pt + 2 + offset];

        sumX += px;
        sumY += py;
        sumZ += pz;
    }

    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||||
|
|
||||||
|
// Component sum over data stored in blocks of 24 floats: 8 x values, then
// 8 y values, then 8 z values. Advances one block per 8 points while the
// point index is below count/3, and writes the totals to result[0..2].
// zeros is not used.
export void xyzSumSOA(uniform float array[], uniform int count,
                      uniform float zeros[], uniform float result[]) {
    float sumX = 0, sumY = 0, sumZ = 0;
    uniform float * uniform blockPtr = array;
    // The inner loop walks the 8 lanes of a block in programCount-wide steps.
    assert(programCount <= 8);

    for (uniform int pt = 0; pt < count/3; pt += 8, blockPtr += 24) {
        for (uniform int lane = 0; lane < 8; lane += programCount) {
            float px = blockPtr[lane + programIndex];
            float py = blockPtr[8 + lane + programIndex];
            float pz = blockPtr[16 + lane + programIndex];

            sumX += px;
            sumY += py;
            sumZ += pz;
        }
    }

    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||||
|
|
||||||
|
// Sums array[0 .. count-1], reading each element through an index that
// includes a per-lane offset loaded from zeros[programIndex]. The total is
// written to result[0]. The benchmark table labels this variant "gather".
export void gathers(uniform float array[], uniform int count,
                    uniform float zeros[], uniform float result[]) {
    int offset = zeros[programIndex];
    float total = 0;
    foreach (idx = 0 ... count) {
        total += array[idx + offset];
    }
    result[0] = reduce_add(total);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Sums array[0 .. count-1] with plain foreach indexing and writes the total
// to result[0]. zeros is not used. The benchmark table labels this variant
// "vector load".
export void loads(uniform float array[], uniform int count,
                  uniform float zeros[], uniform float result[]) {
    float total = 0;
    foreach (idx = 0 ... count) {
        total += array[idx];
    }
    result[0] = reduce_add(total);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Writes to array[0 .. count-1] through an index that includes a per-lane
// offset loaded from zeros[programIndex]; the stored value is that same
// offset. result is not written. The benchmark table labels this variant
// "scatter".
export void scatters(uniform float array[], uniform int count,
                     uniform float zeros[], uniform float result[]) {
    int offset = zeros[programIndex];
    foreach (idx = 0 ... count) {
        array[idx + offset] = offset;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Writes the per-lane value loaded from zeros[programIndex] (converted to
// int) to array[0 .. count-1] with plain foreach indexing. result is not
// written. The benchmark table labels this variant "vector store".
export void stores(uniform float array[], uniform int count,
                   uniform float zeros[], uniform float result[]) {
    int value = zeros[programIndex];
    foreach (idx = 0 ... count) {
        array[idx] = value;
    }
}
|
||||||
|
|
||||||
|
// Scales each of the count/3 interleaved (x,y,z) triples in array, in
// place, by dividing every component by x*x + y*y + z*z (the squared
// length; no square root is taken). The reads add a per-lane offset loaded
// from zeroArray[programIndex] to the index; the writes do not.
export void normalizeAOSNoCoalesce(uniform float array[], uniform int count,
                                   uniform float zeroArray[]) {
    // Keep the offset as an int, as xyzSumAOSNoCoalesce/gathers/scatters do:
    // a float offset would make the whole index expression float arithmetic,
    // which cannot represent every index above 2^24 exactly.
    int zeros = zeroArray[programIndex];
    foreach (i = 0 ... count/3) {
        float x = array[3*i+zeros];
        float y = array[3*i+1+zeros];
        float z = array[3*i+2+zeros];

        float l2 = x*x + y*y + z*z;

        array[3*i] /= l2;
        array[3*i+1] /= l2;
        array[3*i+2] /= l2;
    }
}
|
||||||
|
|
||||||
|
// Scales each of the count/3 triples in array, in place, by dividing every
// component by x*x + y*y + z*z (the squared length; no square root is
// taken). zeros is not used.
// NOTE(review): despite the "SOA" name, elements are addressed as
// interleaved triples (3*i, 3*i+1, 3*i+2) -- confirm the intended layout.
export void normalizeSOA(uniform float array[], uniform int count,
                         uniform float zeros[]) {
    foreach (pt = 0 ... count/3) {
        float px = array[3*pt];
        float py = array[3*pt + 1];
        float pz = array[3*pt + 2];

        float lenSq = px*px + py*py + pz*pz;

        array[3*pt] /= lenSq;
        array[3*pt + 1] /= lenSq;
        array[3*pt + 2] /= lenSq;
    }
}
|
||||||
175
examples/perfbench/perfbench.vcxproj
Normal file
175
examples/perfbench/perfbench.vcxproj
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<ProjectGuid>{d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8}</ProjectGuid>
|
||||||
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
<RootNamespace>perfbench</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>
|
||||||
|
</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<FloatingPointModel>Fast</FloatingPointModel>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>
|
||||||
|
</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<FloatingPointModel>Fast</FloatingPointModel>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<PrecompiledHeader>
|
||||||
|
</PrecompiledHeader>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||||
|
<FloatingPointModel>Fast</FloatingPointModel>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<PrecompiledHeader>
|
||||||
|
</PrecompiledHeader>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||||
|
<FloatingPointModel>Fast</FloatingPointModel>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="perfbench.cpp" />
|
||||||
|
<ClCompile Include="perfbench_serial.cpp" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="perfbench.ispc">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
|
||||||
|
</Command>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
|
||||||
|
</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
|
||||||
|
</Command>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
|
||||||
|
</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
||||||
61
examples/perfbench/perfbench_serial.cpp
Normal file
61
examples/perfbench/perfbench_serial.cpp
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2012, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
// Serial reference for the AOS component sum.
//
// a      : count floats holding interleaved (x, y, z) triples
// count  : number of floats in a; only the count/3 complete triples are
//          summed, so trailing floats that do not form a triple are ignored
// zeros  : unused; present only so the signature matches the other kernels
// result : receives the x, y and z totals in result[0], [1] and [2]
void
xyzSumAOS(float *a, int count, float *zeros, float *result) {
    (void)zeros;
    float xsum = 0, ysum = 0, zsum = 0;
    // Iterate over whole triples (as the ispc version does with count/3) so
    // that a[3*t+1] and a[3*t+2] never reach past the end of the buffer
    // when count is not a multiple of 3.
    const int nTriples = count / 3;
    for (int t = 0; t < nTriples; ++t) {
        const float *p = a + 3 * t;
        xsum += p[0];
        ysum += p[1];
        zsum += p[2];
    }
    result[0] = xsum;
    result[1] = ysum;
    result[2] = zsum;
}
|
||||||
|
|
||||||
|
// Serial reference for the SOA component sum.
//
// a      : floats laid out in blocks of 24: 8 x values, then 8 y values,
//          then 8 z values
// count  : number of floats in a; only the count/24 complete blocks are
//          summed, so a trailing partial block is ignored
// zeros  : unused; present only so the signature matches the other kernels
// result : receives the x, y and z totals in result[0], [1] and [2]
void
xyzSumSOA(float *a, int count, float *zeros, float *result) {
    (void)zeros;
    float xsum = 0, ysum = 0, zsum = 0;
    // Restrict the walk to complete 24-float blocks: with a partial block
    // the p[8] / p[16] reads would land beyond the count floats supplied.
    const int nPoints = (count / 24) * 8;
    for (int i = 0; i < nPoints; ++i) {
        // Block number i/8 starts at float (i/8)*24; i%8 selects the lane.
        const float *p = a + (i >> 3) * 24 + (i & 7);
        xsum += p[0];
        ysum += p[8];
        zsum += p[16];
    }
    result[0] = xsum;
    result[1] = ysum;
    result[2] = zsum;
}
|
||||||
Reference in New Issue
Block a user