From 6a52717184459d29d3b1fc25368d5ec67598f3f9 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 21 Feb 2014 09:07:45 +0100 Subject: [PATCH] options example --- examples/portable/options/.gitignore | 1 + examples/portable/options/Makefile_cpu | 8 + examples/portable/options/Makefile_ptx | 14 + examples/portable/options/options.cpp | 120 ++++++++ examples/portable/options/options.cu | 334 +++++++++++++++++++++++ examples/portable/options/options.ispc | 211 ++++++++++++++ examples/portable/options/options_defs.h | 40 +++ 7 files changed, 728 insertions(+) create mode 100644 examples/portable/options/.gitignore create mode 100644 examples/portable/options/Makefile_cpu create mode 100644 examples/portable/options/Makefile_ptx create mode 100644 examples/portable/options/options.cpp create mode 100644 examples/portable/options/options.cu create mode 100644 examples/portable/options/options.ispc create mode 100644 examples/portable/options/options_defs.h diff --git a/examples/portable/options/.gitignore b/examples/portable/options/.gitignore new file mode 100644 index 00000000..55bdd069 --- /dev/null +++ b/examples/portable/options/.gitignore @@ -0,0 +1 @@ +options diff --git a/examples/portable/options/Makefile_cpu b/examples/portable/options/Makefile_cpu new file mode 100644 index 00000000..dc4f8108 --- /dev/null +++ b/examples/portable/options/Makefile_cpu @@ -0,0 +1,8 @@ + +EXAMPLE=options +CPP_SRC=options.cpp +ISPC_SRC=options.ispc +ISPC_IA_TARGETS=avx1-i32x16 +ISPC_ARM_TARGETS=neon + +include ../common_cpu.mk diff --git a/examples/portable/options/Makefile_ptx b/examples/portable/options/Makefile_ptx new file mode 100644 index 00000000..1065eb92 --- /dev/null +++ b/examples/portable/options/Makefile_ptx @@ -0,0 +1,14 @@ +PROG=options +ISPC_SRC=options.ispc +CU_SRC=options.cu +CXX_SRC=options.cpp +PTXCC_REGMAX=128 + + +#LLVM_GPU=1 +NVVM_GPU=1 + +include ../common_ptx.mk + + + diff --git a/examples/portable/options/options.cpp b/examples/portable/options/options.cpp new file mode 100644 index 00000000..de0fb2ce --- /dev/null +++ b/examples/portable/options/options.cpp @@ -0,0 +1,120 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define NOMINMAX + +#include +#include +#include +#include +#include +#include +using std::max; + +#include "options_defs.h" +#include "timing.h" +#include "ispc_malloc.h" + +#include "options_ispc.h" +using namespace ispc; + +static void usage() { + printf("usage: options [--count=]\n"); +} + + +int main(int argc, char *argv[]) { + int nOptions = 128*1024; + + for (int i = 1; i < argc; ++i) { + if (strncmp(argv[i], "--count=", 8) == 0) { + nOptions = atoi(argv[i] + 8); + if (nOptions <= 0) { + usage(); + exit(1); + } + } + } + + float *S = new float[nOptions]; + float *X = new float[nOptions]; + float *T = new float[nOptions]; + float *r = new float[nOptions]; + float *v = new float[nOptions]; + float *result = new float[nOptions]; + + for (int i = 0; i < nOptions; ++i) { + S[i] = 100; // stock price + X[i] = 98; // option strike price + T[i] = 2; // time (years) + r[i] = .02; // risk-free interest rate + v[i] = 5; // volatility + } + + double sum; + + // + // Binomial options pricing model, ispc implementation, tasks + // + double binomial_tasks = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + binomial_put_ispc_tasks(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_msec(); + binomial_tasks = std::min(binomial_tasks, dt); + } + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + printf("[binomial ispc, tasks]:\t\t[%.3f] msec (avg %f)\n", + binomial_tasks, sum / nOptions); + + // + // Black-Scholes options pricing model, ispc implementation, tasks + // + double bs_ispc_tasks = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + black_scholes_ispc_tasks(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_msec(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + bs_ispc_tasks = std::min(bs_ispc_tasks, dt); + } + printf("[black-scholes ispc, tasks]:\t[%.3f] msec (avg %f)\n", + bs_ispc_tasks, sum / nOptions); + + + return 0; +} diff --git a/examples/portable/options/options.cu b/examples/portable/options/options.cu new file mode 100644 index 00000000..d588096e --- /dev/null +++ b/examples/portable/options/options.cu @@ -0,0 +1,334 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "options_defs.h" +#include "cuda_helpers.cuh" + +__device__ static inline void __range_reduce_log(float input, float * reduced, + int * exponent) { + int int_version = __float_as_int(input); //intbits(input); + // single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM + // exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000 + // 0x7 0xF 0x8 0x0 0x0 0x0 0x0 0x0 + // non-exponent = 1000 0000 0111 1111 1111 1111 1111 1111 + // = 0x8 0x0 0x7 0xF 0xF 0xF 0xF 0xF + + //const int exponent_mask(0x7F800000) + const int nonexponent_mask = 0x807FFFFF; + + // We want the reduced version to have an exponent of -1 which is -1 + 127 after biasing or 126 + const int exponent_neg1 = (126l << 23); + // NOTE(boulos): We don't need to mask anything out since we know + // the sign bit has to be 0. If it's 1, we need to return infinity/nan + // anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN). + int biased_exponent = int_version >> 23; // This number is [0, 255] but it means [-127, 128] + + int offset_exponent = biased_exponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2 + *exponent = offset_exponent - 127; // get the real value + + // Blend the offset_exponent with the original input (do this in + // int for now, until I decide if float can have & and ¬) + int blended = (int_version & nonexponent_mask) | (exponent_neg1); + *reduced = __int_as_float(blended); //floatbits(blended); +} + + +__device__ static inline float __Logf(const float x_full) +{ +#if 1 + return __logf(x_full); +#else + float reduced; + int exponent; + + const int NaN_bits = 0x7fc00000; + const int Neg_Inf_bits = 0xFF800000; + const float NaN = __int_as_float(NaN_bits); //floatbits(NaN_bits); + const float neg_inf = __int_as_float(Neg_Inf_bits); //floatbits(Neg_Inf_bits); + bool use_nan = x_full < 0.f; + bool use_inf = x_full == 0.f; + bool exceptional = use_nan || use_inf; + const float one = 1.0f; + + float patched = exceptional ? one : x_full; + __range_reduce_log(patched, &reduced, &exponent); + + const float ln2 = 0.693147182464599609375f; + + float x1 = one - reduced; + const float c1 = 0.50000095367431640625f; + const float c2 = 0.33326041698455810546875f; + const float c3 = 0.2519190013408660888671875f; + const float c4 = 0.17541764676570892333984375f; + const float c5 = 0.3424419462680816650390625f; + const float c6 = -0.599632322788238525390625f; + const float c7 = +1.98442304134368896484375f; + const float c8 = -2.4899270534515380859375f; + const float c9 = +1.7491014003753662109375f; + + float result = x1 * c9 + c8; + result = x1 * result + c7; + result = x1 * result + c6; + result = x1 * result + c5; + result = x1 * result + c4; + result = x1 * result + c3; + result = x1 * result + c2; + result = x1 * result + c1; + result = x1 * result + one; + + // Equation was for -(ln(red)/(1-red)) + result *= -x1; + result += (float)(exponent) * ln2; + + return exceptional ? (use_nan ? NaN : neg_inf) : result; +#endif +} + +__device__ static inline float __Expf(const float x_full) +{ +#if 1 + return __expf(x_full); +#else + const float ln2_part1 = 0.6931457519f; + const float ln2_part2 = 1.4286067653e-6f; + const float one_over_ln2 = 1.44269502162933349609375f; + + float scaled = x_full * one_over_ln2; + float k_real = floor(scaled); + int k = (int)k_real; + + // Reduced range version of x + float x = x_full - k_real * ln2_part1; + x -= k_real * ln2_part2; + + // These coefficients are for e^x in [0, ln(2)] + const float one = 1.f; + const float c2 = 0.4999999105930328369140625f; + const float c3 = 0.166668415069580078125f; + const float c4 = 4.16539050638675689697265625e-2f; + const float c5 = 8.378830738365650177001953125e-3f; + const float c6 = 1.304379315115511417388916015625e-3f; + const float c7 = 2.7555381529964506626129150390625e-4f; + + float result = x * c7 + c6; + result = x * result + c5; + result = x * result + c4; + result = x * result + c3; + result = x * result + c2; + result = x * result + one; + result = x * result + one; + + // Compute 2^k (should differ for float and double, but I'll avoid + // it for now and just do floats) + const int fpbias = 127; + int biased_n = k + fpbias; + bool overflow = k > fpbias; + // Minimum exponent is -126, so if k is <= -127 (k + 127 <= 0) + // we've got underflow. -127 * ln(2) -> -88.02. So the most + // negative float input that doesn't result in zero is like -88. + bool underflow = (biased_n <= 0); + const int InfBits = 0x7f800000; + biased_n <<= 23; + // Reinterpret this thing as float + float two_to_the_n = __int_as_float(biased_n); //floatbits(biased_n); + // Handle both doubles and floats (hopefully eliding the copy for float) + float elemtype_2n = two_to_the_n; + result *= elemtype_2n; +// result = overflow ? floatbits(InfBits) : result; + result = overflow ? __int_as_float(InfBits) : result; + result = underflow ? 0.0f : result; + return result; +#endif +} + +// Cumulative normal distribution function +// +__device__ +static inline float +CND(float X) { + float L = fabsf(X); + + float k = 1.0f / (1.0f + 0.2316419f * L); + float k2 = k*k; + float k3 = k2*k; + float k4 = k2*k2; + float k5 = k3*k2; + + const float invSqrt2Pi = 0.39894228040f; + float w = (0.31938153f * k - 0.356563782f * k2 + 1.781477937f * k3 + + -1.821255978f * k4 + 1.330274429f * k5); + w *= invSqrt2Pi * __Expf(-L * L * .5f); + + if (X > 0.f) + w = 1.0f - w; + return w; +} + +__global__ +void bs_task( float Sa[], float Xa[], float Ta[], + float ra[], float va[], + float result[], int count) { + if (taskIndex >= taskCount) return; + int first = taskIndex * (count/taskCount); + int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + for (int i = programIndex + first; i < last; i += programCount) + if (i < last) + { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + + float d1 = (__Logf(S/X) + (r + v * v * .5f) * T) / (v * sqrtf(T)); + float d2 = d1 - v * sqrtf(T); + + result[i] = S * CND(d1) - X * __Expf(-r * T) * CND(d2); + } +} + +extern "C" +__global__ void +black_scholes_ispc_tasks___export( float Sa[], float Xa[], float Ta[], + float ra[], float va[], + float result[], int count) { + int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384); + launch(nTasks,1,1,bs_task) + (Sa, Xa, Ta, ra, va, result, count); + cudaDeviceSynchronize(); +} +extern "C" +__host__ void +black_scholes_ispc_tasks( float Sa[], float Xa[], float Ta[], + float ra[], float va[], + float result[], int count) { + black_scholes_ispc_tasks___export<<<1,32>>>(Sa,Xa,Ta,ra,va,result,count); + cudaDeviceSynchronize(); +} + +/********/ + + +template +struct loop +{ + __device__ static void op1(float V[], const float u, const float X, const float S) + { + const int j = NBEG; + float upow = powf(u, (float)(2*j-BINOMIAL_NUM)); + V[j] = max(0.0f, X - S * upow); + loop::op1(V,u,X,S); + } + __device__ static void op2(float V[], const float Pu, const float disc) + { + const int j = NBEG; +#pragma unroll + for ( int k = 0; k < j; ++k) + V[k] = ((1.0f - Pu) * V[k] + Pu * V[k+ 1]) / disc; + loop::op2(V, Pu,disc); + } +}; + +template +struct loop +{ + __device__ static void op1(float V[], const float u, const float X, const float S) {} + __device__ static void op2(float V[], const float Pu, const float disc) {} +}; + +__device__ +static inline float +binomial_put(float S, float X, float T, float r, float v) +{ + + float V[BINOMIAL_NUM]; + + float dt = T / BINOMIAL_NUM; + float u = exp(v * sqrt(dt)); + float d = 1.f / u; + float disc = exp(r * dt); + float Pu = (disc - d) / (u - d); + +#if 0 /* slow */ + for ( int j = 0; j < BINOMIAL_NUM; ++j) { + float upow = powf(u, (float)(2*j-BINOMIAL_NUM)); + V[j] = max(0.0f, X - S * upow); + } + for ( int j = BINOMIAL_NUM-1; j >= 0; --j) + for ( int k = 0; k < j; ++k) + V[k] = ((1.0f - Pu) * V[k] + Pu * V[k+ 1]) / disc; +#else /* with loop unrolling, stores resutls in registers */ + loop<0,BINOMIAL_NUM,1>::op1(V,u,X,S); + loop::op2(V, Pu, disc); +#endif + return V[0]; +} + + + +__global__ void +binomial_task( float Sa[], float Xa[], + float Ta[], float ra[], + float va[], float result[], + int count) +{ + int first = taskIndex * (count/taskCount); + int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + for (int i = programIndex + first; i < last; i += programCount) + if (i < last) + { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + result[i] = binomial_put(S, X, T, r, v); + } +} + + +extern "C" __global__ void +binomial_put_ispc_tasks___export( float Sa[], float Xa[], + float Ta[], float ra[], + float va[], float result[], + int count) { + int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384); + launch(nTasks,1,1,binomial_task) + (Sa, Xa, Ta, ra, va, result, count); + cudaDeviceSynchronize(); +} +extern "C" +__host__ void +binomial_put_ispc_tasks( float Sa[], float Xa[], float Ta[], + float ra[], float va[], + float result[], int count) { + + cudaDeviceSetCacheConfig (cudaFuncCachePreferL1); + binomial_put_ispc_tasks___export<<<1,32>>>(Sa,Xa,Ta,ra,va,result,count); + cudaDeviceSynchronize(); +} diff --git a/examples/portable/options/options.ispc b/examples/portable/options/options.ispc new file mode 100644 index 00000000..0bc2288a --- /dev/null +++ b/examples/portable/options/options.ispc @@ -0,0 +1,211 @@ +// -*- mode: c++ -*- +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "options_defs.h" + +// Cumulative normal distribution function +static inline float +CND(float X) { + float L = abs(X); + + float k = 1.0 / (1.0 + 0.2316419 * L); + float k2 = k*k; + float k3 = k2*k; + float k4 = k2*k2; + float k5 = k3*k2; + + const float invSqrt2Pi = 0.39894228040f; + float w = (0.31938153f * k - 0.356563782f * k2 + 1.781477937f * k3 + + -1.821255978f * k4 + 1.330274429f * k5); + w *= invSqrt2Pi * exp(-L * L * .5f); + + if (X > 0.f) + w = 1.0 - w; + return w; +} + +task void +bs_task(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + uniform int first = taskIndex * (count/taskCount); + uniform int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + foreach (i = first ... last) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + + float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T)); + float d2 = d1 - v * sqrt(T); + + result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2); + } +} + +export void +black_scholes_ispc_tasks(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + uniform int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384); + launch[nTasks] bs_task(Sa, Xa, Ta, ra, va, result, count); +} + +/********/ + + +export void +black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + foreach (i = 0 ... count) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + + float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T)); + float d2 = d1 - v * sqrt(T); + + result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2); + } +} + + +static inline float +binomial_put(float S, float X, float T, float r, float v) { + float V[BINOMIAL_NUM]; + + float dt = T / BINOMIAL_NUM; + float u = exp(v * sqrt(dt)); + float d = 1. / u; + float disc = exp(r * dt); + float Pu = (disc - d) / (u - d); + +#ifndef __NVPTX__ + + for (uniform int j = 0; j < BINOMIAL_NUM; ++j) { + float upow = pow(u, (float)(2*j-BINOMIAL_NUM)); + V[j] = max(0., X - S * upow); + } + for (uniform int j = BINOMIAL_NUM-1; j >= 0; --j) + for (uniform int k = 0; k < j; ++k) + V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc; + +#else + + /* loop unrolling helps NVVM to place V -> registers therefore boosting performance */ + /* takes looong time to compile... */ +#if BINOMIAL_NUM != 64 +#error "Cannot unroll. Please use generic version above" +#endif + + // with PTX target unroll loops which will store data in registers.. + + /* first loop */ + +#define OP(j) { \ + float upow = pow(u, (float)(2*(j)-BINOMIAL_NUM)); \ + V[j] = max(0., X - S * upow); } +#define OP10(k) \ + OP(k+0); OP(k+1); OP(k+2); OP(k+3); OP(k+4) \ + OP(k+5); OP(k+6); OP(k+7); OP(k+8); OP(k+9); + OP10(0) + OP10(10) + OP10(20) + OP10(30) + OP10(40) + OP10(50) + OP(60) + OP(61) + OP(62) + OP(63) +#undef OP10 +#undef OP + + /* second loop */ + +#define OP(j) {\ + for (uniform int k = 0; k < (j); ++k) \ + V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc; } +#define OP10(k) \ + OP(k+9); OP(k+8); OP(k+7); OP(k+6); OP(k+5); \ + OP(k+4); OP(k+3); OP(k+2); OP(k+1); OP(k+0); + OP(63) + OP(62) + OP(61) + OP(60) + OP10(50) + OP10(40) + OP10(30) + OP10(20) + OP10(10) + OP10(0) +#undef OP10 +#undef OP + +#endif + return V[0]; +} + + +export void +binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + foreach (i = 0 ... count) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + result[i] = binomial_put(S, X, T, r, v); + } +} + + +task void +binomial_task(uniform float Sa[], uniform float Xa[], + uniform float Ta[], uniform float ra[], + uniform float va[], uniform float result[], + uniform int count) { + uniform int first = taskIndex * (count/taskCount); + uniform int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + foreach (i = first ... last) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + result[i] = binomial_put(S, X, T, r, v); + } +} + + +export void +binomial_put_ispc_tasks(uniform float Sa[], uniform float Xa[], + uniform float Ta[], uniform float ra[], + uniform float va[], uniform float result[], + uniform int count) { + uniform int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384); + launch[nTasks] binomial_task(Sa, Xa, Ta, ra, va, result, count); +} diff --git a/examples/portable/options/options_defs.h b/examples/portable/options/options_defs.h new file mode 100644 index 00000000..ee1450c1 --- /dev/null +++ b/examples/portable/options/options_defs.h @@ -0,0 +1,40 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef OPTIONS_DEFS_H +#define OPTIONS_DEFS_H 1 + +#define BINOMIAL_NUM 64 + + +#endif // OPTIONS_DEFS_H