added cuda version
This commit is contained in:
55
examples_cuda/options/Makefile_gpu
Normal file
55
examples_cuda/options/Makefile_gpu
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# Builds the NVPTX variant of the "options" example.
#
# Pipeline, as encoded by the rules below:
#   options.ispc  --ispc-->      options_ispc_nvptx64.bc (+ options_ispc.h)
#                 --llvm-dis-->  options_ispc_nvptx64.ll
#                 --ptxgen-->    options_ispc_nvptx64.ptx
#                 --cat-->       kernel.ptx  --cp-->  __kernels.ptx
# The host driver (options_cu.cpp) is compiled with g++ and linked with -lcuda.

PROG=options_cu
ISPC_SRC=options.ispc
CXX_SRC=options_cu.cpp

CXX=g++
CXXFLAGS=-O3 -I$(CUDATK)/include
LD=g++
LDFLAGS=-lcuda

ISPC=ispc
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math

LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1

LLVM32DIS=$(LLVM32)/bin/llvm-dis

.SUFFIXES: .bc .o .ptx .cu _ispc_nvptx64.bc

ISPC_OBJ=$(ISPC_SRC:%.ispc=%_ispc.o)
ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
ISPC_HDR=$(ISPC_SRC:%.ispc=%_ispc.h)
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)

# Neither target names a file, so never let a stray "all" or "clean" file
# in the directory suppress them.
.PHONY: all clean

all: $(PROG)

# The ispc rule emits the *_ispc.h header as a side effect of producing the
# bitcode, so the host objects must wait until kernel.ptx has been built.
$(CXX_OBJ) : kernel.ptx

$(PROG): $(CXX_OBJ) kernel.ptx
	/bin/cp kernel.ptx __kernels.ptx
	$(LD) -o $@ $(CXX_OBJ) $(LDFLAGS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -o $@ -c $<

# $* is the pattern stem (e.g. "options"), so the header lands next to the
# bitcode as <stem>_ispc.h.
%_ispc_nvptx64.bc: %.ispc
	$(ISPC) $(ISPCFLAGS) --emit-llvm -o $@ -h $*_ispc.h $<

# llvm-dis writes <stem>.ll beside its input; ptxgen turns that into PTX.
%.ptx: %.bc
	$(LLVM32DIS) $<
	$(PTXGEN) $*.ll > $@

kernel.ptx: $(PTXSRC)
	cat $^ > kernel.ptx

# Remove every generated artifact, including host objects and the
# ispc-generated header.
clean:
	/bin/rm -rf *.ptx *.bc *.ll *.o $(ISPC_HDR) $(PROG)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
150
examples_cuda/options/options.cu
Normal file
150
examples_cuda/options/options.cu
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
// -*- mode: c++ -*-
|
||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "options_defs.h"
|
||||||
|
|
||||||
|
// ispc-style execution-model names mapped onto CUDA built-ins.
// One warp (32 lanes) plays the role of one task; the number of tasks per
// block is derived from blockDim.x instead of being fixed at 4, so the
// mapping stays correct for any 1-D block whose size is a non-zero multiple
// of 32. For the 128-thread launches used in this file the values are
// unchanged.
#define programCount 32
#define programIndex (threadIdx.x & 31)
#define taskIndex (blockIdx.x*(blockDim.x >> 5) + (threadIdx.x >> 5))
#define taskCount (gridDim.x*(blockDim.x >> 5))
#define warpIdx (threadIdx.x >> 5)
|
||||||
|
|
||||||
|
// Cumulative normal distribution function
//
// Evaluates a fifth-degree polynomial in k = 1 / (1 + 0.2316419 * |X|),
// scaled by the Gaussian density exp(-X^2 / 2) / sqrt(2*pi). The polynomial
// gives the lower tail; for X > 0 the result is mirrored as 1 - w.
//
// All arithmetic is kept in single precision: the literals carry an `f`
// suffix so nothing is silently promoted to double.
// Callable from both host and device so it can be checked against a CPU
// reference.
__host__ __device__
static inline float
CND(float X) {
    const float L = fabsf(X);

    const float k  = 1.0f / (1.0f + 0.2316419f * L);
    const float k2 = k * k;
    const float k3 = k2 * k;
    const float k4 = k2 * k2;
    const float k5 = k3 * k2;

    const float invSqrt2Pi = 0.39894228040f;
    float w = (0.31938153f * k - 0.356563782f * k2 + 1.781477937f * k3 +
               -1.821255978f * k4 + 1.330274429f * k5);
    w *= invSqrt2Pi * expf(-L * L * .5f);

    // w is the tail probability for -|X|; reflect it for positive arguments.
    if (X > 0.f)
        w = 1.0f - w;
    return w;
}
|
||||||
|
|
||||||
|
// Black-Scholes pricing kernel.
//
// Work distribution follows the taskIndex/taskCount/programIndex macros:
// each warp is one task, each task owns one contiguous slice of [0, count),
// and the 32 lanes of the warp stride through that slice.
//
// The slice length is count/taskCount rounded UP, with both ends clamped to
// count. Rounding down (as before) silently skipped the last
// count % taskCount elements, and skipped everything when count < taskCount.
// When count is a multiple of taskCount the slices are unchanged.
//
// Sa, Xa, Ta, ra, va : per-option inputs, read at index i
// result             : per-option output, written at index i
// count              : number of options
__global__
void bs_task( float Sa[], float Xa[], float Ta[],
              float ra[], float va[],
              float result[], int count) {
    if (count <= 0)
        return;

    const int nTasks = (int)taskCount;
    const int chunk  = count / nTasks + (count % nTasks != 0 ? 1 : 0);

    // 64-bit intermediate: task * chunk can exceed INT_MAX before clamping.
    const long long begin = (long long)taskIndex * chunk;
    const long long end   = begin + chunk;
    const int first = (int)(begin < count ? begin : count);
    const int last  = (int)(end   < count ? end   : count);

    for (int i = first + programIndex; i < last; i += programCount) {
        const float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];

        const float d1 = (logf(S/X) + (r + v * v * .5f) * T) / (v * sqrtf(T));
        const float d2 = d1 - v * sqrtf(T);

        result[i] = S * CND(d1) - X * expf(-r * T) * CND(d2);
    }
}
|
||||||
|
|
||||||
|
// Entry kernel for the Black-Scholes benchmark; it launches bs_task from
// device code, so it needs a toolchain/GPU that supports device-side kernel
// launches.
//
// Only lane 0 of each warp performs the launch, matching
// binomial_put_ispc_tasks. Without the guard every thread of the parent
// grid launched its own full child grid, repeating identical work.
// The child grid is sized as nTasks warps: nTasks/4 blocks of 128 threads.
extern "C"
__global__ void
black_scholes_ispc_tasks( float Sa[], float Xa[], float Ta[],
                          float ra[], float va[],
                          float result[], int count) {
    // Fixed task count; count-derived sizes (count/16384,
    // max(64, count/16384)) were alternatives left disabled by the author.
    const int nTasks = 2048;

    if (programIndex == 0)
        bs_task<<<nTasks/4,128>>>(Sa, Xa, Ta, ra, va, result, count);
}
|
||||||
|
|
||||||
|
/********/
|
||||||
|
|
||||||
|
|
||||||
|
// Prices one option on a BINOMIAL_NUM-step binomial lattice.
//
// V[] is first filled with the terminal payoffs max(0, X - S * u^(2j - N))
// and then folded backwards one step at a time, each node becoming the
// discounted blend of its two successors; V[0] is the result.
//
// S, X, T, r, v are the same per-option inputs the callers pass for spot,
// strike, time, rate and volatility.
//
// Literals and math calls are single precision so that nothing is promoted
// to double on the device.
__device__
static inline float
binomial_put(float S, float X, float T, float r, float v) {
    float V[BINOMIAL_NUM];

    const float dt   = T / BINOMIAL_NUM;
    const float u    = expf(v * sqrtf(dt));
    const float d    = 1.0f / u;
    const float disc = expf(r * dt);
    const float Pu   = (disc - d) / (u - d);

    // Terminal payoffs.
#pragma unroll
    for (int j = 0; j < BINOMIAL_NUM; ++j) {
        const float upow = powf(u, (float)(2*j-BINOMIAL_NUM));
        V[j] = fmaxf(0.0f, X - S * upow);
    }

    // Backward induction: after pass j only V[0..j-1] are still live.
#pragma unroll
    for (int j = BINOMIAL_NUM-1; j >= 0; --j)
#pragma unroll
        for (int k = 0; k < j; ++k)
            V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
    return V[0];
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Binomial-lattice pricing kernel.
//
// Work distribution follows the taskIndex/taskCount/programIndex macros:
// each warp is one task, each task owns one contiguous slice of [0, count),
// and the 32 lanes of the warp stride through that slice.
//
// The slice length is count/taskCount rounded UP, with both ends clamped to
// count. Rounding down (as before) silently skipped the last
// count % taskCount elements, and skipped everything when count < taskCount.
// When count is a multiple of taskCount the slices are unchanged.
//
// Sa, Xa, Ta, ra, va : per-option inputs, read at index i
// result             : per-option output, written at index i
// count              : number of options
__global__ void
binomial_task( float Sa[], float Xa[],
               float Ta[], float ra[],
               float va[], float result[],
               int count) {
    if (count <= 0)
        return;

    const int nTasks = (int)taskCount;
    const int chunk  = count / nTasks + (count % nTasks != 0 ? 1 : 0);

    // 64-bit intermediate: task * chunk can exceed INT_MAX before clamping.
    const long long begin = (long long)taskIndex * chunk;
    const long long end   = begin + chunk;
    const int first = (int)(begin < count ? begin : count);
    const int last  = (int)(end   < count ? end   : count);

    for (int i = first + programIndex; i < last; i += programCount) {
        const float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
        result[i] = binomial_put(S, X, T, r, v);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Entry kernel for the binomial benchmark; it launches binomial_task from
// device code, so it needs a toolchain/GPU that supports device-side kernel
// launches.
//
// Only lane 0 of each warp performs the launch. The child grid is sized as
// nTasks warps: nTasks/4 blocks of 128 threads.
//
// No explicit device-side cudaDeviceSynchronize() is issued: a parent grid
// is not considered finished until its child grids have finished, so the
// host still observes completed results, and the device-side call is not
// available in newer CUDA toolkits.
extern "C" __global__ void
binomial_put_ispc_tasks( float Sa[], float Xa[],
                         float Ta[], float ra[],
                         float va[], float result[],
                         int count) {
    // Fixed task count; count-derived sizes (count/16384,
    // max(64, count/16384)) were alternatives left disabled by the author.
    const int nTasks = 2048;

    if (programIndex == 0)
        binomial_task<<<nTasks/4,128>>>(Sa, Xa, Ta, ra, va, result, count);
}
|
||||||
184
examples_cuda/options/options_cu.cpp
Normal file
184
examples_cuda/options/options_cu.cpp
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define NOMINMAX
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <algorithm>
|
||||||
|
using std::max;
|
||||||
|
|
||||||
|
#include "options_defs.h"
|
||||||
|
#include "../timing.h"
|
||||||
|
|
||||||
|
#include "options_ispc.h"
|
||||||
|
using namespace ispc;
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include "../cuda_ispc.h"
|
||||||
|
|
||||||
|
// Writes the one-line command-line synopsis to standard output.
static void usage() {
    fputs("usage: options [--count=<num options>]\n", stdout);
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
int nOptions = 128*1024;
|
||||||
|
|
||||||
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
if (strncmp(argv[i], "--count=", 8) == 0) {
|
||||||
|
nOptions = atoi(argv[i] + 8);
|
||||||
|
if (nOptions <= 0) {
|
||||||
|
usage();
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float *S = new float[nOptions];
|
||||||
|
float *X = new float[nOptions];
|
||||||
|
float *T = new float[nOptions];
|
||||||
|
float *r = new float[nOptions];
|
||||||
|
float *v = new float[nOptions];
|
||||||
|
float *result = new float[nOptions];
|
||||||
|
|
||||||
|
for (int i = 0; i < nOptions; ++i) {
|
||||||
|
S[i] = 100; // stock price
|
||||||
|
X[i] = 98; // option strike price
|
||||||
|
T[i] = 2; // time (years)
|
||||||
|
r[i] = .02; // risk-free interest rate
|
||||||
|
v[i] = 5; // volatility
|
||||||
|
}
|
||||||
|
|
||||||
|
/*******************/
|
||||||
|
createContext();
|
||||||
|
/*******************/
|
||||||
|
devicePtr d_S = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
devicePtr d_X = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
devicePtr d_T = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
devicePtr d_r = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
devicePtr d_v = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
devicePtr d_result = deviceMalloc(nOptions*sizeof(float));
|
||||||
|
|
||||||
|
memcpyH2D(d_S, S, nOptions*sizeof(float));
|
||||||
|
memcpyH2D(d_X, X, nOptions*sizeof(float));
|
||||||
|
memcpyH2D(d_T, T, nOptions*sizeof(float));
|
||||||
|
memcpyH2D(d_r, r, nOptions*sizeof(float));
|
||||||
|
memcpyH2D(d_v, v, nOptions*sizeof(float));
|
||||||
|
|
||||||
|
double sum;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Binomial options pricing model, ispc implementation
|
||||||
|
//
|
||||||
|
const bool print_log = false;
|
||||||
|
const int nreg = 32;
|
||||||
|
double binomial_ispc = 1e30;
|
||||||
|
#if 0
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
const double t0 = rtc();
|
||||||
|
const char * func_name = "binomial_put_ispc";
|
||||||
|
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||||
|
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||||
|
dt *= 1e3;
|
||||||
|
sum = 0.;
|
||||||
|
for (int i = 0; i < nOptions; ++i)
|
||||||
|
sum += result[i];
|
||||||
|
binomial_ispc = std::min(binomial_ispc, dt);
|
||||||
|
}
|
||||||
|
printf("[binomial ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n",
|
||||||
|
binomial_ispc, sum / nOptions);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Binomial options pricing model, ispc implementation, tasks
|
||||||
|
//
|
||||||
|
double binomial_tasks = 1e30;
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
const char * func_name = "binomial_put_ispc_tasks";
|
||||||
|
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||||
|
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||||
|
dt *= 1e3;
|
||||||
|
sum = 0.;
|
||||||
|
for (int i = 0; i < nOptions; ++i)
|
||||||
|
sum += result[i];
|
||||||
|
binomial_tasks = std::min(binomial_tasks, dt);
|
||||||
|
}
|
||||||
|
printf("[binomial ispc, tasks]:\t\t[%.3f] million cycles (avg %f)\n",
|
||||||
|
binomial_tasks, sum / nOptions);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Black-Scholes options pricing model, ispc implementation, 1 thread
|
||||||
|
//
|
||||||
|
double bs_ispc = 1e30;
|
||||||
|
#if 0
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
const char * func_name = "black_scholes_ispc";
|
||||||
|
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||||
|
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||||
|
dt *= 1e3;
|
||||||
|
sum = 0.;
|
||||||
|
for (int i = 0; i < nOptions; ++i)
|
||||||
|
sum += result[i];
|
||||||
|
bs_ispc = std::min(bs_ispc, dt);
|
||||||
|
}
|
||||||
|
printf("[black-scholes ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n",
|
||||||
|
bs_ispc, sum / nOptions);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Black-Scholes options pricing model, ispc implementation, tasks
|
||||||
|
//
|
||||||
|
double bs_ispc_tasks = 1e30;
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
reset_and_start_timer();
|
||||||
|
const char * func_name = "black_scholes_ispc_tasks";
|
||||||
|
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||||
|
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||||
|
dt *= 1e3;
|
||||||
|
sum = 0.;
|
||||||
|
for (int i = 0; i < nOptions; ++i)
|
||||||
|
sum += result[i];
|
||||||
|
bs_ispc_tasks = std::min(bs_ispc_tasks, dt);
|
||||||
|
}
|
||||||
|
printf("[black-scholes ispc, tasks]:\t[%.3f] million cycles (avg %f)\n",
|
||||||
|
bs_ispc_tasks, sum / nOptions);
|
||||||
|
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user