added cuda versino
This commit is contained in:
55
examples_cuda/options/Makefile_gpu
Normal file
55
examples_cuda/options/Makefile_gpu
Normal file
@@ -0,0 +1,55 @@
|
||||
PROG=options_cu
|
||||
ISPC_SRC=options.ispc
|
||||
CXX_SRC=options_cu.cpp
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-O3 -I$(CUDATK)/include
|
||||
LD=g++
|
||||
LDFLAGS=-lcuda
|
||||
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
PTXGEN += -opt=3
|
||||
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
.SUFFIXES: .bc .o .ptx .cu _ispc_nvptx64.bc
|
||||
|
||||
|
||||
ISPC_OBJ=$(ISPC_SRC:%.ispc=%_ispc.o)
|
||||
ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
|
||||
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
|
||||
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)
|
||||
|
||||
all: $(PROG)
|
||||
|
||||
|
||||
$(CXX_OBJ) : kernel.ptx
|
||||
$(PROG): $(CXX_OBJ) kernel.ptx
|
||||
/bin/cp kernel.ptx __kernels.ptx
|
||||
$(LD) -o $@ $(CXX_OBJ) $(LDFLAGS)
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
|
||||
%_ispc_nvptx64.bc: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc_nvptx64.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm
|
||||
|
||||
%.ptx: %.bc
|
||||
$(LLVM32DIS) $<
|
||||
$(PTXGEN) `basename $< .bc`.ll > $@
|
||||
|
||||
kernel.ptx: $(PTXSRC)
|
||||
cat $^ > kernel.ptx
|
||||
|
||||
clean:
|
||||
/bin/rm -rf *.ptx *.bc *.ll $(PROG)
|
||||
|
||||
|
||||
|
||||
150
examples_cuda/options/options.cu
Normal file
150
examples_cuda/options/options.cu
Normal file
@@ -0,0 +1,150 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "options_defs.h"
|
||||
|
||||
#define programCount 32
|
||||
#define programIndex (threadIdx.x & 31)
|
||||
#define taskIndex (blockIdx.x*4 + (threadIdx.x >> 5))
|
||||
#define taskCount (gridDim.x*4)
|
||||
#define warpIdx (threadIdx.x >> 5)
|
||||
|
||||
// Cumulative normal distribution function
|
||||
//
|
||||
__device__
|
||||
static inline float
|
||||
CND(float X) {
|
||||
float L = abs(X);
|
||||
|
||||
float k = 1.0 / (1.0 + 0.2316419 * L);
|
||||
float k2 = k*k;
|
||||
float k3 = k2*k;
|
||||
float k4 = k2*k2;
|
||||
float k5 = k3*k2;
|
||||
|
||||
const float invSqrt2Pi = 0.39894228040f;
|
||||
float w = (0.31938153f * k - 0.356563782f * k2 + 1.781477937f * k3 +
|
||||
-1.821255978f * k4 + 1.330274429f * k5);
|
||||
w *= invSqrt2Pi * exp(-L * L * .5f);
|
||||
|
||||
if (X > 0.f)
|
||||
w = 1.0 - w;
|
||||
return w;
|
||||
}
|
||||
|
||||
__global__
|
||||
void bs_task( float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count) {
|
||||
int first = taskIndex * (count/taskCount);
|
||||
int last = min(count, (int)((taskIndex+1) * (count/taskCount)));
|
||||
|
||||
for (int i = programIndex + first; i < last; i += programCount)
|
||||
if (i < last)
|
||||
{
|
||||
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||
|
||||
float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T));
|
||||
float d2 = d1 - v * sqrt(T);
|
||||
|
||||
result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__global__ void
|
||||
black_scholes_ispc_tasks( float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count) {
|
||||
int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384);
|
||||
bs_task<<<nTasks/4,128>>>(Sa, Xa, Ta, ra, va, result, count);
|
||||
}
|
||||
|
||||
/********/
|
||||
|
||||
|
||||
__device__
|
||||
static inline float
|
||||
binomial_put(float S, float X, float T, float r, float v) {
|
||||
float V[BINOMIAL_NUM];
|
||||
|
||||
float dt = T / BINOMIAL_NUM;
|
||||
float u = exp(v * sqrt(dt));
|
||||
float d = 1. / u;
|
||||
float disc = exp(r * dt);
|
||||
float Pu = (disc - d) / (u - d);
|
||||
|
||||
#pragma unroll
|
||||
for ( int j = 0; j < BINOMIAL_NUM; ++j) {
|
||||
float upow = pow(u, (float)(2*j-BINOMIAL_NUM));
|
||||
V[j] = max(0., X - S * upow);
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
for ( int j = BINOMIAL_NUM-1; j >= 0; --j)
|
||||
#pragma unroll
|
||||
for ( int k = 0; k < j; ++k)
|
||||
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
|
||||
return V[0];
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void
|
||||
binomial_task( float Sa[], float Xa[],
|
||||
float Ta[], float ra[],
|
||||
float va[], float result[],
|
||||
int count) {
|
||||
int first = taskIndex * (count/taskCount);
|
||||
int last = min(count, (int)((taskIndex+1) * (count/taskCount)));
|
||||
|
||||
for (int i = programIndex + first; i < last; i += programCount)
|
||||
if (i < last)
|
||||
{
|
||||
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||
result[i] = binomial_put(S, X, T, r, v);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" __global__ void
|
||||
binomial_put_ispc_tasks( float Sa[], float Xa[],
|
||||
float Ta[], float ra[],
|
||||
float va[], float result[],
|
||||
int count) {
|
||||
int nTasks = 2048; //count/16384; //max((int)64, (int)count/16384);
|
||||
if (programIndex == 0)
|
||||
binomial_task<<<nTasks/4,128>>>(Sa, Xa, Ta, ra, va, result, count);
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
184
examples_cuda/options/options_cu.cpp
Normal file
184
examples_cuda/options/options_cu.cpp
Normal file
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define NOMINMAX
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
using std::max;
|
||||
|
||||
#include "options_defs.h"
|
||||
#include "../timing.h"
|
||||
|
||||
#include "options_ispc.h"
|
||||
using namespace ispc;
|
||||
#include <sys/time.h>
|
||||
#include "../cuda_ispc.h"
|
||||
|
||||
static void usage() {
|
||||
printf("usage: options [--count=<num options>]\n");
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int nOptions = 128*1024;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (strncmp(argv[i], "--count=", 8) == 0) {
|
||||
nOptions = atoi(argv[i] + 8);
|
||||
if (nOptions <= 0) {
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float *S = new float[nOptions];
|
||||
float *X = new float[nOptions];
|
||||
float *T = new float[nOptions];
|
||||
float *r = new float[nOptions];
|
||||
float *v = new float[nOptions];
|
||||
float *result = new float[nOptions];
|
||||
|
||||
for (int i = 0; i < nOptions; ++i) {
|
||||
S[i] = 100; // stock price
|
||||
X[i] = 98; // option strike price
|
||||
T[i] = 2; // time (years)
|
||||
r[i] = .02; // risk-free interest rate
|
||||
v[i] = 5; // volatility
|
||||
}
|
||||
|
||||
/*******************/
|
||||
createContext();
|
||||
/*******************/
|
||||
devicePtr d_S = deviceMalloc(nOptions*sizeof(float));
|
||||
devicePtr d_X = deviceMalloc(nOptions*sizeof(float));
|
||||
devicePtr d_T = deviceMalloc(nOptions*sizeof(float));
|
||||
devicePtr d_r = deviceMalloc(nOptions*sizeof(float));
|
||||
devicePtr d_v = deviceMalloc(nOptions*sizeof(float));
|
||||
devicePtr d_result = deviceMalloc(nOptions*sizeof(float));
|
||||
|
||||
memcpyH2D(d_S, S, nOptions*sizeof(float));
|
||||
memcpyH2D(d_X, X, nOptions*sizeof(float));
|
||||
memcpyH2D(d_T, T, nOptions*sizeof(float));
|
||||
memcpyH2D(d_r, r, nOptions*sizeof(float));
|
||||
memcpyH2D(d_v, v, nOptions*sizeof(float));
|
||||
|
||||
double sum;
|
||||
|
||||
//
|
||||
// Binomial options pricing model, ispc implementation
|
||||
//
|
||||
const bool print_log = false;
|
||||
const int nreg = 32;
|
||||
double binomial_ispc = 1e30;
|
||||
#if 0
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
const double t0 = rtc();
|
||||
const char * func_name = "binomial_put_ispc";
|
||||
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||
dt *= 1e3;
|
||||
sum = 0.;
|
||||
for (int i = 0; i < nOptions; ++i)
|
||||
sum += result[i];
|
||||
binomial_ispc = std::min(binomial_ispc, dt);
|
||||
}
|
||||
printf("[binomial ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n",
|
||||
binomial_ispc, sum / nOptions);
|
||||
#endif
|
||||
|
||||
//
|
||||
// Binomial options pricing model, ispc implementation, tasks
|
||||
//
|
||||
double binomial_tasks = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
const char * func_name = "binomial_put_ispc_tasks";
|
||||
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||
dt *= 1e3;
|
||||
sum = 0.;
|
||||
for (int i = 0; i < nOptions; ++i)
|
||||
sum += result[i];
|
||||
binomial_tasks = std::min(binomial_tasks, dt);
|
||||
}
|
||||
printf("[binomial ispc, tasks]:\t\t[%.3f] million cycles (avg %f)\n",
|
||||
binomial_tasks, sum / nOptions);
|
||||
|
||||
//
|
||||
// Black-Scholes options pricing model, ispc implementation, 1 thread
|
||||
//
|
||||
double bs_ispc = 1e30;
|
||||
#if 0
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
const char * func_name = "black_scholes_ispc";
|
||||
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||
dt *= 1e3;
|
||||
sum = 0.;
|
||||
for (int i = 0; i < nOptions; ++i)
|
||||
sum += result[i];
|
||||
bs_ispc = std::min(bs_ispc, dt);
|
||||
}
|
||||
printf("[black-scholes ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n",
|
||||
bs_ispc, sum / nOptions);
|
||||
#endif
|
||||
|
||||
//
|
||||
// Black-Scholes options pricing model, ispc implementation, tasks
|
||||
//
|
||||
double bs_ispc_tasks = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
const char * func_name = "black_scholes_ispc_tasks";
|
||||
void *func_args[] = {&d_S, &d_X, &d_T, &d_r, &d_v, &d_result, &nOptions};
|
||||
double dt = CUDALaunch(NULL, func_name, func_args, print_log, nreg);
|
||||
dt *= 1e3;
|
||||
sum = 0.;
|
||||
for (int i = 0; i < nOptions; ++i)
|
||||
sum += result[i];
|
||||
bs_ispc_tasks = std::min(bs_ispc_tasks, dt);
|
||||
}
|
||||
printf("[black-scholes ispc, tasks]:\t[%.3f] million cycles (avg %f)\n",
|
||||
bs_ispc_tasks, sum / nOptions);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user