diff --git a/examples/options/Makefile b/examples/options/Makefile index 4163ba3a..404acff7 100644 --- a/examples/options/Makefile +++ b/examples/options/Makefile @@ -1,4 +1,9 @@ +TASK_CXX=../tasksys.cpp +TASK_LIB=-lpthread +TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) + + CXX=g++ -m64 CXXFLAGS=-Iobjs/ -g -Wall ISPC=ispc @@ -6,7 +11,7 @@ ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 OBJS=objs/options.o objs/options_serial.o objs/options_ispc.o \ objs/options_ispc_sse2.o objs/options_ispc_sse4.o \ - objs/options_ispc_avx.o + objs/options_ispc_avx.o $(TASK_OBJ) default: options @@ -19,11 +24,14 @@ clean: /bin/rm -rf objs *~ options options: dirs $(OBJS) - $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ +objs/%.o: ../%.cpp + $(CXX) $< $(CXXFLAGS) -c -o $@ + objs/options.o: objs/options_ispc.h options_defs.h objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc options_defs.h diff --git a/examples/options/options.cpp b/examples/options/options.cpp index 5fe48f86..2b9c40ec 100644 --- a/examples/options/options.cpp +++ b/examples/options/options.cpp @@ -53,15 +53,32 @@ extern void binomial_put_serial(float Sa[], float Xa[], float Ta[], float ra[], float va[], float result[], int count); -int main() { - float *S = new float[N_OPTIONS]; - float *X = new float[N_OPTIONS]; - float *T = new float[N_OPTIONS]; - float *r = new float[N_OPTIONS]; - float *v = new float[N_OPTIONS]; - float *result = new float[N_OPTIONS]; +static void usage() { + printf("usage: options [--count=]\n"); +} - for (int i = 0; i < N_OPTIONS; ++i) { + +int main(int argc, char *argv[]) { + int nOptions = 128*1024; + + for (int i = 1; i < argc; ++i) { + if (strncmp(argv[i], "--count=", 8) == 0) { + nOptions = atoi(argv[i] + 8); + if (nOptions <= 0) { + usage(); + exit(1); + } + } + } + + float *S = new float[nOptions]; + float *X = new float[nOptions]; + float *T = new float[nOptions]; + float *r = new float[nOptions]; + float *v = new float[nOptions]; + float *result = new float[nOptions]; + + for (int i = 0; i < nOptions; ++i) { S[i] = 100; // stock price X[i] = 98; // option strike price T[i] = 2; // time (years) @@ -69,61 +86,109 @@ int main() { v[i] = 5; // volatility } + double sum; + // // Binomial options pricing model, ispc implementation // - reset_and_start_timer(); - binomial_put_ispc(S, X, T, r, v, result, N_OPTIONS); - double binomial_ispc = get_elapsed_mcycles(); - float sum = 0.f; - for (int i = 0; i < N_OPTIONS; ++i) - sum += result[i]; - printf("[binomial ispc]:\t\t[%.3f] million cycles (avg %f)\n", - binomial_ispc, sum / N_OPTIONS); + double binomial_ispc = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + binomial_put_ispc(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + binomial_ispc = std::min(binomial_ispc, dt); + } + printf("[binomial ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n", + binomial_ispc, sum / nOptions); + + // + // Binomial options pricing model, ispc implementation, tasks + // + double binomial_tasks = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + binomial_put_ispc_tasks(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + binomial_tasks = std::min(binomial_tasks, dt); + } + printf("[binomial ispc, tasks]:\t\t[%.3f] million cycles (avg %f)\n", + binomial_tasks, sum / nOptions); // // Binomial options, serial implementation // - reset_and_start_timer(); - binomial_put_serial(S, X, T, r, v, result, N_OPTIONS); - double binomial_serial = get_elapsed_mcycles(); - sum = 0.f; - for (int i = 0; i < N_OPTIONS; ++i) - sum += result[i]; - printf("[binomial serial]:\t\t[%.3f] million cycles (avg %f)\n", - binomial_serial, sum / N_OPTIONS); - - printf("\t\t\t\t(%.2fx speedup from ISPC)\n", binomial_serial / binomial_ispc); - - // - // Black-Scholes options pricing model, ispc implementation - // - sum = 0.f; - reset_and_start_timer(); - for (int a = 0; a < N_BLACK_SCHOLES_ROUNDS; ++a) { - black_scholes_ispc(S, X, T, r, v, result, N_OPTIONS); - for (int i = 0; i < N_OPTIONS; ++i) + double binomial_serial = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + binomial_put_serial(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) sum += result[i]; + binomial_serial = std::min(binomial_serial, dt); } - double bs_ispc = get_elapsed_mcycles(); - printf("[black-scholes ispc]:\t\t[%.3f] million cycles (avg %f)\n", - bs_ispc, sum / (N_BLACK_SCHOLES_ROUNDS * N_OPTIONS)); + printf("[binomial serial]:\t\t[%.3f] million cycles (avg %f)\n", + binomial_serial, sum / nOptions); + + printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", + binomial_serial / binomial_ispc, binomial_serial / binomial_tasks); + + // + // Black-Scholes options pricing model, ispc implementation, 1 thread + // + double bs_ispc = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + black_scholes_ispc(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + bs_ispc = std::min(bs_ispc, dt); + } + printf("[black-scholes ispc, 1 thread]:\t[%.3f] million cycles (avg %f)\n", + bs_ispc, sum / nOptions); + + // + // Black-Scholes options pricing model, ispc implementation, tasks + // + double bs_ispc_tasks = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + black_scholes_ispc_tasks(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) + sum += result[i]; + bs_ispc_tasks = std::min(bs_ispc_tasks, dt); + } + printf("[black-scholes ispc, tasks]:\t[%.3f] million cycles (avg %f)\n", + bs_ispc_tasks, sum / nOptions); // // Black-Scholes options pricing model, serial implementation // - sum = 0.f; - reset_and_start_timer(); - for (int a = 0; a < N_BLACK_SCHOLES_ROUNDS; ++a) { - black_scholes_serial(S, X, T, r, v, result, N_OPTIONS); - for (int i = 0; i < N_OPTIONS; ++i) + double bs_serial = 1e30; + for (int i = 0; i < 3; ++i) { + reset_and_start_timer(); + black_scholes_serial(S, X, T, r, v, result, nOptions); + double dt = get_elapsed_mcycles(); + sum = 0.; + for (int i = 0; i < nOptions; ++i) sum += result[i]; + bs_serial = std::min(bs_serial, dt); } - double bs_serial = get_elapsed_mcycles(); printf("[black-scholes serial]:\t\t[%.3f] million cycles (avg %f)\n", bs_serial, - sum / (N_BLACK_SCHOLES_ROUNDS * N_OPTIONS)); + sum / nOptions); - printf("\t\t\t\t(%.2fx speedup from ISPC)\n", bs_serial / bs_ispc); + printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", + bs_serial / bs_ispc, bs_serial / bs_ispc_tasks); return 0; } diff --git a/examples/options/options.ispc b/examples/options/options.ispc index f1ea3678..d94281ec 100644 --- a/examples/options/options.ispc +++ b/examples/options/options.ispc @@ -55,6 +55,32 @@ CND(float X) { return w; } +task void +bs_task(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + uniform int first = taskIndex * (count/taskCount); + uniform int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + foreach (i = first ... last) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + + float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T)); + float d2 = d1 - v * sqrt(T); + + result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2); + } +} + +export void +black_scholes_ispc_tasks(uniform float Sa[], uniform float Xa[], uniform float Ta[], + uniform float ra[], uniform float va[], + uniform float result[], uniform int count) { + uniform int nTasks = max((int)1, (int)count/1024); + launch[nTasks] < bs_task(Sa, Xa, Ta, ra, va, result, count) >; +} + + export void black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], uniform float ra[], uniform float va[], @@ -70,30 +96,59 @@ black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], } +static inline float +binomial_put(float S, float X, float T, float r, float v) { + float V[BINOMIAL_NUM]; + + float dt = T / BINOMIAL_NUM; + float u = exp(v * sqrt(dt)); + float d = 1. / u; + float disc = exp(r * dt); + float Pu = (disc - d) / (u - d); + + for (uniform int j = 0; j < BINOMIAL_NUM; ++j) { + float upow = pow(u, (float)(2*j-BINOMIAL_NUM)); + V[j] = max(0., X - S * upow); + } + + for (uniform int j = BINOMIAL_NUM-1; j >= 0; --j) + for (uniform int k = 0; k < j; ++k) + V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc; + return V[0]; +} + + export void binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], uniform float ra[], uniform float va[], uniform float result[], uniform int count) { - float V[BINOMIAL_NUM]; - foreach (i = 0 ... count) { float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; - - float dt = T / BINOMIAL_NUM; - float u = exp(v * sqrt(dt)); - float d = 1. / u; - float disc = exp(r * dt); - float Pu = (disc - d) / (u - d); - - for (uniform int j = 0; j < BINOMIAL_NUM; ++j) { - float upow = pow(u, (float)(2*j-BINOMIAL_NUM)); - V[j] = max(0., X - S * upow); - } - - for (uniform int j = BINOMIAL_NUM-1; j >= 0; --j) - for (uniform int k = 0; k < j; ++k) - V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc; - - result[i] = V[0]; + result[i] = binomial_put(S, X, T, r, v); } } + + +task void +binomial_task(uniform float Sa[], uniform float Xa[], + uniform float Ta[], uniform float ra[], + uniform float va[], uniform float result[], + uniform int count) { + uniform int first = taskIndex * (count/taskCount); + uniform int last = min(count, (int)((taskIndex+1) * (count/taskCount))); + + foreach (i = first ... last) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; + result[i] = binomial_put(S, X, T, r, v); + } +} + + +export void +binomial_put_ispc_tasks(uniform float Sa[], uniform float Xa[], + uniform float Ta[], uniform float ra[], + uniform float va[], uniform float result[], + uniform int count) { + uniform int nTasks = max((int)1, (int)count/1024); + launch[nTasks] < binomial_task(Sa, Xa, Ta, ra, va, result, count) >; +} diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index 094eb19e..b029b598 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -155,6 +155,7 @@ + diff --git a/examples/options/options_defs.h b/examples/options/options_defs.h index 54b8ec81..ee1450c1 100644 --- a/examples/options/options_defs.h +++ b/examples/options/options_defs.h @@ -35,8 +35,6 @@ #define OPTIONS_DEFS_H 1 #define BINOMIAL_NUM 64 -#define N_OPTIONS 65536 -#define N_BLACK_SCHOLES_ROUNDS 20 #endif // OPTIONS_DEFS_H