From dc7015c5f24e7849bc48b99dec07252cc3ec6715 Mon Sep 17 00:00:00 2001 From: evghenii Date: Sat, 9 Nov 2013 19:08:08 +0100 Subject: [PATCH 1/2] added wc-timer for host code --- examples_cuda/rt/rt.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/examples_cuda/rt/rt.cpp b/examples_cuda/rt/rt.cpp index 9ec340da..79221a07 100644 --- a/examples_cuda/rt/rt.cpp +++ b/examples_cuda/rt/rt.cpp @@ -133,10 +133,12 @@ int main(int argc, char *argv[]) { if (filename == NULL) usage(); + fprintf(stderr, " --- 1 --- \n"); + #define READ(var, n) \ if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \ fprintf(stderr, "Unexpected EOF reading scene file\n"); \ - return 1; \ + assert(0); \ } else /* eat ; */ // @@ -149,6 +151,7 @@ int main(int argc, char *argv[]) { perror(fnbuf); return 1; } + fprintf(stderr, " --- 2 --- \n"); // // Nothing fancy, and trouble if we run on a big-endian system, just @@ -170,6 +173,7 @@ int main(int argc, char *argv[]) { perror(fnbuf); return 1; } + fprintf(stderr, " --- 3 --- \n"); // The BVH file starts with an int that gives the total number of BVH // nodes @@ -177,6 +181,7 @@ int main(int argc, char *argv[]) { READ(nNodes, 1); LinearBVHNode *nodes = new LinearBVHNode[nNodes]; +#pragma omp parallel for for (unsigned int i = 0; i < nNodes; ++i) { // Each node is 6x floats for a boox, then an integer for an offset // to the second child node, then an integer that encodes the type @@ -195,10 +200,13 @@ int main(int argc, char *argv[]) { READ(nodes[i].pad, 1); } + fprintf(stderr, " --- 4 --- \n"); + // And then read the triangles uint nTris; READ(nTris, 1); Triangle *triangles = new Triangle[nTris]; +#pragma omp parallel for for (uint i = 0; i < nTris; ++i) { // 9x floats for the 3 vertices float v[9]; @@ -214,6 +222,7 @@ int main(int argc, char *argv[]) { } fclose(f); + fprintf(stderr, " --- 5 --- \n"); int height = int(baseHeight * scale); int width = int(baseWidth * scale); From 3a549e5c2fa1749f30dc4c436462adddb57fbeba Mon Sep 17 00:00:00 2001 From: evghenii Date: Sat, 9 Nov 2013 19:26:19 +0100 Subject: [PATCH 2/2] xeonphi tests added for rt --- examples_cuda/rt/rt.cpp | 15 ++++++++++----- examples_cuda/tasksys.cpp | 2 ++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/examples_cuda/rt/rt.cpp b/examples_cuda/rt/rt.cpp index 79221a07..d4a9ba51 100644 --- a/examples_cuda/rt/rt.cpp +++ b/examples_cuda/rt/rt.cpp @@ -181,7 +181,6 @@ int main(int argc, char *argv[]) { READ(nNodes, 1); LinearBVHNode *nodes = new LinearBVHNode[nNodes]; -#pragma omp parallel for for (unsigned int i = 0; i < nNodes; ++i) { // Each node is 6x floats for a boox, then an integer for an offset // to the second child node, then an integer that encodes the type @@ -206,7 +205,6 @@ int main(int argc, char *argv[]) { uint nTris; READ(nTris, 1); Triangle *triangles = new Triangle[nTris]; -#pragma omp parallel for for (uint i = 0; i < nTris; ++i) { // 9x floats for the 3 vertices float v[9]; @@ -255,19 +253,26 @@ int main(int argc, char *argv[]) { // // Run 3 iterations with ispc + 1 core, record the minimum time // + fprintf(stderr, " --- 6 --- \n"); + fflush(stderr); double minTimeISPCtasks = 1e30; - for (int i = 0; i < 3; ++i) { - reset_and_start_timer(); + for (int i = 0; i < 3; ++i) + { + // reset_and_start_timer(); const double t0 = rtc(); raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera, camera2world, image, id, nodes, triangles); double dt = rtc() - t0; //get_elapsed_mcycles(); minTimeISPCtasks = std::min(dt, minTimeISPCtasks); } - printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n", + fprintf(stderr, "[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n", minTimeISPCtasks, width, height); + fflush(stderr); + fprintf(stderr, " --- 7 --- \n"); + fflush(stderr); writeImage(id, image, width, height, "rt-ispc-tasks.ppm"); + return 0; memset(id, 0, width*height*sizeof(int)); memset(image, 0, width*height*sizeof(float)); diff --git a/examples_cuda/tasksys.cpp b/examples_cuda/tasksys.cpp index cccff55b..55cbccd5 100644 --- a/examples_cuda/tasksys.cpp +++ b/examples_cuda/tasksys.cpp @@ -59,7 +59,9 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK +*/ #define ISPC_USE_OMP +/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR