Merge pull request #942 from aguskov/master
Slight example code cleanup
This commit is contained in:
@@ -14,7 +14,7 @@ dirs:
|
|||||||
clean:
|
clean:
|
||||||
/bin/rm -rf objs *~ ao
|
/bin/rm -rf objs *~ ao
|
||||||
|
|
||||||
ao: objs/ao.o objs/instrument.o objs/ao_ispc.o ../tasksys.cpp
|
ao: objs/ao.o objs/instrument.o objs/ao_instrumented_ispc.o ../tasksys.cpp
|
||||||
$(CXX) $(CXXFLAGS) -o $@ $^ -lm -lpthread
|
$(CXX) $(CXXFLAGS) -o $@ $^ -lm -lpthread
|
||||||
|
|
||||||
objs/%.o: %.cpp dirs
|
objs/%.o: %.cpp dirs
|
||||||
@@ -23,4 +23,4 @@ objs/%.o: %.cpp dirs
|
|||||||
objs/ao.o: objs/ao_instrumented_ispc.h
|
objs/ao.o: objs/ao_instrumented_ispc.h
|
||||||
|
|
||||||
objs/%_ispc.h objs/%_ispc.o: %.ispc dirs
|
objs/%_ispc.h objs/%_ispc.o: %.ispc dirs
|
||||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_instrumented_ispc.h
|
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||||
|
|||||||
@@ -35,6 +35,8 @@
|
|||||||
#define NOMINMAX
|
#define NOMINMAX
|
||||||
#pragma warning (disable: 4244)
|
#pragma warning (disable: 4244)
|
||||||
#pragma warning (disable: 4305)
|
#pragma warning (disable: 4305)
|
||||||
|
// preventing MSVC fopen() deprecation complaints
|
||||||
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
@@ -34,6 +34,8 @@
|
|||||||
#include "instrument.h"
|
#include "instrument.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
@@ -46,7 +48,7 @@ struct CallInfo {
|
|||||||
|
|
||||||
static std::map<std::string, CallInfo> callInfo;
|
static std::map<std::string, CallInfo> callInfo;
|
||||||
|
|
||||||
int countbits(int i) {
|
int countbits(uint64_t i) {
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
while (i) {
|
while (i) {
|
||||||
if (i & 0x1)
|
if (i & 0x1)
|
||||||
@@ -61,13 +63,12 @@ int countbits(int i) {
|
|||||||
// command-line flag is given while compiling.
|
// command-line flag is given while compiling.
|
||||||
void
|
void
|
||||||
ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
|
ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
|
||||||
char sline[16];
|
std::stringstream s;
|
||||||
sprintf(sline, "%04d", line);
|
s << fn << "(" << std::setfill('0') << std::setw(4) << line << ") - "
|
||||||
std::string s = std::string(fn) + std::string("(") + std::string(sline) +
|
<< note;
|
||||||
std::string(") - ") + std::string(note);
|
|
||||||
|
|
||||||
// Find or create a CallInfo instance for this callsite.
|
// Find or create a CallInfo instance for this callsite.
|
||||||
CallInfo &ci = callInfo[s];
|
CallInfo &ci = callInfo[s.str()];
|
||||||
|
|
||||||
// And update its statistics...
|
// And update its statistics...
|
||||||
++ci.count;
|
++ci.count;
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
int nframes = test_iterations[2];
|
int nframes = test_iterations[2];
|
||||||
double ispcCycles = 1e30;
|
double ispcCycles = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
framebuffer.clear();
|
framebuffer.clear();
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
for (int j = 0; j < nframes; ++j)
|
for (int j = 0; j < nframes; ++j)
|
||||||
@@ -123,7 +123,7 @@ int main(int argc, char** argv) {
|
|||||||
#endif // __cilk
|
#endif // __cilk
|
||||||
|
|
||||||
double serialCycles = 1e30;
|
double serialCycles = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
framebuffer.clear();
|
framebuffer.clear();
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
for (int j = 0; j < nframes; ++j)
|
for (int j = 0; j < nframes; ++j)
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
@@ -119,7 +119,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
@@ -130,7 +130,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
// Clear out the buffer
|
// Clear out the buffer
|
||||||
for (unsigned int i = 0; i < width * height; ++i)
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
noise_ispc(x0, y0, x1, y1, width, height, buf);
|
noise_ispc(x0, y0, x1, y1, width, height, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
@@ -115,7 +115,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
noise_serial(x0, y0, x1, y1, width, height, buf);
|
noise_serial(x0, y0, x1, y1, width, height, buf);
|
||||||
double dt = get_elapsed_mcycles();
|
double dt = get_elapsed_mcycles();
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// Run 3 iterations with ispc + 1 core, record the minimum time
|
// Run 3 iterations with ispc + 1 core, record the minimum time
|
||||||
//
|
//
|
||||||
double minTimeISPC = 1e30;
|
double minTimeISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (uint i = 0; i < test_iterations[0]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
|
raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
camera2world, image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
@@ -231,7 +231,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// Run 3 iterations with ispc + tasks, record the minimum time
|
// Run 3 iterations with ispc + tasks, record the minimum time
|
||||||
//
|
//
|
||||||
double minTimeISPCtasks = 1e30;
|
double minTimeISPCtasks = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (uint i = 0; i < test_iterations[1]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
|
raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
camera2world, image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
@@ -252,7 +252,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minTimeSerial = 1e30;
|
double minTimeSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[2]; ++i) {
|
for (uint i = 0; i < test_iterations[2]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
raytrace_serial(width, height, baseWidth, baseHeight, raster2camera,
|
raytrace_serial(width, height, baseWidth, baseHeight, raster2camera,
|
||||||
camera2world, image, id, nodes, triangles);
|
camera2world, image, id, nodes, triangles);
|
||||||
|
|||||||
@@ -37,6 +37,8 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
@@ -61,9 +63,9 @@ static void progressBar(const int x, const int n, const int width = 50)
|
|||||||
bstr += "]";
|
bstr += "]";
|
||||||
|
|
||||||
// print percentage
|
// print percentage
|
||||||
char pstr0[32];
|
std::stringstream pstr0;
|
||||||
sprintf(pstr0, " %2d %c ", static_cast<int>(f*100.0),'%');
|
pstr0 << " " << static_cast<int>(f*100.0) << " % ";
|
||||||
const std::string pstr(pstr0);
|
const std::string pstr(pstr0.str());
|
||||||
std::copy(pstr.begin(), pstr.end(), bstr.begin() + (width/2-2));
|
std::copy(pstr.begin(), pstr.end(), bstr.begin() + (width/2-2));
|
||||||
|
|
||||||
std::cout << bstr;
|
std::cout << bstr;
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// the minimum time of three runs.
|
// the minimum time of three runs.
|
||||||
//
|
//
|
||||||
double minTimeISPC = 1e30;
|
double minTimeISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
|
loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
|
||||||
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
||||||
@@ -121,7 +121,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// the minimum time of three runs.
|
// the minimum time of three runs.
|
||||||
//
|
//
|
||||||
double minTimeISPCTasks = 1e30;
|
double minTimeISPCTasks = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
|
loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
|
||||||
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
||||||
@@ -140,7 +140,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minTimeSerial = 1e30;
|
double minTimeSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[2]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[2]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width,
|
loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width,
|
||||||
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
width, Nz - width, Nx, Ny, Nz, coeff, vsq,
|
||||||
|
|||||||
@@ -58,8 +58,6 @@ __inline__ uint64_t rdtsc() {
|
|||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
// This is just a stub, it's not used on Windows.
|
|
||||||
double rtc() { return 0.0; };
|
|
||||||
#define rdtsc __rdtsc
|
#define rdtsc __rdtsc
|
||||||
#else // WIN32
|
#else // WIN32
|
||||||
__inline__ uint64_t rdtsc() {
|
__inline__ uint64_t rdtsc() {
|
||||||
@@ -97,7 +95,10 @@ static double tstart, tend;
|
|||||||
static inline void reset_and_start_timer()
|
static inline void reset_and_start_timer()
|
||||||
{
|
{
|
||||||
start = rdtsc();
|
start = rdtsc();
|
||||||
|
#ifndef WIN32
|
||||||
|
// Not used in the Windows build, where rtc() causes link errors
|
||||||
tstart = rtc();
|
tstart = rtc();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the number of millions of elapsed processor cycles since the
|
/* Returns the number of millions of elapsed processor cycles since the
|
||||||
@@ -108,8 +109,11 @@ static inline double get_elapsed_mcycles()
|
|||||||
return (end-start) / (1024. * 1024.);
|
return (end-start) / (1024. * 1024.);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
|
// Not used in the Windows build, where rtc() causes link errors
|
||||||
static inline double get_elapsed_msec()
|
static inline double get_elapsed_msec()
|
||||||
{
|
{
|
||||||
tend = rtc();
|
tend = rtc();
|
||||||
return (tend - tstart)*1e3;
|
return (tend - tstart)*1e3;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// time of three runs.
|
// time of three runs.
|
||||||
//
|
//
|
||||||
double minISPC = 1e30;
|
double minISPC = 1e30;
|
||||||
for (int i = 0; i < test_iterations[0]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[0]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
volume_ispc(density, n, raster2camera, camera2world,
|
volume_ispc(density, n, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
@@ -184,7 +184,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// tasks; report the minimum time of three runs.
|
// tasks; report the minimum time of three runs.
|
||||||
//
|
//
|
||||||
double minISPCtasks = 1e30;
|
double minISPCtasks = 1e30;
|
||||||
for (int i = 0; i < test_iterations[1]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[1]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
volume_ispc_tasks(density, n, raster2camera, camera2world,
|
volume_ispc_tasks(density, n, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
@@ -205,7 +205,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// minimum time.
|
// minimum time.
|
||||||
//
|
//
|
||||||
double minSerial = 1e30;
|
double minSerial = 1e30;
|
||||||
for (int i = 0; i < test_iterations[2]; ++i) {
|
for (unsigned int i = 0; i < test_iterations[2]; ++i) {
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
volume_serial(density, n, raster2camera, camera2world,
|
volume_serial(density, n, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
|
|||||||
Reference in New Issue
Block a user