// CycleTimer.h (tests_ispcpp/CycleTimer.h)
//
// Header-only, static-only timer built on the cheapest per-platform tick
// source (mach_absolute_time, QueryPerformanceCounter, rdtsc, or
// clock_gettime), plus the conversion factors between ticks and seconds.
#ifndef _SYRAH_CYCLE_TIMER_H_
#define _SYRAH_CYCLE_TIMER_H_

#include <stdio.h>   // fprintf, sscanf, fopen, fgets
#include <stdlib.h>  // exit
#include <string.h>  // strstr

#if defined(__APPLE__)
  #if defined(__x86_64__)
    #include <sys/sysctl.h>
  #else
    #include <mach/mach.h>
    #include <mach/mach_time.h>
  #endif // __x86_64__ or not
#elif defined(_WIN32)
#  include <windows.h>
#  include <time.h>
#else
#  include <time.h>      // clock_gettime, timespec
#  include <sys/time.h>
#endif

namespace cycle_timer_detail {

  //////////
  // Try to extract a clock rate from a single line of /proc/cpuinfo.
  //
  //   line            NUL-terminated text of one cpuinfo line.
  //   secondsPerTick  written only when a rate is found.
  //
  // Returns true when the line yielded a positive frequency. Two forms
  // are recognised:
  //   "model name ... @ <x>GHz" / "model name ... @ <x>MHz"
  //   "cpu MHz : <x>"
  // A "model name" line is never interpreted as a "cpu MHz" line.
  inline bool parseCpuinfoLine(const char* line, double* secondsPerTick) {
    if (strstr(line, "model name")) {
      // NOTE(boulos): Because reading cpuinfo depends on dynamic
      // frequency scaling it's better to read the @ sign first
      const char* at_sign = strstr(line, "@");
      if (!at_sign) return false;

      const char* after_at = at_sign + 1;
      double value = 0.0;
      // sscanf stops at the unit suffix on its own, so the line does not
      // need to be truncated before parsing.
      if (strstr(after_at, "GHz")) {
        if (1 == sscanf(after_at, "%lf", &value) && value > 0.0) {
          *secondsPerTick = 1e-9 / value;
          return true;
        }
      } else if (strstr(after_at, "MHz")) {
        if (1 == sscanf(after_at, "%lf", &value) && value > 0.0) {
          // Divide by the MHz value that was just parsed.
          *secondsPerTick = 1e-6 / value;
          return true;
        }
      }
      return false;
    }

    double mhz = 0.0;
    if (1 == sscanf(line, "cpu MHz : %lf", &mhz) && mhz > 0.0) {
      *secondsPerTick = 1e-6 / mhz;
      return true;
    }
    return false;
  }

} // namespace cycle_timer_detail


  // This uses the cycle counter of the processor.  Different
  // processors in the system will have different values for this.  If
  // your process moves across processors, then the delta time you
  // measure will likely be incorrect.  This is mostly for fine
  // grained measurements where the process is likely to be on the
  // same processor.  For more global things you should use the
  // Time interface.

  // Also note that if your processors' speeds change (i.e. processor
  // scaling) or if you are in a heterogeneous environment, you will
  // likely get spurious results.
  class CycleTimer {
  public:
    typedef unsigned long long SysClock;

    //////////
    // Return the current CPU time, in terms of clock ticks.
    // Time zero is at some arbitrary point in the past.
    // The unit of a tick depends on the platform; see tickUnits().
    static SysClock currentTicks() {
#if defined(__APPLE__) && !defined(__x86_64__)
      return mach_absolute_time();
#elif defined(_WIN32)
      LARGE_INTEGER qwTime;
      QueryPerformanceCounter(&qwTime);
      return static_cast<SysClock>(qwTime.QuadPart);
#elif defined(__x86_64__)
      // rdtsc returns the 64-bit counter split across EAX (low) and
      // EDX (high).
      unsigned int a, d;
      asm volatile("rdtsc" : "=a" (a), "=d" (d));
      return static_cast<SysClock>(a) |
        (static_cast<SysClock>(d) << 32);
#elif defined(__ARM_NEON__) && 0 // mrc requires superuser.
      unsigned int val;
      asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
      return val;
#else
      // Per-thread CPU time in nanoseconds.  Integer arithmetic keeps
      // every nanosecond exact.
      timespec spec;
      clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec);
      return static_cast<SysClock>(spec.tv_sec) * 1000000000ULL +
        static_cast<SysClock>(spec.tv_nsec);
#endif
    }

    //////////
    // Return the current CPU time, in terms of seconds.
    // This is slower than currentTicks().  Time zero is at
    // some arbitrary point in the past.
    static double currentSeconds() {
      return currentTicks() * secondsPerTick();
    }

    //////////
    // Return the conversion from seconds to ticks.
    static double ticksPerSecond() {
      return 1.0/secondsPerTick();
    }

    //////////
    // Return a short label for the unit currentTicks() counts in.
    // The platform tests mirror the ones in currentTicks().
    static const char* tickUnits() {
#if defined(__APPLE__) && !defined(__x86_64__)
      return "ns";
#elif defined(_WIN32) || defined(__x86_64__)
      return "cycles";
#else
      return "ns"; // clock_gettime
#endif
    }

    //////////
    // Return the conversion from ticks to seconds.
    // The factor is computed on the first call and cached in a
    // function-local static; later calls return the cached value.
    // Exits the process if the platform query fails (sysctl on
    // x86_64 Apple, opening /proc/cpuinfo on other x86_64 systems).
    static double secondsPerTick() {
      static bool initialized = false;
      static double secondsPerTick_val;
      if (initialized) return secondsPerTick_val;
#if defined(__APPLE__)
  #ifdef __x86_64__
      int args[] = {CTL_HW, HW_CPU_FREQ};
      unsigned int Hz;
      size_t len = sizeof(Hz);
      if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
        fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
        exit(-1);
      }
      secondsPerTick_val = 1.0 / static_cast<double>(Hz);
  #else
      mach_timebase_info_data_t time_info;
      mach_timebase_info(&time_info);

      // numer/denom scales ticks to nanoseconds; 1e-9 takes that to
      // seconds.
      secondsPerTick_val = (1e-9*static_cast<double>(time_info.numer))/
        static_cast<double>(time_info.denom);
  #endif // x86_64 or not
#elif defined(_WIN32)
      LARGE_INTEGER qwTicksPerSec;
      QueryPerformanceFrequency(&qwTicksPerSec);
      secondsPerTick_val = 1.0/static_cast<double>(qwTicksPerSec.QuadPart);
#elif defined(__x86_64__)
      // Ticks come from rdtsc here, so the scale is the CPU clock
      // period as reported by /proc/cpuinfo.
      FILE *fp = fopen("/proc/cpuinfo","r");
      if (!fp) {
        fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo.");
        exit(-1);
      }
      // In case we don't find it
      secondsPerTick_val = 1e-9;
      char input[1024];
      while (fgets(input, sizeof(input), fp)) {
        if (cycle_timer_detail::parseCpuinfoLine(input, &secondsPerTick_val)) {
          break;
        }
      }
      fclose(fp);
#else
      // currentTicks() reports clock_gettime nanoseconds on this path,
      // so the scale is fixed regardless of the CPU frequency.
      secondsPerTick_val = 1e-9;
#endif

      initialized = true;
      return secondsPerTick_val;
    }

    //////////
    // Return the conversion from ticks to milliseconds.
    static double msPerTick() {
      return secondsPerTick() * 1000.0;
    }

  private:
    // Declared but not defined here: every member is static, and the
    // private constructor keeps outside code from creating instances.
    CycleTimer();
  };

#endif // #ifndef _SYRAH_CYCLE_TIMER_H_