Add clock() function to standard library.

Also corrected the declaration of num_cores() to return a
uniform value.
This commit is contained in:
Matt Pharr
2012-01-22 13:05:27 -08:00
parent 1f0f2ec05f
commit d805e8b183
3 changed files with 39 additions and 4 deletions

View File

@@ -1811,6 +1811,22 @@ ok:
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; read hw clock
; Returns a 64-bit hardware clock value obtained with the x86 "rdtsc"
; instruction.  A "cpuid" is executed before the read; ispc's documentation
; for clock() describes the call as flushing the processor pipeline.
define i64 @__clock() nounwind uwtable ssp {
entry:
; Zero eax, then run cpuid ahead of the counter read.  The constraint string
; marks rax/rbx/rcx/rdx (plus the flag registers) as clobbered.
tail call void asm sideeffect "xorl %eax,%eax \0A cpuid", "~{rax},~{rbx},~{rcx},~{rdx},~{dirflag},~{fpsr},~{flags}"() nounwind
; rdtsc yields two 32-bit outputs, bound here to the ax and dx constraints.
%0 = tail call { i32, i32 } asm sideeffect "rdtsc", "={ax},={dx},~{dirflag},~{fpsr},~{flags}"() nounwind
; %asmresult is the ax output (low half), %asmresult1 the dx output (high half).
%asmresult = extractvalue { i32, i32 } %0, 0
%asmresult1 = extractvalue { i32, i32 } %0, 1
; Combine the halves into one i64: (dx << 32) | ax, both zero-extended.
%conv = zext i32 %asmresult1 to i64
%shl = shl nuw i64 %conv, 32
%conv2 = zext i32 %asmresult to i64
%or = or i64 %shl, %conv2
ret i64 %or
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stdlib transcendentals
;;

View File

@@ -3444,12 +3444,27 @@ pointer types.
System Information
------------------
A routine is available to find the number of CPU cores available in the
system:
The value of a high-precision hardware clock counter is returned by the
``clock()`` routine; its value increments by one each processor cycle.
Thus, taking the difference between the values returned by ``clock()`` at
different points in program execution gives the number of cycles between
those points in the program.
::
int num_cores()
uniform int64 clock()
Note that ``clock()`` flushes the processor pipeline. It has an overhead
of a hundred or so cycles, so for very fine-grained measurements, it may be
worthwhile to measure the cost of calling ``clock()`` and subtracting that
value from reported results.
A routine is also available to find the number of CPU cores available in
the system:
::
uniform int num_cores()
This value can be useful for adapting the granularity of parallel task
decomposition depending on the number of processors in the system.

View File

@@ -787,10 +787,14 @@ packed_store_active(uniform int * uniform a, int vals) {
///////////////////////////////////////////////////////////////////////////
// System information
// Returns the result of the __num_cores() builtin.  The user documentation
// describes this as the number of CPU cores available in the system.
static inline int num_cores() {
static inline uniform int num_cores() {
return __num_cores();
}
// Reads the hardware clock counter via the __clock() builtin and hands the
// value back to the caller as a uniform 64-bit integer.
static inline uniform int64 clock() {
    uniform int64 cycles = __clock();
    return cycles;
}
///////////////////////////////////////////////////////////////////////////
// Atomics and memory barriers