Finish support for 64-bit types in stdlib. Fixes issue #14.

Add much more suppport for doubles and in64 types in the standard library, basically supporting everything for them that are supported for floats and int32s. (The notable exceptions being the approximate rcp() and rsqrt() functions, which don't really have sensible analogs for doubles (or at least not built-in instructions).)
2011-07-07 13:25:55 +01:00
parent f1aaf0115e
commit 5a53a43ed0
49 changed files with 1727 additions and 128 deletions
--- a/stdlib-sse2.ll
+++ b/stdlib-sse2.ll
@@ -152,6 +152,40 @@ define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinlin
  ret float %binop.i
 }

+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; rounding doubles
+
+declare double @round(double)
+declare double @floor(double)
+declare double @ceil(double)
+
+define internal <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
+  unary1to4(double, @round)
+}
+
+define internal double @__round_uniform_double(double) nounwind readonly alwaysinline {
+  %r = call double @round(double %0)
+  ret double %r
+}
+
+define internal <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
+  unary1to4(double, @floor)
+}
+
+define internal double @__floor_uniform_double(double) nounwind readonly alwaysinline {
+  %r = call double @floor(double %0)
+  ret double %r
+}
+
+define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
+  unary1to4(double, @ceil)
+}
+
+define internal double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
+  %r = call double @ceil(double %0)
+  ret double %r
+}
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; min/max

@@ -252,7 +286,7 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
 ; it does generate non-POPCNT code and in particular better code than
 ; the below does.)

-define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
+define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
 entry:
  br label %loop

@@ -269,6 +303,16 @@ exit:
  ret i32 %newcount
 }

+define internal i32 @__popcnt_int64(i64) nounwind readnone alwaysinline {
+  %vec = bitcast i64 %0 to <2 x i32>
+  %v0 = extractelement <2 x i32> %vec, i32 0
+  %v1 = extractelement <2 x i32> %vec, i32 1
+  %c0 = call i32 @__popcnt_int32(i32 %v0)
+  %c1 = call i32 @__popcnt_int32(i32 %v1)
+  %sum = add i32 %c0, %c1
+  ret i32 %sum
+}
+

 define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
  %v1 = shufflevector <4 x float> %v, <4 x float> undef,