Scalar target builtins bugfixes.

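Fix a typo in __max_varying_double (the comparison used `fcmp olt` where `fcmp ogt` is needed to select the maximum). Add declarations for the half/float conversion functions. Use the gen_scatter macro to generate the scatter functions instead of hand-written definitions.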
2012-01-29 13:47:44 -08:00
parent b50f6f1730
commit f2fbc168af
1 changed file with 13 additions and 84 deletions
--- a/builtins/target-generic-1.ll
+++ b/builtins/target-generic-1.ll
@@ -41,89 +41,10 @@ gen_gather(1, i16)
 gen_gather(1, i32)
 gen_gather(1, i64)

-define  void @__scatter_elt_i8(i8 * %base, <1 x i32> %offsets, <1 x i8> %values,
-                                       i32 %lane) nounwind alwaysinline {
-  %offset32 = extractelement <1 x i32> %offsets, i32 %lane
-;  %offset64 = zext i32 %offset32 to i64
-;  %ptrdelta = add i64 %ptr64, %offset64
-;  %ptr = inttoptr i64 %ptrdelta to i8 *
-  %ptroffset = getelementptr i8 *%base, i32 %offset32
-  %ptr = bitcast i8 * %ptroffset to i8 *
-  %storeval = extractelement <1 x i8> %values, i32 %lane
-  store i8 %storeval, i8 * %ptr
-  ret void
-}
-
-define void @__scatter_base_offsets_i8(i8* %base, <1 x i32> %offsets, <1 x i8> %values,
-                                       <1 x i32> %mask) nounwind alwaysinline {
-  ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  ;%ptr64 = ptrtoint i8 * %base to i64
-  call void @__scatter_elt_i8(i8 *%base, <1 x i32> %offsets, <1 x i8> %values, i32 0)
-  ret void
-}
-
-define  void @__scatter_elt_i16(i8 * %base, <1 x i32> %offsets, <1 x i16> %values,
-                                       i32 %lane) nounwind alwaysinline {
-  %offset32 = extractelement <1 x i32> %offsets, i32 %lane
-;  %offset64 = zext i32 %offset32 to i64
-;  %ptrdelta = add i64 %ptr64, %offset64
-;  %ptr = inttoptr i64 %ptrdelta to i16 *
-  %ptroffset = getelementptr i8 *%base, i32 %offset32
-  %ptr = bitcast i8 * %ptroffset to i16 *
-  %storeval = extractelement <1 x i16> %values, i32 %lane
-  store i16 %storeval, i16 * %ptr
-  ret void
-}
-
-define void @__scatter_base_offsets_i16(i8* %base, <1 x i32> %offsets, <1 x i16> %values,
-                                       <1 x i32> %mask) nounwind alwaysinline {
-  ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  ;%ptr64 = ptrtoint i8 * %base to i64
-  call void @__scatter_elt_i16(i8 *%base, <1 x i32> %offsets, <1 x i16> %values, i32 0)
-  ret void
-}
-
-define  void @__scatter_elt_i32(i8 * %base, <1 x i32> %offsets, <1 x i32> %values,
-                                       i32 %lane) nounwind alwaysinline {
-  %offset32 = extractelement <1 x i32> %offsets, i32 %lane
-;  %offset64 = zext i32 %offset32 to i64
-;  %ptrdelta = add i64 %ptr64, %offset64
-;  %ptr = inttoptr i64 %ptrdelta to i32 *
-  %ptroffset = getelementptr i8 *%base, i32 %offset32
-  %ptr = bitcast i8 * %ptroffset to i32 *
-  %storeval = extractelement <1 x i32> %values, i32 %lane
-  store i32 %storeval, i32 * %ptr
-  ret void
-}
-
-define void @__scatter_base_offsets_i32(i8* %base, <1 x i32> %offsets, <1 x i32> %values,
-                                       <1 x i32> %mask) nounwind alwaysinline {
-  ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  ;%ptr64 = ptrtoint i8 * %base to i64
-  call void @__scatter_elt_i32(i8 *%base, <1 x i32> %offsets, <1 x i32> %values, i32 0)
-  ret void
-}
-
-define  void @__scatter_elt_i64(i8 * %base, <1 x i32> %offsets, <1 x i64> %values,
-                                       i32 %lane) nounwind alwaysinline {
-  %offset32 = extractelement <1 x i32> %offsets, i32 %lane
-;  %offset64 = zext i32 %offset32 to i64
-;  %ptrdelta = add i64 %ptr64, %offset64
-;  %ptr = inttoptr i64 %ptrdelta to i64 *
-  %ptroffset = getelementptr i8 *%base, i32 %offset32
-  %ptr = bitcast i8 * %ptroffset to i64 *
-  %storeval = extractelement <1 x i64> %values, i32 %lane
-  store i64 %storeval, i64 * %ptr
-  ret void
-}
-
-define void @__scatter_base_offsets_i64(i8* %base, <1 x i32> %offsets, <1 x i64> %values,
-                                       <1 x i32> %mask) nounwind alwaysinline {
-  ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
-  ;%ptr64 = ptrtoint i8 * %base to i64
-  call void @__scatter_elt_i64(i8 *%base, <1 x i32> %offsets, <1 x i64> %values, i32 0)
-  ret void
-}
+gen_scatter(1, i8)
+gen_scatter(1, i16)
+gen_scatter(1, i32)
+gen_scatter(1, i64)


 define  <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
@@ -854,7 +775,7 @@ define  <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind
  ;ret <1 x double> %ret
  %a = extractelement <1 x double> %0, i32 0
  %b = extractelement <1 x double> %1, i32 0
-  %d = fcmp olt double %a, %b  
+  %d = fcmp ogt double %a, %b  
  %r = select i1 %d, double %a, double %b
  %rv = insertelement <1 x double> undef, double %r, i32 0
  ret <1 x double> %rv    
@@ -1004,3 +925,11 @@ ctlztz()

 define_prefetches()

+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; half conversion routines
+
+declare float @__half_to_float_uniform(i16 %v) nounwind readnone
+declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
+declare i16 @__float_to_half_uniform(float %v) nounwind readnone
+declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
+