diff --git a/stdlib.ispc b/stdlib.ispc
index 33c716c9..a8c52f08 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -49,236 +49,293 @@
 ///////////////////////////////////////////////////////////////////////////
 // Low level primitives
+__declspec(safe,cost0)
 static inline float floatbits(unsigned int a) {
     return __floatbits_varying_int32(a);
 }
+__declspec(safe,cost0)
 static inline uniform float floatbits(uniform unsigned int a) {
     return __floatbits_uniform_int32(a);
 }
+__declspec(safe,cost0)
 static inline float floatbits(int a) {
     return __floatbits_varying_int32(a);
 }
+__declspec(safe,cost0)
 static inline uniform float floatbits(uniform int a) {
     return __floatbits_uniform_int32(a);
 }
+__declspec(safe,cost0)
 static inline double doublebits(unsigned int64 a) {
     return __doublebits_varying_int64(a);
 }
+__declspec(safe,cost0)
 static inline uniform double doublebits(uniform unsigned int64 a) {
     return __doublebits_uniform_int64(a);
 }
+__declspec(safe,cost0)
 static inline unsigned int intbits(float a) {
     return __intbits_varying_float(a);
 }
+__declspec(safe,cost0)
 static inline uniform unsigned int intbits(uniform float a) {
     return __intbits_uniform_float(a);
 }
+__declspec(safe,cost0)
 static inline unsigned int64 intbits(double d) {
     return __intbits_varying_double(d);
 }
+__declspec(safe,cost0)
 static inline uniform unsigned int64 intbits(uniform double d) {
     return __intbits_uniform_double(d);
 }
+__declspec(safe)
 static inline float broadcast(float v, uniform int i) {
     return __broadcast_float(v, i);
 }
+__declspec(safe)
 static inline int8 broadcast(int8 v, uniform int i) {
     return __broadcast_i8(v, i);
 }
+__declspec(safe)
 static inline int16 broadcast(int16 v, uniform int i) {
     return __broadcast_i16(v, i);
 }
+__declspec(safe)
 static inline int32 broadcast(int32 v, uniform int i) {
     return __broadcast_i32(v, i);
 }
+__declspec(safe)
 static inline double broadcast(double v, uniform int i) {
     return __broadcast_double(v, i);
 }
+__declspec(safe)
 static inline int64 broadcast(int64 v, uniform int i) {
     return __broadcast_i64(v, i);
 }
+__declspec(safe)
 static inline float rotate(float v, uniform int i) {
     return __rotate_float(v, i);
 }
+__declspec(safe)
 static inline int8 rotate(int8 v, uniform int i) {
     return __rotate_i8(v, i);
 }
+__declspec(safe)
 static inline int16 rotate(int16 v, uniform int i) {
     return __rotate_i16(v, i);
 }
+__declspec(safe)
 static inline int32 rotate(int32 v, uniform int i) {
     return __rotate_i32(v, i);
 }
+__declspec(safe)
 static inline double rotate(double v, uniform int i) {
     return __rotate_double(v, i);
 }
+__declspec(safe)
 static inline int64 rotate(int64 v, uniform int i) {
     return __rotate_i64(v, i);
 }
+__declspec(safe)
 static inline float shuffle(float v, int i) {
     return __shuffle_float(v, i);
 }
+__declspec(safe)
 static inline int8 shuffle(int8 v, int i) {
     return __shuffle_i8(v, i);
 }
+__declspec(safe)
 static inline int16 shuffle(int16 v, int i) {
     return __shuffle_i16(v, i);
 }
+__declspec(safe)
 static inline int32 shuffle(int32 v, int i) {
     return __shuffle_i32(v, i);
 }
+__declspec(safe)
 static inline double shuffle(double v, int i) {
     return __shuffle_double(v, i);
 }
+__declspec(safe)
 static inline int64 shuffle(int64 v, int i) {
     return __shuffle_i64(v, i);
 }
+__declspec(safe)
 static inline float shuffle(float v0, float v1, int i) {
     return __shuffle2_float(v0, v1, i);
 }
+__declspec(safe)
 static inline int8 shuffle(int8 v0, int8 v1, int i) {
     return __shuffle2_i8(v0, v1, i);
 }
+__declspec(safe)
 static inline int16 shuffle(int16 v0, int16 v1, int i) {
     return __shuffle2_i16(v0, v1, i);
 }
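// Editorial aside, not part of the patch: a minimal usage sketch of the
// cross-program-instance primitives annotated above. reverse_lanes() and
// neighbor_sum() are hypothetical helpers, not ispc stdlib routines.
static inline float reverse_lanes(float v) {
    // shuffle() returns, for each program instance, the value of v held by
    // the lane named by the (varying) index operand.
    return shuffle(v, programCount - 1 - programIndex);
}
static inline float neighbor_sum(float v) {
    // rotate() shifts values across the gang by a uniform offset; here each
    // lane adds the value of its neighbor one position over (with wraparound).
    return v + rotate(v, 1);
}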
+__declspec(safe)
 static inline int32 shuffle(int32 v0, int32 v1, int i) {
     return __shuffle2_i32(v0, v1, i);
 }
+__declspec(safe)
 static inline double shuffle(double v0, double v1, int i) {
     return __shuffle2_double(v0, v1, i);
 }
+__declspec(safe)
 static inline int64 shuffle(int64 v0, int64 v1, int i) {
     return __shuffle2_i64(v0, v1, i);
 }
 // x[i]
+__declspec(safe,cost1)
 static inline uniform float extract(float x, uniform int i) {
     return floatbits(__extract_int32((int)intbits(x), i));
 }
+__declspec(safe,cost1)
 static inline uniform int8 extract(int8 x, uniform int i) {
     return __extract_int8(x, i);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) {
     return __extract_int8(x, (unsigned int)i);
 }
+__declspec(safe,cost1)
 static inline uniform int16 extract(int16 x, uniform int i) {
     return __extract_int16(x, i);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) {
     return __extract_int16(x, (unsigned int)i);
 }
+__declspec(safe,cost1)
 static inline uniform int32 extract(int32 x, uniform int i) {
     return __extract_int32(x, i);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) {
     return __extract_int32(x, (unsigned int)i);
 }
+__declspec(safe,cost1)
 static inline uniform double extract(double x, uniform int i) {
     return doublebits(__extract_int64((int64)intbits(x), i));
 }
+__declspec(safe,cost1)
 static inline uniform int64 extract(int64 x, uniform int i) {
     return __extract_int64(x, i);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 extract(unsigned int64 x, uniform int i) {
     return __extract_int64(x, (unsigned int)i);
 }
 // x[i] = v
+__declspec(safe,cost1)
 static inline float insert(float x, uniform int i, uniform float v) {
     return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v)));
 }
+__declspec(safe,cost1)
 static inline int8 insert(int8 x, uniform int i, uniform int8 v) {
     return __insert_int8(x, i, v);
 }
+__declspec(safe,cost1)
 static inline unsigned int8 insert(unsigned int8 x, uniform int i, uniform unsigned int8 v) {
     return __insert_int8(x, (unsigned int)i, v);
 }
+__declspec(safe,cost1)
 static inline int16 insert(int16 x, uniform int i, uniform int16 v) {
     return __insert_int16(x, i, v);
 }
+__declspec(safe,cost1)
 static inline unsigned int16 insert(unsigned int16 x, uniform int i, uniform unsigned int16 v) {
     return __insert_int16(x, (unsigned int)i, v);
 }
+__declspec(safe,cost1)
 static inline int32 insert(int32 x, uniform int i, uniform int32 v) {
     return __insert_int32(x, i, v);
 }
+__declspec(safe,cost1)
 static inline unsigned int32 insert(unsigned int32 x, uniform int i, uniform unsigned int32 v) {
     return __insert_int32(x, (unsigned int)i, v);
 }
+__declspec(safe,cost1)
 static inline double insert(double x, uniform int i, uniform double v) {
     return doublebits(__insert_int64((int64)intbits(x), i, (int64)intbits(v)));
 }
+__declspec(safe,cost1)
 static inline int64 insert(int64 x, uniform int i, uniform int64 v) {
     return __insert_int64(x, i, v);
 }
+__declspec(safe,cost1)
 static inline unsigned int64 insert(unsigned int64 x, uniform int i, uniform unsigned int64 v) {
     return __insert_int64(x, (unsigned int)i, v);
 }
+__declspec(safe,cost1)
 static inline uniform int32 sign_extend(uniform bool v) {
     return __sext_uniform_bool(v);
 }
+__declspec(safe,cost1)
 static inline int32 sign_extend(bool v) {
     return __sext_varying_bool(v);
 }
+__declspec(safe)
 static inline uniform bool any(bool v) {
     // We only care about whether "any" is true for the active program instances,
     // so we have to mask v with the current program mask.
@@ -289,6 +346,7 @@ static inline uniform bool any(bool v) {
 #endif
 }
+__declspec(safe)
 static inline uniform bool all(bool v) {
     // As with any(), we need to explicitly mask v with the current program mask
     // so we're only looking at the current lanes
@@ -300,14 +358,17 @@ static inline uniform bool all(bool v) {
     return __movmsk(match) == (1 << programCount) - 1;
 }
+__declspec(safe)
 static inline uniform int32 popcnt(uniform int32 v) {
     return __popcnt_int32(v);
 }
+__declspec(safe)
 static inline uniform int popcnt(uniform int64 v) {
     return (int32)__popcnt_int64(v);
 }
+__declspec(safe)
 static inline int popcnt(int v) {
     int r;
     for (uniform int i = 0; i < programCount; ++i)
@@ -315,6 +376,7 @@ static inline int popcnt(int v) {
     return __mask ? r : 0;
 }
+__declspec(safe)
 static inline int popcnt(int64 v) {
     int r;
     for (uniform int i = 0; i < programCount; ++i)
@@ -322,6 +384,7 @@ static inline int popcnt(int64 v) {
     return __mask ? r : 0;
 }
+__declspec(safe)
 static inline uniform int popcnt(bool v) {
     // As with any() and all(), only count across the active lanes
 #ifdef ISPC_TARGET_GENERIC
@@ -331,6 +394,7 @@ static inline uniform int popcnt(bool v) {
 #endif
 }
+__declspec(safe)
 static inline uniform int lanemask() {
     return __movmsk(__mask);
 }
@@ -445,46 +509,55 @@ static inline void memset64(void * varying ptr, int8 val, int64 count) {
 ///////////////////////////////////////////////////////////////////////////
 // count leading/trailing zeros
+__declspec(safe,cost1)
 static inline uniform unsigned int32 count_leading_zeros(uniform unsigned int32 v) {
     return __count_leading_zeros_i32(v);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 count_leading_zeros(uniform unsigned int64 v) {
     return __count_leading_zeros_i64(v);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int32 count_trailing_zeros(uniform unsigned int32 v) {
     return __count_trailing_zeros_i32(v);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 count_trailing_zeros(uniform unsigned int64 v) {
     return __count_trailing_zeros_i64(v);
 }
+__declspec(safe,cost1)
 static inline uniform int32 count_leading_zeros(uniform int32 v) {
     return __count_leading_zeros_i32(v);
 }
+__declspec(safe,cost1)
 static inline uniform int64 count_leading_zeros(uniform int64 v) {
     return __count_leading_zeros_i64(v);
 }
+__declspec(safe,cost1)
 static inline uniform int32 count_trailing_zeros(uniform int32 v) {
     return __count_trailing_zeros_i32(v);
 }
+__declspec(safe,cost1)
 static inline uniform int64 count_trailing_zeros(uniform int64 v) {
     return __count_trailing_zeros_i64(v);
 }
+__declspec(safe)
 static inline unsigned int32
 count_leading_zeros(unsigned int32 v) {
     unsigned int32 r;
@@ -493,6 +566,7 @@ count_leading_zeros(unsigned int32 v) {
     return r;
 }
+__declspec(safe)
 static inline unsigned int64
 count_leading_zeros(unsigned int64 v) {
     unsigned int64 r;
@@ -501,6 +575,7 @@ count_leading_zeros(unsigned int64 v) {
     return r;
 }
+__declspec(safe)
 static inline unsigned int32
 count_trailing_zeros(unsigned int32 v) {
     unsigned int32 r;
@@ -509,6 +584,7 @@ count_trailing_zeros(unsigned int32 v) {
     return r;
 }
+__declspec(safe)
 static inline unsigned int64
 count_trailing_zeros(unsigned int64 v) {
     unsigned int64 r;
@@ -517,6 +593,7 @@ count_trailing_zeros(unsigned int64 v) {
     return r;
 }
+__declspec(safe)
 static inline int32
 count_leading_zeros(int32 v) {
     int32 r;
@@ -525,6 +602,7 @@ count_leading_zeros(int32 v) {
     return r;
 }
+__declspec(safe)
 static inline int64
 count_leading_zeros(int64 v) {
     int64 r;
@@ -533,6 +611,7 @@ count_leading_zeros(int64 v) {
     return r;
 }
+__declspec(safe)
 static inline int32
 count_trailing_zeros(int32 v) {
     int32 r;
@@ -541,6 +620,7 @@ count_trailing_zeros(int32 v) {
     return r;
 }
+__declspec(safe)
 static inline int64
 count_trailing_zeros(int64 v) {
     int64 r;
@@ -606,18 +686,22 @@ soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[]) {
 ///////////////////////////////////////////////////////////////////////////
 // Prefetching
+__declspec(safe,cost1)
 static inline void prefetch_l1(const void * uniform ptr) {
     __prefetch_read_uniform_1((uniform int8 * uniform)ptr);
 }
+__declspec(safe,cost1)
 static inline void prefetch_l2(const void * uniform ptr) {
     __prefetch_read_uniform_2((uniform int8 * uniform)ptr);
 }
+__declspec(safe,cost1)
 static inline void prefetch_l3(const void * uniform ptr) {
     __prefetch_read_uniform_3((uniform int8 * uniform)ptr);
 }
+__declspec(safe,cost1)
 static inline void prefetch_nt(const void * uniform ptr) {
     __prefetch_read_uniform_nt((uniform int8 * uniform)ptr);
 }
@@ -665,12 +749,14 @@ static inline void prefetch_nt(const void * varying ptr) {
 ///////////////////////////////////////////////////////////////////////////
 // Horizontal ops / reductions
+__declspec(safe)
 static inline uniform float reduce_add(float x) {
     // zero the lanes where the mask is off
     return __reduce_add_float(__mask ? x : 0.);
 }
+__declspec(safe)
 static inline uniform float reduce_min(float v) {
     // For the lanes where the mask is off, replace the given value with
     // infinity, so that it doesn't affect the result.
@@ -680,6 +766,7 @@ static inline uniform float reduce_min(float v) {
     return __reduce_min_float(__mask ? v : __floatbits_varying_int32(iflt_max));
 }
+__declspec(safe)
 static inline uniform float reduce_max(float v) {
     // For the lanes where the mask is off, replace the given value with
     // negative infinity, so that it doesn't affect the result.
@@ -689,11 +776,13 @@ static inline uniform float reduce_max(float v) {
     return __reduce_max_float(__mask ? v : __floatbits_varying_int32(iflt_neg_max));
 }
+__declspec(safe)
 static inline uniform int reduce_add(int x) {
     // Zero out the values for lanes that aren't running
     return __reduce_add_int32(__mask ? x : 0);
 }
+__declspec(safe)
 static inline uniform int reduce_min(int v) {
     // Set values for non-running lanes to the maximum integer value so
     // they don't affect the result.
@@ -701,6 +790,7 @@ static inline uniform int reduce_min(int v) {
     return __reduce_min_int32(__mask ? v : int_max);
 }
+__declspec(safe)
 static inline uniform int reduce_max(int v) {
     // Set values for non-running lanes to the minimum integer value so
     // they don't affect the result.
@@ -708,12 +798,14 @@ static inline uniform int reduce_max(int v) {
     return __reduce_max_int32(__mask ? v : int_min);
 }
+__declspec(safe)
 static inline uniform unsigned int reduce_add(unsigned int x) {
     // Set values for non-running lanes to zero so they don't affect the
     // result.
     return __reduce_add_uint32(__mask ? x : 0);
 }
+__declspec(safe)
 static inline uniform unsigned int reduce_min(unsigned int v) {
     // Set values for non-running lanes to the maximum unsigned integer
     // value so they don't affect the result.
@@ -721,18 +813,20 @@ static inline uniform unsigned int reduce_min(unsigned int v) {
     return __reduce_min_uint32(__mask ? v : uint_max);
 }
+__declspec(safe)
 static inline uniform unsigned int reduce_max(unsigned int v) {
     // Set values for non-running lanes to zero so they don't affect the
     // result.
     return __reduce_max_uint32(__mask ? v : 0);
 }
-
+__declspec(safe)
 static inline uniform double reduce_add(double x) {
     // zero the lanes where the mask is off
     return __reduce_add_double(__mask ? x : 0.);
 }
+__declspec(safe)
 static inline uniform double reduce_min(double v) {
     int64 iflt_max = 0x7ff0000000000000; // infinity
     // Must use __doublebits_varying_int64, not doublebits(), since with the
@@ -740,6 +834,7 @@ static inline uniform double reduce_min(double v) {
     return __reduce_min_double(__mask ? v : __doublebits_varying_int64(iflt_max));
 }
+__declspec(safe)
 static inline uniform double reduce_max(double v) {
     const int64 iflt_neg_max = 0xfff0000000000000; // -infinity
     // Must use __doublebits_varying_int64, not doublebits(), since with the
@@ -747,11 +842,13 @@ static inline uniform double reduce_max(double v) {
     return __reduce_max_double(__mask ? v : __doublebits_varying_int64(iflt_neg_max));
 }
+__declspec(safe)
 static inline uniform int64 reduce_add(int64 x) {
     // Zero out the values for lanes that aren't running
     return __reduce_add_int64(__mask ? x : 0);
 }
+__declspec(safe)
 static inline uniform int64 reduce_min(int64 v) {
     // Set values for non-running lanes to the maximum integer value so
     // they don't affect the result.
@@ -759,6 +856,7 @@ static inline uniform int64 reduce_min(int64 v) {
     return __reduce_min_int64(__mask ? v : int_max);
 }
+__declspec(safe)
 static inline uniform int64 reduce_max(int64 v) {
     // Set values for non-running lanes to the minimum integer value so
     // they don't affect the result.
@@ -766,12 +864,14 @@ static inline uniform int64 reduce_max(int64 v) {
     return __reduce_max_int64(__mask ? v : int_min);
 }
+__declspec(safe)
 static inline uniform unsigned int64 reduce_add(unsigned int64 x) {
     // Set values for non-running lanes to zero so they don't affect the
     // result.
     return __reduce_add_int64(__mask ? x : 0);
 }
+__declspec(safe)
 static inline uniform unsigned int64 reduce_min(unsigned int64 v) {
     // Set values for non-running lanes to the maximum unsigned integer
     // value so they don't affect the result.
@@ -779,6 +879,7 @@ static inline uniform unsigned int64 reduce_min(unsigned int64 v) {
     return __reduce_min_uint64(__mask ? v : uint_max);
 }
+__declspec(safe)
 static inline uniform unsigned int64 reduce_max(unsigned int64 v) {
     // Set values for non-running lanes to zero so they don't affect the
     // result.
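// Editorial aside, not part of the patch: a sketch of how the masked
// reductions above are typically used. dot() is a hypothetical example
// routine, not an ispc stdlib function.
static uniform float dot(uniform float a[], uniform float b[], uniform int count) {
    float partial = 0.;
    foreach (i = 0 ... count)
        partial += a[i] * b[i];
    // Each program instance has accumulated a per-lane partial sum;
    // reduce_add() collapses the gang's partials into one uniform total.
    return reduce_add(partial);
}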
@@ -786,10 +887,12 @@ static inline uniform unsigned int64 reduce_max(unsigned int64 v) {
 }
 #define REDUCE_EQUAL(TYPE, FUNCTYPE, MASKTYPE) \
+__declspec(safe) \
 static inline uniform bool reduce_equal(TYPE v) { \
     uniform TYPE unusedValue; \
     return __reduce_equal_##FUNCTYPE(v, &unusedValue, (MASKTYPE)__mask); \
 } \
+__declspec(safe) \
 static inline uniform bool reduce_equal(TYPE v, uniform TYPE * uniform value) { \
     return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
 }
@@ -889,6 +992,7 @@ static inline uniform int num_cores() {
     return __num_cores();
 }
+__declspec(safe)
 static inline uniform int64 clock() {
     return __clock();
 }
@@ -896,6 +1000,7 @@ static inline uniform int64 clock() {
 ///////////////////////////////////////////////////////////////////////////
 // Floating-Point Math
+__declspec(safe,cost1)
 static inline float abs(float a) {
     // Floating-point hack: zeroing the high bit clears the sign
     unsigned int i = intbits(a);
@@ -903,12 +1008,14 @@ static inline float abs(float a) {
     return floatbits(i);
 }
+__declspec(safe,cost1)
 static inline uniform float abs(uniform float a) {
     uniform unsigned int i = intbits(a);
     i &= 0x7fffffff;
     return floatbits(i);
 }
+__declspec(safe,cost1)
 static inline double abs(double a) {
     // zeroing the high bit clears the sign
     unsigned int64 i = intbits(a);
@@ -916,84 +1023,103 @@ static inline double abs(double a) {
     return doublebits(i);
 }
+__declspec(safe,cost1)
 static inline uniform double abs(uniform double a) {
     uniform unsigned int64 i = intbits(a);
     i &= 0x7fffffffffffffff;
     return doublebits(i);
 }
+__declspec(safe,cost1)
 static inline unsigned int signbits(float x) {
     unsigned int i = intbits(x);
     return (i & 0x80000000);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int signbits(uniform float x) {
     uniform unsigned int i = intbits(x);
     return (i & 0x80000000);
 }
+__declspec(safe,cost1)
 static inline unsigned int64 signbits(double x) {
     unsigned int64 i = intbits(x);
     return (i & 0x8000000000000000);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 signbits(uniform double x) {
     uniform unsigned int64 i = intbits(x);
     return (i & 0x8000000000000000);
 }
+__declspec(safe,cost2)
 static inline float round(float x) {
     return __round_varying_float(x);
 }
+__declspec(safe,cost2)
 static inline uniform float round(uniform float x) {
     return __round_uniform_float(x);
 }
+__declspec(safe,cost2)
 static inline double round(double x) {
     return __round_varying_double(x);
 }
+__declspec(safe,cost2)
 static inline uniform double round(uniform double x) {
     return __round_uniform_double(x);
 }
+__declspec(safe,cost2)
 static inline float floor(float x) {
     return __floor_varying_float(x);
 }
+__declspec(safe,cost2)
 static inline uniform float floor(uniform float x) {
     return __floor_uniform_float(x);
 }
+__declspec(safe,cost2)
 static inline double floor(double x) {
     return __floor_varying_double(x);
 }
+__declspec(safe,cost2)
 static inline uniform double floor(uniform double x) {
     return __floor_uniform_double(x);
 }
+__declspec(safe,cost2)
 static inline float ceil(float x) {
     return __ceil_varying_float(x);
 }
+__declspec(safe,cost2)
 static inline uniform float ceil(uniform float x) {
     return __ceil_uniform_float(x);
 }
+__declspec(safe,cost2)
 static inline double ceil(double x) {
     return __ceil_varying_double(x);
 }
+__declspec(safe,cost2)
 static inline uniform double ceil(uniform double x) {
     return __ceil_uniform_double(x);
 }
+__declspec(safe)
 static inline float rcp(float v) {
     return __rcp_varying_float(v);
 }
+__declspec(safe)
 static inline uniform float rcp(uniform float v) {
     return __rcp_uniform_float(v);
 }
@@ -1003,18 +1129,22 @@ static inline uniform float rcp(uniform float v) {
 // float
+__declspec(safe,cost1)
 static inline float min(float a, float b) {
     return __min_varying_float(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform float min(uniform float a, uniform float b) {
     return __min_uniform_float(a, b);
 }
+__declspec(safe,cost1)
 static inline float max(float a, float b) {
     return __max_varying_float(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform float max(uniform float a, uniform float b) {
     return __max_uniform_float(a, b);
 }
@@ -1022,158 +1152,194 @@ static inline uniform float max(uniform float a, uniform float b) {
 // double
+__declspec(safe)
 static inline double min(double a, double b) {
     return __min_varying_double(a, b);
 }
+__declspec(safe)
 static inline uniform double min(uniform double a, uniform double b) {
     return __min_uniform_double(a, b);
 }
+__declspec(safe)
 static inline double max(double a, double b) {
     return __max_varying_double(a, b);
 }
+__declspec(safe)
 static inline uniform double max(uniform double a, uniform double b) {
     return __max_uniform_double(a, b);
 }
 // int8
+__declspec(safe,cost2)
 static inline uniform unsigned int8 min(uniform unsigned int8 a, uniform unsigned int8 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int8 max(uniform unsigned int8 a, uniform unsigned int8 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform int8 min(uniform int8 a, uniform int8 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform int8 max(uniform int8 a, uniform int8 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline unsigned int8 min(unsigned int8 a, unsigned int8 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline unsigned int8 max(unsigned int8 a, unsigned int8 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline int8 min(int8 a, int8 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline int8 max(int8 a, int8 b) {
     return (a > b) ? a : b;
 }
 // int16
+__declspec(safe,cost2)
 static inline uniform unsigned int16 min(uniform unsigned int16 a, uniform unsigned int16 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int16 max(uniform unsigned int16 a, uniform unsigned int16 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform int16 min(uniform int16 a, uniform int16 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline uniform int16 max(uniform int16 a, uniform int16 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline unsigned int16 min(unsigned int16 a, unsigned int16 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline unsigned int16 max(unsigned int16 a, unsigned int16 b) {
     return (a > b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline int16 min(int16 a, int16 b) {
     return (a < b) ? a : b;
 }
+__declspec(safe,cost2)
 static inline int16 max(int16 a, int16 b) {
     return (a > b) ? a : b;
 }
 // int32
+__declspec(safe,cost1)
 static inline unsigned int min(unsigned int a, unsigned int b) {
     return __min_varying_uint32(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int min(uniform unsigned int a, uniform unsigned int b) {
     return __min_uniform_uint32(a, b);
 }
+__declspec(safe,cost1)
 static inline unsigned int max(unsigned int a, unsigned int b) {
     return __max_varying_uint32(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int max(uniform unsigned int a, uniform unsigned int b) {
     return __max_uniform_uint32(a, b);
 }
+__declspec(safe,cost1)
 static inline int min(int a, int b) {
     return __min_varying_int32(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform int min(uniform int a, uniform int b) {
     return __min_uniform_int32(a, b);
 }
+__declspec(safe,cost1)
 static inline int max(int a, int b) {
     return __max_varying_int32(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform int max(uniform int a, uniform int b) {
     return __max_uniform_int32(a, b);
 }
 // int64
+__declspec(safe,cost1)
 static inline unsigned int64 min(unsigned int64 a, unsigned int64 b) {
     return __min_varying_uint64(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 min(uniform unsigned int64 a, uniform unsigned int64 b) {
     return __min_uniform_uint64(a, b);
 }
+__declspec(safe,cost1)
 static inline unsigned int64 max(unsigned int64 a, unsigned int64 b) {
     return __max_varying_uint64(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform unsigned int64 max(uniform unsigned int64 a, uniform unsigned int64 b) {
     return __max_uniform_uint64(a, b);
 }
+__declspec(safe,cost1)
 static inline int64 min(int64 a, int64 b) {
     return __min_varying_int64(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform int64 min(uniform int64 a, uniform int64 b) {
     return __min_uniform_int64(a, b);
 }
+__declspec(safe,cost1)
 static inline int64 max(int64 a, int64 b) {
     return __max_varying_int64(a, b);
 }
+__declspec(safe,cost1)
 static inline uniform int64 max(uniform int64 a, uniform int64 b) {
     return __max_uniform_int64(a, b);
 }
@@ -1183,31 +1349,37 @@ static inline uniform int64 max(uniform int64 a, uniform int64 b) {
 // float
+__declspec(safe,cost2)
 static inline float clamp(float v, float low, float high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform float clamp(uniform float v, uniform float low,
                                   uniform float high) {
     return min(max(v, low), high);
 }
 // int8
+__declspec(safe,cost2)
 static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low,
                                   unsigned int8 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int8 clamp(uniform unsigned int8 v, uniform unsigned int8 low,
                                           uniform unsigned int8 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline int8 clamp(int8 v, int8 low, int8 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform int8 clamp(uniform int8 v, uniform int8 low,
                                  uniform int8 high) {
     return min(max(v, low), high);
@@ -1215,21 +1387,25 @@ static inline uniform int8 clamp(uniform int8 v, uniform int8 low,
 // int16
+__declspec(safe,cost2)
 static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low,
                                    unsigned int16 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int16 clamp(uniform unsigned int16 v, uniform unsigned int16 low,
                                            uniform unsigned int16 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline int16 clamp(int16 v, int16 low, int16 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform int16 clamp(uniform int16 v, uniform int16 low,
                                   uniform int16 high) {
     return min(max(v, low), high);
@@ -1237,40 +1413,48 @@ static inline uniform int16 clamp(uniform int16 v, uniform int16 low,
 // int32
+__declspec(safe,cost2)
 static inline unsigned int clamp(unsigned int v, unsigned int low,
                                  unsigned int high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigned int low,
                                          uniform unsigned int high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline int clamp(int v, int low, int high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform int clamp(uniform int v, uniform int low,
                                 uniform int high) {
     return min(max(v, low), high);
 }
 // int64
+__declspec(safe,cost2)
 static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low,
                                    unsigned int64 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform unsigned int64 clamp(uniform unsigned int64 v, uniform unsigned int64 low,
                                            uniform unsigned int64 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline int64 clamp(int64 v, int64 low, int64 high) {
     return min(max(v, low), high);
 }
+__declspec(safe,cost2)
 static inline uniform int64 clamp(uniform int64 v, uniform int64 low,
                                   uniform int64 high) {
     return min(max(v, low), high);
@@ -1668,22 +1852,27 @@ LOCAL_CMPXCHG(double)
 ///////////////////////////////////////////////////////////////////////////
 // Transcendentals (float precision)
+__declspec(safe)
 static inline float sqrt(float v) {
     return __sqrt_varying_float(v);
 }
+__declspec(safe)
 static inline uniform float sqrt(uniform float v) {
     return __sqrt_uniform_float(v);
 }
+__declspec(safe)
 static inline float rsqrt(float v) {
     return __rsqrt_varying_float(v);
 }
+__declspec(safe)
 static inline uniform float rsqrt(uniform float v) {
     return __rsqrt_uniform_float(v);
 }
+__declspec(safe)
 static inline float ldexp(float x, int n) {
     unsigned int ex = 0x7F800000u;
     unsigned int ix = intbits(x);
@@ -1694,6 +1883,7 @@ static inline float ldexp(float x, int n) {
     return floatbits(ix);
 }
+__declspec(safe)
 static inline uniform float ldexp(uniform float x, uniform int n) {
     uniform unsigned int ex = 0x7F800000u;
     uniform unsigned int ix = intbits(x);
@@ -1704,6 +1894,7 @@ static inline uniform float ldexp(uniform float x, uniform int n) {
     return floatbits(ix);
 }
+__declspec(safe)
 static inline float frexp(float x, varying int * uniform pw2) {
     unsigned int ex = 0x7F800000u; // exponent mask
     unsigned int ix = intbits(x);
@@ -1714,6 +1905,7 @@ static inline float frexp(float x, varying int * uniform pw2) {
     return floatbits(ix);
 }
+__declspec(safe)
 static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
     uniform unsigned int ex = 0x7F800000u; // exponent mask
     uniform unsigned int ix = intbits(x);
@@ -1727,6 +1919,7 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
 // Most of the transcendental implementations in ispc code here come from
 // Solomon Boulos's "syrah": https://github.com/boulos/syrah/
+__declspec(safe)
 static inline float sin(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_sin(x_full);
@@ -1788,6 +1981,7 @@ static inline float sin(float x_full) {
 }
+__declspec(safe)
 static inline uniform float sin(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -1853,6 +2047,7 @@ static inline uniform float sin(uniform float x_full) {
 }
+__declspec(safe)
 static inline float asin(float x) {
     bool isneg = x < 0;
     x = abs(x);
@@ -1909,6 +2104,7 @@ static inline float asin(float x) {
 }
+__declspec(safe)
 static inline uniform float asin(uniform float x) {
     uniform bool isneg = x < 0;
     x = abs(x);
@@ -1960,6 +2156,7 @@ static inline uniform float asin(uniform float x) {
 }
+__declspec(safe)
 static inline float cos(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_cos(x_full);
@@ -2020,6 +2217,7 @@ static inline float cos(float x_full) {
 }
+__declspec(safe)
 static inline uniform float cos(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2084,16 +2282,19 @@ static inline uniform float cos(uniform float x_full) {
 }
+__declspec(safe)
 static inline float acos(float v) {
     return 1.57079637050628662109375 - asin(v);
 }
+__declspec(safe)
 static inline uniform float acos(uniform float v) {
     return 1.57079637050628662109375 - asin(v);
 }
+__declspec(safe)
 static inline void sincos(float x_full, varying float * uniform sin_result,
                           varying float * uniform cos_result) {
     if (__math_lib == __math_lib_svml) {
@@ -2163,6 +2364,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result,
 }
+__declspec(safe)
 static inline void sincos(uniform float x_full, uniform float * uniform sin_result,
                           uniform float * uniform cos_result) {
     if (__math_lib == __math_lib_system ||
@@ -2225,6 +2427,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu
 }
+__declspec(safe)
 static inline float tan(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_tan(x_full);
@@ -2303,6 +2506,7 @@ static inline float tan(float x_full) {
 }
+__declspec(safe)
 static inline uniform float tan(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2374,6 +2578,7 @@ static inline uniform float tan(uniform float x_full) {
 }
+__declspec(safe)
 static inline float atan(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_atan(x_full);
@@ -2424,6 +2629,7 @@ static inline float atan(float x_full) {
 }
+__declspec(safe)
 static inline uniform float atan(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2467,6 +2673,7 @@ static inline uniform float atan(uniform float x_full) {
 }
+__declspec(safe)
 static inline float atan2(float y, float x) {
     if (__math_lib == __math_lib_svml) {
         return __svml_atan2(y, x);
@@ -2505,6 +2712,7 @@ static inline float atan2(float y, float x) {
 }
+__declspec(safe)
 static inline uniform float atan2(uniform float y, uniform float x) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2525,6 +2733,7 @@ static inline uniform float atan2(uniform float y, uniform float x) {
 }
+__declspec(safe)
 static inline float exp(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_exp(x_full);
@@ -2603,6 +2812,7 @@ static inline float exp(float x_full) {
     }
 }
+__declspec(safe)
 static inline uniform float exp(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2677,6 +2887,7 @@ static inline uniform float exp(uniform float x_full) {
 // Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
 // * log(2) + log(y) where y is the reduced range (usually in [1/2,
 // 1)).
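// (Editorial illustration, not part of the patch.) A concrete instance of the
// reduction described above: 12.0 = 2^4 * 0.75, with 0.75 in [1/2, 1), so
//     log(12.0) = 4 * log(2) + log(0.75)
//               = 4 * 0.693147 + (-0.287682)
//               = 2.484907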
+__declspec(safe)
 static inline void __range_reduce_log(float input, varying float * uniform reduced,
                                       varying int * uniform exponent) {
     int int_version = intbits(input);
@@ -2707,6 +2918,7 @@ static inline void __range_reduce_log(float input, varying float * uniform reduc
+__declspec(safe)
 static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced,
                                       uniform int * uniform exponent) {
     uniform int int_version = intbits(input);
@@ -2722,6 +2934,7 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
 }
+__declspec(safe)
 static inline float log(float x_full) {
     if (__math_lib == __math_lib_svml) {
         return __svml_log(x_full);
@@ -2809,6 +3022,7 @@ static inline float log(float x_full) {
     }
 }
+__declspec(safe)
 static inline uniform float log(uniform float x_full) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2889,6 +3103,7 @@ static inline uniform float log(uniform float x_full) {
     }
 }
+__declspec(safe)
 static inline float pow(float a, float b) {
     if (__math_lib == __math_lib_svml) {
         return __svml_pow(a, b);
@@ -2907,6 +3122,7 @@ static inline float pow(float a, float b) {
     }
 }
+__declspec(safe)
 static inline uniform float pow(uniform float a, uniform float b) {
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
@@ -2921,14 +3137,17 @@ static inline uniform float pow(uniform float a, uniform float b) {
 ///////////////////////////////////////////////////////////////////////////
 // Transcendentals (double precision)
+__declspec(safe)
 static inline double sqrt(double v) {
     return __sqrt_varying_double(v);
 }
+__declspec(safe)
 static inline uniform double sqrt(uniform double v) {
     return __sqrt_uniform_double(v);
 }
+__declspec(safe)
 static inline double ldexp(double x, int n) {
     unsigned int64 ex = 0x7ff0000000000000;
     unsigned int64 ix = intbits(x);
@@ -2939,6 +3158,7 @@ static inline double ldexp(double x, int n) {
     return doublebits(ix);
 }
+__declspec(safe)
 static inline uniform double ldexp(uniform double x, uniform int n) {
     uniform unsigned int64 ex = 0x7ff0000000000000;
     uniform unsigned int64 ix = intbits(x);
@@ -2949,6 +3169,7 @@ static inline uniform double ldexp(uniform double x, uniform int n) {
     return doublebits(ix);
 }
+__declspec(safe)
 static inline double frexp(double x, varying int * uniform pw2) {
     unsigned int64 ex = 0x7ff0000000000000; // exponent mask
     unsigned int64 ix = intbits(x);
@@ -2959,6 +3180,7 @@ static inline double frexp(double x, varying int * uniform pw2) {
     return doublebits(ix);
 }
+__declspec(safe)
 static inline uniform double frexp(uniform double x, uniform int * uniform pw2) {
     uniform unsigned int64 ex = 0x7ff0000000000000; // exponent mask
     uniform unsigned int64 ix = intbits(x);
@@ -2969,6 +3191,7 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2)
     return doublebits(ix);
 }
+__declspec(safe)
 static inline double sin(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return sin((float)x);
@@ -2982,6 +3205,7 @@ static inline double sin(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double sin(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return sin((float)x);
@@ -2989,6 +3213,7 @@ static inline uniform double sin(uniform double x) {
     return __stdlib_sin(x);
 }
+__declspec(safe)
 static inline double cos(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return cos((float)x);
@@ -3002,6 +3227,7 @@ static inline double cos(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double cos(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return cos((float)x);
@@ -3009,6 +3235,7 @@ static inline uniform double cos(uniform double x) {
     return __stdlib_cos(x);
 }
+__declspec(safe)
 static inline void sincos(double x, varying double * uniform sin_result,
                           varying double * uniform cos_result) {
     if (__math_lib == __math_lib_ispc_fast) {
@@ -3027,6 +3254,7 @@ static inline void sincos(double x, varying double * uniform sin_result,
     }
 }
+__declspec(safe)
 static inline void sincos(uniform double x, uniform double * uniform sin_result,
                           uniform double * uniform cos_result) {
     if (__math_lib == __math_lib_ispc_fast) {
@@ -3039,6 +3267,7 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
     __stdlib_sincos(x, sin_result, cos_result);
 }
+__declspec(safe)
 static inline double tan(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return tan((float)x);
@@ -3052,6 +3281,7 @@ static inline double tan(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double tan(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return tan((float)x);
@@ -3059,6 +3289,7 @@ static inline uniform double tan(uniform double x) {
     return __stdlib_tan(x);
 }
+__declspec(safe)
 static inline double atan(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return atan((float)x);
@@ -3072,6 +3303,7 @@ static inline double atan(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double atan(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return atan((float)x);
@@ -3079,6 +3311,7 @@ static inline uniform double atan(uniform double x) {
     return __stdlib_atan(x);
 }
+__declspec(safe)
 static inline double atan2(double y, double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return atan2((float)y, (float)x);
@@ -3092,6 +3325,7 @@ static inline double atan2(double y, double x) {
     }
 }
+__declspec(safe)
 static inline uniform double atan2(uniform double y, uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return atan2((float)y, (float)x);
@@ -3099,6 +3333,7 @@ static inline uniform double atan2(uniform double y, uniform double x) {
     return __stdlib_atan2(y, x);
 }
+__declspec(safe)
 static inline double exp(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return exp((float)x);
@@ -3112,6 +3347,7 @@ static inline double exp(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double exp(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return exp((float)x);
@@ -3119,6 +3355,7 @@ static inline uniform double exp(uniform double x) {
     return __stdlib_exp(x);
 }
+__declspec(safe)
 static inline double log(double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return log((float)x);
@@ -3132,6 +3369,7 @@ static inline double log(double x) {
     }
 }
+__declspec(safe)
 static inline uniform double log(uniform double x) {
     if (__math_lib == __math_lib_ispc_fast)
         return log((float)x);
@@ -3139,6 +3377,7 @@ static inline uniform double log(uniform double x) {
     return __stdlib_log(x);
 }
+__declspec(safe)
 static inline double pow(double a, double b) {
     if (__math_lib == __math_lib_ispc_fast)
         return pow((float)a, (float)b);
@@ -3152,6 +3391,7 @@ static inline double pow(double a, double b) {
     }
 }
+__declspec(safe)
 static inline uniform double pow(uniform double a, uniform double b) {
     if (__math_lib == __math_lib_ispc_fast)
         return pow((float)a, (float)b);
@@ -3162,6 +3402,7 @@ static inline uniform double pow(uniform double a, uniform double b) {
 ///////////////////////////////////////////////////////////////////////////
 // half-precision floats
+__declspec(safe)
 static inline uniform float half_to_float(uniform unsigned int16 h) {
     if (__have_native_half) {
         return __half_to_float_uniform(h);
@@ -3224,6 +3465,7 @@ static inline uniform float half_to_float(uniform unsigned int16 h) {
     }
 }
+__declspec(safe)
 static inline float half_to_float(unsigned int16 h) {
     if (__have_native_half) {
         return __half_to_float_varying(h);
@@ -3287,6 +3529,7 @@ static inline float half_to_float(unsigned int16 h) {
 }
+__declspec(safe)
 static inline uniform int16 float_to_half(uniform float f) {
     if (__have_native_half) {
         return __float_to_half_uniform(f);
@@ -3358,6 +3601,7 @@ static inline uniform int16 float_to_half(uniform float f) {
 }
+__declspec(safe)
 static inline int16 float_to_half(float f) {
     if (__have_native_half) {
         return __float_to_half_varying(f);
@@ -3429,6 +3673,7 @@ static inline int16 float_to_half(float f) {
 }
+__declspec(safe)
 static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
     if (__have_native_half) {
         return __half_to_float_uniform(h);
@@ -3450,6 +3695,7 @@ static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
     }
 }
+__declspec(safe)
 static inline float half_to_float_fast(unsigned int16 h) {
     if (__have_native_half) {
         return __half_to_float_varying(h);
@@ -3471,6 +3717,7 @@ static inline float half_to_float_fast(unsigned int16 h) {
     }
 }
+__declspec(safe)
 static inline uniform int16 float_to_half_fast(uniform float f) {
     if (__have_native_half) {
         return __float_to_half_uniform(f);
@@ -3496,6 +3743,7 @@ static inline uniform int16 float_to_half_fast(uniform float f) {
     }
 }
+__declspec(safe)
 static inline int16 float_to_half_fast(float f) {
     if (__have_native_half) {
         return __float_to_half_varying(f);