Scalar target builtins bugfixes.
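Fix a typo in __max_varying_double (the comparison used `fcmp olt` where `fcmp ogt` is required, so it returned the minimum). Add declarations for the half-conversion functions (__half_to_float_* and __float_to_half_*, uniform and varying). Use the gen_scatter macro to generate the scatter functions in place of the hand-written i8/i16/i32/i64 definitions.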
This commit is contained in:
@@ -41,89 +41,10 @@ gen_gather(1, i16)
|
|||||||
gen_gather(1, i32)
|
gen_gather(1, i32)
|
||||||
gen_gather(1, i64)
|
gen_gather(1, i64)
|
||||||
|
|
||||||
define void @__scatter_elt_i8(i8 * %base, <1 x i32> %offsets, <1 x i8> %values,
|
gen_scatter(1, i8)
|
||||||
i32 %lane) nounwind alwaysinline {
|
gen_scatter(1, i16)
|
||||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
gen_scatter(1, i32)
|
||||||
; %offset64 = zext i32 %offset32 to i64
|
gen_scatter(1, i64)
|
||||||
; %ptrdelta = add i64 %ptr64, %offset64
|
|
||||||
; %ptr = inttoptr i64 %ptrdelta to i8 *
|
|
||||||
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
|
||||||
%ptr = bitcast i8 * %ptroffset to i8 *
|
|
||||||
%storeval = extractelement <1 x i8> %values, i32 %lane
|
|
||||||
store i8 %storeval, i8 * %ptr
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_base_offsets_i8(i8* %base, <1 x i32> %offsets, <1 x i8> %values,
|
|
||||||
<1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
|
||||||
;%ptr64 = ptrtoint i8 * %base to i64
|
|
||||||
call void @__scatter_elt_i8(i8 *%base, <1 x i32> %offsets, <1 x i8> %values, i32 0)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_elt_i16(i8 * %base, <1 x i32> %offsets, <1 x i16> %values,
|
|
||||||
i32 %lane) nounwind alwaysinline {
|
|
||||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
|
||||||
; %offset64 = zext i32 %offset32 to i64
|
|
||||||
; %ptrdelta = add i64 %ptr64, %offset64
|
|
||||||
; %ptr = inttoptr i64 %ptrdelta to i16 *
|
|
||||||
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
|
||||||
%ptr = bitcast i8 * %ptroffset to i16 *
|
|
||||||
%storeval = extractelement <1 x i16> %values, i32 %lane
|
|
||||||
store i16 %storeval, i16 * %ptr
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_base_offsets_i16(i8* %base, <1 x i32> %offsets, <1 x i16> %values,
|
|
||||||
<1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
|
||||||
;%ptr64 = ptrtoint i8 * %base to i64
|
|
||||||
call void @__scatter_elt_i16(i8 *%base, <1 x i32> %offsets, <1 x i16> %values, i32 0)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_elt_i32(i8 * %base, <1 x i32> %offsets, <1 x i32> %values,
|
|
||||||
i32 %lane) nounwind alwaysinline {
|
|
||||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
|
||||||
; %offset64 = zext i32 %offset32 to i64
|
|
||||||
; %ptrdelta = add i64 %ptr64, %offset64
|
|
||||||
; %ptr = inttoptr i64 %ptrdelta to i32 *
|
|
||||||
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
|
||||||
%ptr = bitcast i8 * %ptroffset to i32 *
|
|
||||||
%storeval = extractelement <1 x i32> %values, i32 %lane
|
|
||||||
store i32 %storeval, i32 * %ptr
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_base_offsets_i32(i8* %base, <1 x i32> %offsets, <1 x i32> %values,
|
|
||||||
<1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
|
||||||
;%ptr64 = ptrtoint i8 * %base to i64
|
|
||||||
call void @__scatter_elt_i32(i8 *%base, <1 x i32> %offsets, <1 x i32> %values, i32 0)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_elt_i64(i8 * %base, <1 x i32> %offsets, <1 x i64> %values,
|
|
||||||
i32 %lane) nounwind alwaysinline {
|
|
||||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
|
||||||
; %offset64 = zext i32 %offset32 to i64
|
|
||||||
; %ptrdelta = add i64 %ptr64, %offset64
|
|
||||||
; %ptr = inttoptr i64 %ptrdelta to i64 *
|
|
||||||
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
|
||||||
%ptr = bitcast i8 * %ptroffset to i64 *
|
|
||||||
%storeval = extractelement <1 x i64> %values, i32 %lane
|
|
||||||
store i64 %storeval, i64 * %ptr
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__scatter_base_offsets_i64(i8* %base, <1 x i32> %offsets, <1 x i64> %values,
|
|
||||||
<1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
|
||||||
;%ptr64 = ptrtoint i8 * %base to i64
|
|
||||||
call void @__scatter_elt_i64(i8 *%base, <1 x i32> %offsets, <1 x i64> %values, i32 0)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
||||||
@@ -854,7 +775,7 @@ define <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind
|
|||||||
;ret <1 x double> %ret
|
;ret <1 x double> %ret
|
||||||
%a = extractelement <1 x double> %0, i32 0
|
%a = extractelement <1 x double> %0, i32 0
|
||||||
%b = extractelement <1 x double> %1, i32 0
|
%b = extractelement <1 x double> %1, i32 0
|
||||||
%d = fcmp olt double %a, %b
|
%d = fcmp ogt double %a, %b
|
||||||
%r = select i1 %d, double %a, double %b
|
%r = select i1 %d, double %a, double %b
|
||||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||||
ret <1 x double> %rv
|
ret <1 x double> %rv
|
||||||
@@ -1004,3 +925,11 @@ ctlztz()
|
|||||||
|
|
||||||
define_prefetches()
|
define_prefetches()
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; half conversion routines
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user