Scalar target builtins bugfixes.
Fix a typo in __max_varying_double (the comparison was olt where it should be ogt). Add declarations for the half conversion functions. Use the gen_scatter macro to generate the scatter functions.
This commit is contained in:
@@ -41,89 +41,10 @@ gen_gather(1, i16)
|
||||
gen_gather(1, i32)
|
||||
gen_gather(1, i64)
|
||||
|
||||
;; __scatter_elt_i8: store one i8 element of %values at %base plus the
;; matching element of %offsets.
;;   %base    - i8* that the offset is applied to
;;   %offsets - <1 x i32>; applied through getelementptr on an i8*, so the
;;              offset is counted in bytes
;;   %values  - <1 x i8> holding the value to store
;;   %lane    - position of the element taken from %offsets and %values
define void @__scatter_elt_i8(i8 * %base, <1 x i32> %offsets, <1 x i8> %values,
|
||||
i32 %lane) nounwind alwaysinline {
|
||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
||||
;; Disabled alternative that built the address with 64-bit integer
;; arithmetic. %ptr64 is not defined anywhere in this function.
;; NOTE(review): the disabled code zero-extended the offset, while
;; getelementptr treats its i32 operand as signed -- confirm which is wanted.
; %offset64 = zext i32 %offset32 to i64
|
||||
; %ptrdelta = add i64 %ptr64, %offset64
|
||||
; %ptr = inttoptr i64 %ptrdelta to i8 *
|
||||
;; Advance the byte pointer by the extracted offset.
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
||||
;; Source and destination types are both i8*, so this cast changes nothing.
%ptr = bitcast i8 * %ptroffset to i8 *
|
||||
%storeval = extractelement <1 x i8> %values, i32 %lane
|
||||
store i8 %storeval, i8 * %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_base_offsets_i8: forwards %base, %offsets and %values to
;; @__scatter_elt_i8 with lane 0, the only element of a <1 x ...> vector.
;; NOTE(review): %mask is never read in this body, so the store is issued
;; whatever its value is -- confirm that this is intended.
define void @__scatter_base_offsets_i8(i8* %base, <1 x i32> %offsets, <1 x i8> %values,
|
||||
<1 x i32> %mask) nounwind alwaysinline {
|
||||
;; NOTE(review): the comment below looks stale; no `per_lane' invocation
;; appears in this body.
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||
;; Disabled leftover: would have converted %base to the integer %ptr64.
;%ptr64 = ptrtoint i8 * %base to i64
|
||||
call void @__scatter_elt_i8(i8 *%base, <1 x i32> %offsets, <1 x i8> %values, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_elt_i16: store one i16 element of %values at %base plus the
;; matching element of %offsets.
;;   %base    - i8* that the offset is applied to
;;   %offsets - <1 x i32>; applied through getelementptr on an i8*, so the
;;              offset is counted in bytes, not in i16 elements
;;   %values  - <1 x i16> holding the value to store
;;   %lane    - position of the element taken from %offsets and %values
define void @__scatter_elt_i16(i8 * %base, <1 x i32> %offsets, <1 x i16> %values,
|
||||
i32 %lane) nounwind alwaysinline {
|
||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
||||
;; Disabled alternative that built the address with 64-bit integer
;; arithmetic. %ptr64 is not defined anywhere in this function.
;; NOTE(review): the disabled code zero-extended the offset, while
;; getelementptr treats its i32 operand as signed -- confirm which is wanted.
; %offset64 = zext i32 %offset32 to i64
|
||||
; %ptrdelta = add i64 %ptr64, %offset64
|
||||
; %ptr = inttoptr i64 %ptrdelta to i16 *
|
||||
;; Advance the byte pointer by the extracted offset.
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
||||
;; Reinterpret the byte pointer as a pointer to the stored element type.
%ptr = bitcast i8 * %ptroffset to i16 *
|
||||
%storeval = extractelement <1 x i16> %values, i32 %lane
|
||||
;; NOTE(review): no align is given, so LLVM assumes the ABI alignment of i16;
;; confirm that the byte offsets always produce a suitably aligned address.
store i16 %storeval, i16 * %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_base_offsets_i16: forwards %base, %offsets and %values to
;; @__scatter_elt_i16 with lane 0, the only element of a <1 x ...> vector.
;; NOTE(review): %mask is never read in this body, so the store is issued
;; whatever its value is -- confirm that this is intended.
define void @__scatter_base_offsets_i16(i8* %base, <1 x i32> %offsets, <1 x i16> %values,
|
||||
<1 x i32> %mask) nounwind alwaysinline {
|
||||
;; NOTE(review): the comment below looks stale; no `per_lane' invocation
;; appears in this body.
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||
;; Disabled leftover: would have converted %base to the integer %ptr64.
;%ptr64 = ptrtoint i8 * %base to i64
|
||||
call void @__scatter_elt_i16(i8 *%base, <1 x i32> %offsets, <1 x i16> %values, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_elt_i32: store one i32 element of %values at %base plus the
;; matching element of %offsets.
;;   %base    - i8* that the offset is applied to
;;   %offsets - <1 x i32>; applied through getelementptr on an i8*, so the
;;              offset is counted in bytes, not in i32 elements
;;   %values  - <1 x i32> holding the value to store
;;   %lane    - position of the element taken from %offsets and %values
define void @__scatter_elt_i32(i8 * %base, <1 x i32> %offsets, <1 x i32> %values,
|
||||
i32 %lane) nounwind alwaysinline {
|
||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
||||
;; Disabled alternative that built the address with 64-bit integer
;; arithmetic. %ptr64 is not defined anywhere in this function.
;; NOTE(review): the disabled code zero-extended the offset, while
;; getelementptr treats its i32 operand as signed -- confirm which is wanted.
; %offset64 = zext i32 %offset32 to i64
|
||||
; %ptrdelta = add i64 %ptr64, %offset64
|
||||
; %ptr = inttoptr i64 %ptrdelta to i32 *
|
||||
;; Advance the byte pointer by the extracted offset.
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
||||
;; Reinterpret the byte pointer as a pointer to the stored element type.
%ptr = bitcast i8 * %ptroffset to i32 *
|
||||
%storeval = extractelement <1 x i32> %values, i32 %lane
|
||||
;; NOTE(review): no align is given, so LLVM assumes the ABI alignment of i32;
;; confirm that the byte offsets always produce a suitably aligned address.
store i32 %storeval, i32 * %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_base_offsets_i32: forwards %base, %offsets and %values to
;; @__scatter_elt_i32 with lane 0, the only element of a <1 x ...> vector.
;; NOTE(review): %mask is never read in this body, so the store is issued
;; whatever its value is -- confirm that this is intended.
define void @__scatter_base_offsets_i32(i8* %base, <1 x i32> %offsets, <1 x i32> %values,
|
||||
<1 x i32> %mask) nounwind alwaysinline {
|
||||
;; NOTE(review): the comment below looks stale; no `per_lane' invocation
;; appears in this body.
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||
;; Disabled leftover: would have converted %base to the integer %ptr64.
;%ptr64 = ptrtoint i8 * %base to i64
|
||||
call void @__scatter_elt_i32(i8 *%base, <1 x i32> %offsets, <1 x i32> %values, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_elt_i64: store one i64 element of %values at %base plus the
;; matching element of %offsets.
;;   %base    - i8* that the offset is applied to
;;   %offsets - <1 x i32>; applied through getelementptr on an i8*, so the
;;              offset is counted in bytes, not in i64 elements
;;   %values  - <1 x i64> holding the value to store
;;   %lane    - position of the element taken from %offsets and %values
define void @__scatter_elt_i64(i8 * %base, <1 x i32> %offsets, <1 x i64> %values,
|
||||
i32 %lane) nounwind alwaysinline {
|
||||
%offset32 = extractelement <1 x i32> %offsets, i32 %lane
|
||||
;; Disabled alternative that built the address with 64-bit integer
;; arithmetic. %ptr64 is not defined anywhere in this function.
;; NOTE(review): the disabled code zero-extended the offset, while
;; getelementptr treats its i32 operand as signed -- confirm which is wanted.
; %offset64 = zext i32 %offset32 to i64
|
||||
; %ptrdelta = add i64 %ptr64, %offset64
|
||||
; %ptr = inttoptr i64 %ptrdelta to i64 *
|
||||
;; Advance the byte pointer by the extracted offset.
%ptroffset = getelementptr i8 *%base, i32 %offset32
|
||||
;; Reinterpret the byte pointer as a pointer to the stored element type.
%ptr = bitcast i8 * %ptroffset to i64 *
|
||||
%storeval = extractelement <1 x i64> %values, i32 %lane
|
||||
;; NOTE(review): no align is given, so LLVM assumes the ABI alignment of i64;
;; confirm that the byte offsets always produce a suitably aligned address.
store i64 %storeval, i64 * %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; __scatter_base_offsets_i64: forwards %base, %offsets and %values to
;; @__scatter_elt_i64 with lane 0, the only element of a <1 x ...> vector.
;; NOTE(review): %mask is never read in this body, so the store is issued
;; whatever its value is -- confirm that this is intended.
define void @__scatter_base_offsets_i64(i8* %base, <1 x i32> %offsets, <1 x i64> %values,
|
||||
<1 x i32> %mask) nounwind alwaysinline {
|
||||
;; NOTE(review): the comment below looks stale; no `per_lane' invocation
;; appears in this body.
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||
;; Disabled leftover: would have converted %base to the integer %ptr64.
;%ptr64 = ptrtoint i8 * %base to i64
|
||||
call void @__scatter_elt_i64(i8 *%base, <1 x i32> %offsets, <1 x i64> %values, i32 0)
|
||||
ret void
|
||||
}
|
||||
;; One `gen_scatter' invocation per element type (i8, i16, i32, i64), each
;; with 1 as the first argument.
;; NOTE(review): the macro body is not visible here. Per the commit message
;; these invocations supply the scatter functions; presumably the first
;; argument is the vector width -- confirm against the macro definition.
gen_scatter(1, i8)
|
||||
gen_scatter(1, i16)
|
||||
gen_scatter(1, i32)
|
||||
gen_scatter(1, i64)
|
||||
|
||||
|
||||
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
||||
@@ -854,7 +775,7 @@ define <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind
|
||||
;ret <1 x double> %ret
|
||||
%a = extractelement <1 x double> %0, i32 0
|
||||
%b = extractelement <1 x double> %1, i32 0
|
||||
%d = fcmp olt double %a, %b
|
||||
%d = fcmp ogt double %a, %b
|
||||
%r = select i1 %d, double %a, double %b
|
||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||
ret <1 x double> %rv
|
||||
@@ -1004,3 +925,11 @@ ctlztz()
|
||||
|
||||
define_prefetches()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
;; Declarations only: no bodies are given for these four functions here.
;; The "uniform" forms take and return a single scalar; the "varying" forms
;; take and return a vector. Each pairs i16 with float, one per direction.
;; NOTE(review): presumably the i16 carries the half-precision bit pattern --
;; confirm against the implementation.
;; NOTE(review): `WIDTH' is presumably a macro replaced by the vector width
;; before this text reaches LLVM -- confirm it is defined for this target.
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
|
||||
Reference in New Issue
Block a user