Rename the gather/scatter_base_offsets functions to *_factored_base_offsets*.
No functional change; this is just preparation for adding a path that doesn't factor the offsets into constant and varying parts, which will be better for AVX2 and KNC.
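For context, here is a minimal scalar sketch of the addressing that the "factored" gather/scatter entry points perform; the function name, lane count, and element type below are illustrative assumptions, not the actual ispc builtins. Each active lane reads from base + offset_scale * varying_offset[i] + offset_delta[i]; the planned unfactored path would instead take a single precomputed byte offset per lane.

#include <cstdint>
#include <cstring>

// Hypothetical scalar model of a factored-base-offsets gather (illustrative only;
// the real builtins operate on whole vectors under a mask):
//   addr[i] = base + offset_scale * varying_offset[i] + offset_delta[i]
template <int WIDTH>
static void gather_factored_base_offsets32_i32(const uint8_t *base,
                                               const int32_t varying_offset[WIDTH],
                                               uint32_t offset_scale,
                                               const int32_t offset_delta[WIDTH],
                                               const bool mask[WIDTH],
                                               int32_t result[WIDTH]) {
    for (int i = 0; i < WIDTH; ++i) {
        if (!mask[i])
            continue;  // inactive lanes are left untouched
        const uint8_t *addr =
            base + (int64_t)offset_scale * varying_offset[i] + offset_delta[i];
        std::memcpy(&result[i], addr, sizeof(int32_t));
    }
}

// An unfactored variant (the direction this rename prepares for) would instead
// take one fully computed byte offset per lane: addr[i] = base + offset[i].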
@@ -334,18 +334,18 @@ define void @__masked_store_blend_double(<WIDTH x double>* nocapture,
 ;; gather/scatter

 define(`gather_scatter', `
-declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
+declare <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
 i32, <WIDTH x i32>, <WIDTH x i1>) nounwind readonly
-declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
+declare <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
 i32, <WIDTH x i64>, <WIDTH x i1>) nounwind readonly
 declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32>,
 <WIDTH x i1>) nounwind readonly
 declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64>,
 <WIDTH x i1>) nounwind readonly

-declare void @__scatter_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
+declare void @__scatter_factored_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
 i32, <WIDTH x i32>, <WIDTH x $1>, <WIDTH x i1>) nounwind
-declare void @__scatter_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
+declare void @__scatter_factored_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
 i32, <WIDTH x i64>, <WIDTH x $1>, <WIDTH x i1>) nounwind
 declare void @__scatter32_$1(<WIDTH x i32>, <WIDTH x $1>,
 <WIDTH x i1>) nounwind
builtins/util.m4 (156 changed lines)
@@ -1599,7 +1599,7 @@ declare void @__pseudo_masked_store_double(<WIDTH x double> * nocapture, <WIDTH
 ; offset_delta feeds into the free offset calculation.
 ;
 ; varying int{8,16,32,float,64,double}
-; __pseudo_gather_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
+; __pseudo_gather_factored_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
 ; int{32,64} offsets, uniform int32 offset_scale,
 ; int{32,64} offset_delta, mask)
 ;
@@ -1621,30 +1621,30 @@ declare <WIDTH x float> @__pseudo_gather64_float(<WIDTH x i64>, <WIDTH x MASK>)
 declare <WIDTH x i64> @__pseudo_gather64_i64(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
 declare <WIDTH x double> @__pseudo_gather64_double(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly

-declare <WIDTH x i8> @__pseudo_gather_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i8> @__pseudo_gather_factored_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i16> @__pseudo_gather_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i16> @__pseudo_gather_factored_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i32> @__pseudo_gather_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i32> @__pseudo_gather_factored_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x float> @__pseudo_gather_base_offsets32_float(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x float> @__pseudo_gather_factored_base_offsets32_float(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i64> @__pseudo_gather_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i64> @__pseudo_gather_factored_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x double> @__pseudo_gather_base_offsets32_double(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x double> @__pseudo_gather_factored_base_offsets32_double(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x MASK>) nounwind readonly

-declare <WIDTH x i8> @__pseudo_gather_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i8> @__pseudo_gather_factored_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i16> @__pseudo_gather_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i16> @__pseudo_gather_factored_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i32> @__pseudo_gather_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i32> @__pseudo_gather_factored_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x float> @__pseudo_gather_base_offsets64_float(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x float> @__pseudo_gather_factored_base_offsets64_float(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i64> @__pseudo_gather_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i64> @__pseudo_gather_factored_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x double> @__pseudo_gather_base_offsets64_double(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x double> @__pseudo_gather_factored_base_offsets64_double(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x MASK>) nounwind readonly

 ; Similarly to the pseudo-gathers defined above, we also declare undefined
@@ -1660,7 +1660,7 @@ declare <WIDTH x double> @__pseudo_gather_base_offsets64_double(i8 *, <WIDTH x i
 ; The GatherScatterFlattenOpt optimization pass also finds these and
 ; transforms them to scatters like:
 ;
-; void __pseudo_scatter_base_offsets{32,64}_i8(uniform int8 *base,
+; void __pseudo_scatter_factored_base_offsets{32,64}_i8(uniform int8 *base,
 ; varying int32 offsets, uniform int32 offset_scale,
 ; varying int{32,64} offset_delta, varying int8 values, mask)
 ; (and similarly for 16/32/64 bit values)
@@ -1682,30 +1682,30 @@ declare void @__pseudo_scatter64_float(<WIDTH x i64>, <WIDTH x float>, <WIDTH x
 declare void @__pseudo_scatter64_i64(<WIDTH x i64>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
 declare void @__pseudo_scatter64_double(<WIDTH x i64>, <WIDTH x double>, <WIDTH x MASK>) nounwind

-declare void @__pseudo_scatter_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x i8>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x i16>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x i32>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_float(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_float(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x float>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x i64>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_double(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_double(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
 <WIDTH x double>, <WIDTH x MASK>) nounwind

-declare void @__pseudo_scatter_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x i8>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x i16>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x i32>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_float(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_float(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x float>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x i64>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_double(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_double(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
 <WIDTH x double>, <WIDTH x MASK>) nounwind

 declare float @__log_uniform_float(float) nounwind readnone
@@ -1872,103 +1872,103 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
 call void @__usedouble(<WIDTH x double> %g64_d)

 %pgbo32_8 = call <WIDTH x i8>
-@__pseudo_gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %pgbo32_8)
 %pgbo32_16 = call <WIDTH x i16>
-@__pseudo_gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %pgbo32_16)
 %pgbo32_32 = call <WIDTH x i32>
-@__pseudo_gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %pgbo32_32)
 %pgbo32_f = call <WIDTH x float>
-@__pseudo_gather_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %pgbo32_f)
 %pgbo32_64 = call <WIDTH x i64>
-@__pseudo_gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %pgbo32_64)
 %pgbo32_d = call <WIDTH x double>
-@__pseudo_gather_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__pseudo_gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %pgbo32_d)

 %gbo32_8 = call <WIDTH x i8>
-@__gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %gbo32_8)
 %gbo32_16 = call <WIDTH x i16>
-@__gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %gbo32_16)
 %gbo32_32 = call <WIDTH x i32>
-@__gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %gbo32_32)
 %gbo32_f = call <WIDTH x float>
-@__gather_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %gbo32_f)
 %gbo32_64 = call <WIDTH x i64>
-@__gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %gbo32_64)
 %gbo32_d = call <WIDTH x double>
-@__gather_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+@__gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %gbo32_d)


 %pgbo64_8 = call <WIDTH x i8>
-@__pseudo_gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %pgbo64_8)
 %pgbo64_16 = call <WIDTH x i16>
-@__pseudo_gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %pgbo64_16)
 %pgbo64_32 = call <WIDTH x i32>
-@__pseudo_gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %pgbo64_32)
 %pgbo64_f = call <WIDTH x float>
-@__pseudo_gather_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %pgbo64_f)
 %pgbo64_64 = call <WIDTH x i64>
-@__pseudo_gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %pgbo64_64)
 %pgbo64_d = call <WIDTH x double>
-@__pseudo_gather_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__pseudo_gather_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %pgbo64_d)

 %gbo64_8 = call <WIDTH x i8>
-@__gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %gbo64_8)
 %gbo64_16 = call <WIDTH x i16>
-@__gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %gbo64_16)
 %gbo64_32 = call <WIDTH x i32>
-@__gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %gbo64_32)
 %gbo64_f = call <WIDTH x float>
-@__gather_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %gbo64_f)
 %gbo64_64 = call <WIDTH x i64>
-@__gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %gbo64_64)
 %gbo64_d = call <WIDTH x double>
-@__gather_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+@__gather_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %gbo64_d)

@@ -2003,56 +2003,56 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
 call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__scatter64_double(<WIDTH x i64> %v64, <WIDTH x double> %vd, <WIDTH x MASK> %mask)

-call void @__pseudo_scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x double> %vd, <WIDTH x MASK> %mask)

-call void @__pseudo_scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x double> %vd, <WIDTH x MASK> %mask)

-call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
 <WIDTH x double> %vd, <WIDTH x MASK> %mask)

-call void @__scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
 <WIDTH x double> %vd, <WIDTH x MASK> %mask)

 ret void
@@ -3245,7 +3245,7 @@ define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %o
 }


-define <WIDTH x $1> @__gather_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
+define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
 <WIDTH x i32> %offset_delta,
 <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
 ; We can be clever and avoid the per-lane stuff for gathers if we are willing
@@ -3276,7 +3276,7 @@ define <WIDTH x $1> @__gather_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offset
 ret <WIDTH x $1> %ret`'eval(WIDTH-1)
 }

-define <WIDTH x $1> @__gather_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
+define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
 <WIDTH x i64> %offset_delta,
 <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
 ; We can be clever and avoid the per-lane stuff for gathers if we are willing
@@ -3391,7 +3391,7 @@ define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_s
 ret void
 }

-define void @__scatter_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
+define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
 <WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
 <WIDTH x i32> %mask) nounwind alwaysinline {
 ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
@@ -3401,7 +3401,7 @@ define void @__scatter_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32
 ret void
 }

-define void @__scatter_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
+define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
 <WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
 <WIDTH x i32> %mask) nounwind alwaysinline {
 ;; And use the `per_lane' macro to do all of the per-lane work for scatter...

@@ -1306,7 +1306,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec16_d val,

 // offsets * offsetScale is in bytes (for all of these)

-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 __vec16_i1 mask) { \
@@ -1322,18 +1322,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __gather_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __gather_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __gather_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __gather_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_factored_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_factored_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_factored_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_factored_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __gather_factored_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_factored_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __gather_factored_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __gather_factored_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_factored_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_factored_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __gather_factored_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __gather_factored_base_offsets64_double)

 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec16_i1 mask) { \
@@ -1361,7 +1361,7 @@ GATHER_GENERAL(__vec16_d, double, __vec16_i64, __gather64_double)

 // scatter

-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 VTYPE val, __vec16_i1 mask) { \
@@ -1375,18 +1375,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __scatter_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __scatter_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __scatter_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __scatter_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_factored_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_factored_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_factored_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_factored_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __scatter_factored_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_factored_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __scatter_factored_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __scatter_factored_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_factored_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_factored_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __scatter_factored_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __scatter_factored_base_offsets64_double)

 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec16_i1 mask) { \

@@ -1374,7 +1374,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec32_d val,

 // offsets * offsetScale is in bytes (for all of these)

-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 __vec32_i1 mask) { \
@@ -1390,18 +1390,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __gather_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __gather_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __gather_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __gather_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __gather_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __gather_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __gather_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __gather_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __gather_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __gather_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __gather_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __gather_factored_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __gather_factored_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __gather_factored_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __gather_factored_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __gather_factored_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __gather_factored_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __gather_factored_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __gather_factored_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __gather_factored_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __gather_factored_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __gather_factored_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __gather_factored_base_offsets64_double)

 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec32_i1 mask) { \
@@ -1429,7 +1429,7 @@ GATHER_GENERAL(__vec32_d, double, __vec32_i64, __gather64_double)

 // scatter

-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 VTYPE val, __vec32_i1 mask) { \
@@ -1443,18 +1443,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __scatter_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __scatter_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __scatter_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __scatter_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __scatter_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __scatter_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __scatter_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __scatter_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __scatter_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __scatter_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __scatter_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __scatter_factored_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __scatter_factored_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __scatter_factored_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __scatter_factored_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __scatter_factored_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __scatter_factored_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __scatter_factored_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __scatter_factored_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __scatter_factored_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __scatter_factored_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __scatter_factored_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __scatter_factored_base_offsets64_double)

 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec32_i1 mask) { \

@@ -1507,7 +1507,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec64_d val,

 // offsets * offsetScale is in bytes (for all of these)

-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 __vec64_i1 mask) { \
@@ -1523,18 +1523,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __gather_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __gather_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __gather_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __gather_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __gather_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __gather_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __gather_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __gather_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __gather_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __gather_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __gather_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __gather_factored_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __gather_factored_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __gather_factored_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __gather_factored_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __gather_factored_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __gather_factored_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __gather_factored_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __gather_factored_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __gather_factored_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __gather_factored_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __gather_factored_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __gather_factored_base_offsets64_double)

 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec64_i1 mask) { \
@@ -1562,7 +1562,7 @@ GATHER_GENERAL(__vec64_d, double, __vec64_i64, __gather64_double)

 // scatter

-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
 VTYPE val, __vec64_i1 mask) { \
@@ -1576,18 +1576,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }


-SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __scatter_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __scatter_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __scatter_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __scatter_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __scatter_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __scatter_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __scatter_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __scatter_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __scatter_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __scatter_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __scatter_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __scatter_factored_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __scatter_factored_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __scatter_factored_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __scatter_factored_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __scatter_factored_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __scatter_factored_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __scatter_factored_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __scatter_factored_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __scatter_factored_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __scatter_factored_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __scatter_factored_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __scatter_factored_base_offsets64_double)

 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec64_i1 mask) { \

@@ -1940,7 +1940,7 @@ static FORCEINLINE void __masked_store_blend_float(void *p, __vec16_f val,

 // offsets * offsetScale is in bytes (for all of these)

-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
 /*
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
@@ -1958,7 +1958,7 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 */

 static FORCEINLINE __vec16_i32
-__gather_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
+__gather_factored_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
 uint32_t scale, __vec16_i32 constOffset,
 __vec16_i1 mask) {
 __vec16_i32 vscale = _mm512_extload_epi32(&scale, _MM_UPCONV_EPI32_NONE, _MM_BROADCAST_1X16, _MM_HINT_NONE);
@@ -1973,7 +1973,7 @@ __gather_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
 }

 static FORCEINLINE __vec16_f
-__gather_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
+__gather_factored_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
 uint32_t scale, __vec16_i32 constOffset,
 __vec16_i1 mask) {
 __vec16_i32 vscale = _mm512_extload_epi32(&scale, _MM_UPCONV_EPI32_NONE, _MM_BROADCAST_1X16, _MM_HINT_NONE);
@@ -1987,13 +1987,13 @@ __gather_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
 return ret;
 }

-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_factored_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_factored_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_factored_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_factored_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_factored_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_factored_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_factored_base_offsets64_i64)

 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC)
 /*
@@ -2039,7 +2039,7 @@ static FORCEINLINE __vec16_i32 __gather64_i32(__vec16_i64 ptrs, __vec16_i1 mask)
 */
 // scatter

-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
 /*
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 uint32_t scale, OTYPE constOffset, \
@@ -2054,16 +2054,16 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
 */

-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_factored_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_factored_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_factored_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_factored_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_factored_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_factored_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_factored_base_offsets64_i64)

 static FORCEINLINE void
-__scatter_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
+__scatter_factored_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
 uint32_t scale, __vec16_i32 constOffset,
 __vec16_i32 val, __vec16_i1 mask)
 {
@@ -2072,7 +2072,7 @@ __scatter_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
 }

 static FORCEINLINE void
-__scatter_base_offsets32_float(void *base, const __vec16_i32 &varyingOffset,
+__scatter_factored_base_offsets32_float(void *base, const __vec16_i32 &varyingOffset,
 uint32_t scale, const __vec16_i32 &constOffset,
 const __vec16_f &val, const __vec16_i1 mask)
 {

@@ -3007,84 +3007,84 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets,
 }

 static FORCEINLINE __vec4_i8
-__gather_base_offsets32_i8(unsigned char *b, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i8(unsigned char *b, __vec4_i32 offsets,
 uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_i8(), uint8_t(), b, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i8
-__gather_base_offsets64_i8(unsigned char *b, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i8(unsigned char *b, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_i8(), uint8_t(), b, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i16
-__gather_base_offsets32_i16(unsigned char *b, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i16(unsigned char *b, __vec4_i32 offsets,
 uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_i16(), uint16_t(), b, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i16
-__gather_base_offsets64_i16(unsigned char *b, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i16(unsigned char *b, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_i16(), uint16_t(), b, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i32
-__gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
+__gather_factored_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
 __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_i32(), uint32_t(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i32
-__gather_base_offsets64_i32(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i32(unsigned char *p, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_i32(), uint32_t(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_f
-__gather_base_offsets32_float(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
+__gather_factored_base_offsets32_float(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
 __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_f(), float(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_f
-__gather_base_offsets64_float(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_float(unsigned char *p, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_f(), float(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i64
-__gather_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
 uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_i64(), uint64_t(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_i64
-__gather_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_i64(), uint64_t(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_d
-__gather_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
+__gather_factored_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
 uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets32(__vec4_d(), double(), p, offsets, scale,
 constOffset, mask);
 }

 static FORCEINLINE __vec4_d
-__gather_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
 uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
 return lGatherBaseOffsets64(__vec4_d(), double(), p, offsets, scale,
 constOffset, mask);
@@ -3252,7 +3252,7 @@ static FORCEINLINE __vec4_d __gather64_double(__vec4_i64 ptrs, __vec4_i1 mask) {

 #define SCATTER32_64(SUFFIX, VEC_SUFFIX, TYPE, EXTRACT) \
 static FORCEINLINE void \
-__scatter_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
+__scatter_factored_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
 uint32_t scale, __vec4_i32 constOffset, \
 __vec4_##VEC_SUFFIX val, __vec4_i1 mask) { \
 uint32_t m = _mm_extract_ps(mask.v, 0); \
@@ -3281,7 +3281,7 @@ __scatter_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
 } \
 } \
 static FORCEINLINE void \
-__scatter_base_offsets64_##SUFFIX(unsigned char *p, __vec4_i64 offsets, \
+__scatter_factored_base_offsets64_##SUFFIX(unsigned char *p, __vec4_i64 offsets, \
 uint32_t scale, __vec4_i64 constOffset, \
 __vec4_##VEC_SUFFIX val, __vec4_i1 mask) { \
 uint32_t m = _mm_extract_ps(mask.v, 0); \
@@ -3322,7 +3322,7 @@ SCATTER32_64(float, f, float, _mm_extract_ps_as_float)
|
||||
|
||||
|
||||
static FORCEINLINE void
|
||||
__scatter_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
|
||||
__scatter_factored_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
|
||||
uint32_t scale, __vec4_i32 constOffset, __vec4_i64 val,
|
||||
__vec4_i1 mask) {
|
||||
uint32_t m = _mm_extract_ps(mask.v, 0);
|
||||
@@ -3359,7 +3359,7 @@ __scatter_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
|
||||
}
|
||||
|
||||
static FORCEINLINE void
|
||||
__scatter_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
|
||||
__scatter_factored_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
|
||||
uint32_t scale, __vec4_i64 constOffset,
|
||||
__vec4_i64 val, __vec4_i1 mask) {
|
||||
uint32_t m = _mm_extract_ps(mask.v, 0);
|
||||
@@ -3396,17 +3396,17 @@ __scatter_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
|
||||
}
|
||||
|
||||
static FORCEINLINE void
|
||||
__scatter_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
|
||||
__scatter_factored_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
|
||||
uint32_t scale, __vec4_i32 constOffset, __vec4_d val,
|
||||
__vec4_i1 mask) {
|
||||
__scatter_base_offsets32_i64(p, offsets, scale, constOffset, val, mask);
|
||||
__scatter_factored_base_offsets32_i64(p, offsets, scale, constOffset, val, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE void
|
||||
__scatter_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
|
||||
__scatter_factored_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
|
||||
uint32_t scale, __vec4_i64 constOffset, __vec4_d val,
|
||||
__vec4_i1 mask) {
|
||||
__scatter_base_offsets64_i64(p, offsets, scale, constOffset, val, mask);
|
||||
__scatter_factored_base_offsets64_i64(p, offsets, scale, constOffset, val, mask);
|
||||
}
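The scatter side mirrors the gather addressing exactly. A minimal scalar sketch of a factored scatter under the same assumptions as the gather sketch above (illustrative only, not the SSE implementation in this file):

#include <stdint.h>

/* Hypothetical 4-wide illustration: store val[i] to
   base + offsets[i] * scale + constOffset[i]  for each active mask lane. */
static void scatter_factored_i32_sketch(uint8_t *base,
                                        const int32_t offsets[4], uint32_t scale,
                                        const int32_t constOffset[4],
                                        const int32_t val[4], const int mask[4]) {
    for (int i = 0; i < 4; ++i)
        if (mask[i])
            *(int32_t *)(base + (int64_t)offsets[i] * scale + constOffset[i]) = val[i];
}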
322
opt.cpp
@@ -1689,57 +1689,57 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
    };

    GSInfo gsFuncs[] = {
        GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
               "__pseudo_gather_base_offsets32_i8", true),
        GSInfo("__pseudo_gather32_i16", "__pseudo_gather_base_offsets32_i16",
               "__pseudo_gather_base_offsets32_i16", true),
        GSInfo("__pseudo_gather32_i32", "__pseudo_gather_base_offsets32_i32",
               "__pseudo_gather_base_offsets32_i32", true),
        GSInfo("__pseudo_gather32_float", "__pseudo_gather_base_offsets32_float",
               "__pseudo_gather_base_offsets32_float", true),
        GSInfo("__pseudo_gather32_i64", "__pseudo_gather_base_offsets32_i64",
               "__pseudo_gather_base_offsets32_i64", true),
        GSInfo("__pseudo_gather32_double", "__pseudo_gather_base_offsets32_double",
               "__pseudo_gather_base_offsets32_double", true),
        GSInfo("__pseudo_gather32_i8", "__pseudo_gather_factored_base_offsets32_i8",
               "__pseudo_gather_factored_base_offsets32_i8", true),
        GSInfo("__pseudo_gather32_i16", "__pseudo_gather_factored_base_offsets32_i16",
               "__pseudo_gather_factored_base_offsets32_i16", true),
        GSInfo("__pseudo_gather32_i32", "__pseudo_gather_factored_base_offsets32_i32",
               "__pseudo_gather_factored_base_offsets32_i32", true),
        GSInfo("__pseudo_gather32_float", "__pseudo_gather_factored_base_offsets32_float",
               "__pseudo_gather_factored_base_offsets32_float", true),
        GSInfo("__pseudo_gather32_i64", "__pseudo_gather_factored_base_offsets32_i64",
               "__pseudo_gather_factored_base_offsets32_i64", true),
        GSInfo("__pseudo_gather32_double", "__pseudo_gather_factored_base_offsets32_double",
               "__pseudo_gather_factored_base_offsets32_double", true),

        GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_base_offsets32_i8",
               "__pseudo_scatter_base_offsets32_i8", false),
        GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_base_offsets32_i16",
               "__pseudo_scatter_base_offsets32_i16", false),
        GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_base_offsets32_i32",
               "__pseudo_scatter_base_offsets32_i32", false),
        GSInfo("__pseudo_scatter32_float", "__pseudo_scatter_base_offsets32_float",
               "__pseudo_scatter_base_offsets32_float", false),
        GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_base_offsets32_i64",
               "__pseudo_scatter_base_offsets32_i64", false),
        GSInfo("__pseudo_scatter32_double", "__pseudo_scatter_base_offsets32_double",
               "__pseudo_scatter_base_offsets32_double", false),
        GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
               "__pseudo_scatter_factored_base_offsets32_i8", false),
        GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
               "__pseudo_scatter_factored_base_offsets32_i16", false),
        GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
               "__pseudo_scatter_factored_base_offsets32_i32", false),
        GSInfo("__pseudo_scatter32_float", "__pseudo_scatter_factored_base_offsets32_float",
               "__pseudo_scatter_factored_base_offsets32_float", false),
        GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
               "__pseudo_scatter_factored_base_offsets32_i64", false),
        GSInfo("__pseudo_scatter32_double", "__pseudo_scatter_factored_base_offsets32_double",
               "__pseudo_scatter_factored_base_offsets32_double", false),

        GSInfo("__pseudo_gather64_i8", "__pseudo_gather_base_offsets64_i8",
               "__pseudo_gather_base_offsets32_i8", true),
        GSInfo("__pseudo_gather64_i16", "__pseudo_gather_base_offsets64_i16",
               "__pseudo_gather_base_offsets32_i16", true),
        GSInfo("__pseudo_gather64_i32", "__pseudo_gather_base_offsets64_i32",
               "__pseudo_gather_base_offsets32_i32", true),
        GSInfo("__pseudo_gather64_float", "__pseudo_gather_base_offsets64_float",
               "__pseudo_gather_base_offsets32_float", true),
        GSInfo("__pseudo_gather64_i64", "__pseudo_gather_base_offsets64_i64",
               "__pseudo_gather_base_offsets32_i64", true),
        GSInfo("__pseudo_gather64_double", "__pseudo_gather_base_offsets64_double",
               "__pseudo_gather_base_offsets32_double", true),
        GSInfo("__pseudo_gather64_i8", "__pseudo_gather_factored_base_offsets64_i8",
               "__pseudo_gather_factored_base_offsets32_i8", true),
        GSInfo("__pseudo_gather64_i16", "__pseudo_gather_factored_base_offsets64_i16",
               "__pseudo_gather_factored_base_offsets32_i16", true),
        GSInfo("__pseudo_gather64_i32", "__pseudo_gather_factored_base_offsets64_i32",
               "__pseudo_gather_factored_base_offsets32_i32", true),
        GSInfo("__pseudo_gather64_float", "__pseudo_gather_factored_base_offsets64_float",
               "__pseudo_gather_factored_base_offsets32_float", true),
        GSInfo("__pseudo_gather64_i64", "__pseudo_gather_factored_base_offsets64_i64",
               "__pseudo_gather_factored_base_offsets32_i64", true),
        GSInfo("__pseudo_gather64_double", "__pseudo_gather_factored_base_offsets64_double",
               "__pseudo_gather_factored_base_offsets32_double", true),

        GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_base_offsets64_i8",
               "__pseudo_scatter_base_offsets32_i8", false),
        GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_base_offsets64_i16",
               "__pseudo_scatter_base_offsets32_i16", false),
        GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_base_offsets64_i32",
               "__pseudo_scatter_base_offsets32_i32", false),
        GSInfo("__pseudo_scatter64_float", "__pseudo_scatter_base_offsets64_float",
               "__pseudo_scatter_base_offsets32_float", false),
        GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_base_offsets64_i64",
               "__pseudo_scatter_base_offsets32_i64", false),
        GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_base_offsets64_double",
               "__pseudo_scatter_base_offsets32_double", false),
        GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_factored_base_offsets64_i8",
               "__pseudo_scatter_factored_base_offsets32_i8", false),
        GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_factored_base_offsets64_i16",
               "__pseudo_scatter_factored_base_offsets32_i16", false),
        GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_factored_base_offsets64_i32",
               "__pseudo_scatter_factored_base_offsets32_i32", false),
        GSInfo("__pseudo_scatter64_float", "__pseudo_scatter_factored_base_offsets64_float",
               "__pseudo_scatter_factored_base_offsets32_float", false),
        GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_factored_base_offsets64_i64",
               "__pseudo_scatter_factored_base_offsets32_i64", false),
        GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_factored_base_offsets64_double",
               "__pseudo_scatter_factored_base_offsets32_double", false),
    };

    int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
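A hedged sketch of how a table like gsFuncs is typically consulted: the pass looks up the called function in the table and, on a hit, rewrites the call to the corresponding factored-base-offsets variant. The version below is self-contained and matches by name string, whereas the real pass compares llvm::Function pointers; names here are assumed for illustration only:

#include <string.h>

/* Illustrative only: return the index of the table row describing this
   pseudo gather/scatter callee, or -1 if the call is not one we rewrite. */
static int find_gs_entry(const char *calleeName, const char *tableNames[], int n) {
    for (int i = 0; i < n; ++i)
        if (strcmp(calleeName, tableNames[i]) == 0)
            return i;
    return -1;
}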
@@ -1858,57 +1858,57 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
    };

    GSBOInfo gsFuncs[] = {
        GSBOInfo("__pseudo_gather_base_offsets32_i8",
                 "__pseudo_gather_base_offsets32_i8", true),
        GSBOInfo("__pseudo_gather_base_offsets32_i16",
                 "__pseudo_gather_base_offsets32_i16", true),
        GSBOInfo("__pseudo_gather_base_offsets32_i32",
                 "__pseudo_gather_base_offsets32_i32", true),
        GSBOInfo("__pseudo_gather_base_offsets32_float",
                 "__pseudo_gather_base_offsets32_float", true),
        GSBOInfo("__pseudo_gather_base_offsets32_i64",
                 "__pseudo_gather_base_offsets32_i64", true),
        GSBOInfo("__pseudo_gather_base_offsets32_double",
                 "__pseudo_gather_base_offsets32_double", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_i8",
                 "__pseudo_gather_factored_base_offsets32_i8", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_i16",
                 "__pseudo_gather_factored_base_offsets32_i16", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_i32",
                 "__pseudo_gather_factored_base_offsets32_i32", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_float",
                 "__pseudo_gather_factored_base_offsets32_float", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_i64",
                 "__pseudo_gather_factored_base_offsets32_i64", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets32_double",
                 "__pseudo_gather_factored_base_offsets32_double", true),

        GSBOInfo( "__pseudo_scatter_base_offsets32_i8",
                 "__pseudo_scatter_base_offsets32_i8", false),
        GSBOInfo("__pseudo_scatter_base_offsets32_i16",
                 "__pseudo_scatter_base_offsets32_i16", false),
        GSBOInfo("__pseudo_scatter_base_offsets32_i32",
                 "__pseudo_scatter_base_offsets32_i32", false),
        GSBOInfo("__pseudo_scatter_base_offsets32_float",
                 "__pseudo_scatter_base_offsets32_float", false),
        GSBOInfo("__pseudo_scatter_base_offsets32_i64",
                 "__pseudo_scatter_base_offsets32_i64", false),
        GSBOInfo("__pseudo_scatter_base_offsets32_double",
                 "__pseudo_scatter_base_offsets32_double", false),
        GSBOInfo( "__pseudo_scatter_factored_base_offsets32_i8",
                 "__pseudo_scatter_factored_base_offsets32_i8", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets32_i16",
                 "__pseudo_scatter_factored_base_offsets32_i16", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets32_i32",
                 "__pseudo_scatter_factored_base_offsets32_i32", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets32_float",
                 "__pseudo_scatter_factored_base_offsets32_float", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets32_i64",
                 "__pseudo_scatter_factored_base_offsets32_i64", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets32_double",
                 "__pseudo_scatter_factored_base_offsets32_double", false),

        GSBOInfo( "__pseudo_gather_base_offsets64_i8",
                 "__pseudo_gather_base_offsets32_i8", true),
        GSBOInfo("__pseudo_gather_base_offsets64_i16",
                 "__pseudo_gather_base_offsets32_i16", true),
        GSBOInfo("__pseudo_gather_base_offsets64_i32",
                 "__pseudo_gather_base_offsets32_i32", true),
        GSBOInfo("__pseudo_gather_base_offsets64_float",
                 "__pseudo_gather_base_offsets32_float", true),
        GSBOInfo("__pseudo_gather_base_offsets64_i64",
                 "__pseudo_gather_base_offsets32_i64", true),
        GSBOInfo("__pseudo_gather_base_offsets64_double",
                 "__pseudo_gather_base_offsets32_double", true),
        GSBOInfo( "__pseudo_gather_factored_base_offsets64_i8",
                 "__pseudo_gather_factored_base_offsets32_i8", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets64_i16",
                 "__pseudo_gather_factored_base_offsets32_i16", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets64_i32",
                 "__pseudo_gather_factored_base_offsets32_i32", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets64_float",
                 "__pseudo_gather_factored_base_offsets32_float", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets64_i64",
                 "__pseudo_gather_factored_base_offsets32_i64", true),
        GSBOInfo("__pseudo_gather_factored_base_offsets64_double",
                 "__pseudo_gather_factored_base_offsets32_double", true),

        GSBOInfo( "__pseudo_scatter_base_offsets64_i8",
                 "__pseudo_scatter_base_offsets32_i8", false),
        GSBOInfo("__pseudo_scatter_base_offsets64_i16",
                 "__pseudo_scatter_base_offsets32_i16", false),
        GSBOInfo("__pseudo_scatter_base_offsets64_i32",
                 "__pseudo_scatter_base_offsets32_i32", false),
        GSBOInfo("__pseudo_scatter_base_offsets64_float",
                 "__pseudo_scatter_base_offsets32_float", false),
        GSBOInfo("__pseudo_scatter_base_offsets64_i64",
                 "__pseudo_scatter_base_offsets32_i64", false),
        GSBOInfo("__pseudo_scatter_base_offsets64_double",
                 "__pseudo_scatter_base_offsets32_double", false),
        GSBOInfo( "__pseudo_scatter_factored_base_offsets64_i8",
                 "__pseudo_scatter_factored_base_offsets32_i8", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i16",
                 "__pseudo_scatter_factored_base_offsets32_i16", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i32",
                 "__pseudo_scatter_factored_base_offsets32_i32", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets64_float",
                 "__pseudo_scatter_factored_base_offsets32_float", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i64",
                 "__pseudo_scatter_factored_base_offsets32_i64", false),
        GSBOInfo("__pseudo_scatter_factored_base_offsets64_double",
                 "__pseudo_scatter_factored_base_offsets32_double", false),
    };

    int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
@@ -2025,29 +2025,29 @@ lGSToLoadStore(llvm::CallInst *callInst) {
    };

    GatherImpInfo gInfo[] = {
        GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__masked_load_i8",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_i8", "__masked_load_i8",
                      LLVMTypes::Int8Type, 1),
        GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__masked_load_i16",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_i16", "__masked_load_i16",
                      LLVMTypes::Int16Type, 2),
        GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__masked_load_i32",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_i32", "__masked_load_i32",
                      LLVMTypes::Int32Type, 4),
        GatherImpInfo("__pseudo_gather_base_offsets32_float", "__masked_load_float",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_float", "__masked_load_float",
                      LLVMTypes::FloatType, 4),
        GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__masked_load_i64",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_i64", "__masked_load_i64",
                      LLVMTypes::Int64Type, 8),
        GatherImpInfo("__pseudo_gather_base_offsets32_double", "__masked_load_double",
        GatherImpInfo("__pseudo_gather_factored_base_offsets32_double", "__masked_load_double",
                      LLVMTypes::DoubleType, 8),
        GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__masked_load_i8",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_i8", "__masked_load_i8",
                      LLVMTypes::Int8Type, 1),
        GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__masked_load_i16",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_i16", "__masked_load_i16",
                      LLVMTypes::Int16Type, 2),
        GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__masked_load_i32",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_i32", "__masked_load_i32",
                      LLVMTypes::Int32Type, 4),
        GatherImpInfo("__pseudo_gather_base_offsets64_float", "__masked_load_float",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_float", "__masked_load_float",
                      LLVMTypes::FloatType, 4),
        GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__masked_load_i64",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_i64", "__masked_load_i64",
                      LLVMTypes::Int64Type, 8),
        GatherImpInfo("__pseudo_gather_base_offsets64_double", "__masked_load_double",
        GatherImpInfo("__pseudo_gather_factored_base_offsets64_double", "__masked_load_double",
                      LLVMTypes::DoubleType, 8)
    };
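The gInfo table drives the case where the pass can prove the lanes of a factored gather actually touch consecutive elements; the gather then degenerates into a masked vector load. A minimal scalar sketch of that degenerate case, with assumed names (not the pass's output):

/* If offset[i] == i * sizeof(element) for every lane, the factored gather is
   just a masked load starting at the first lane's address. */
static void masked_load_float_sketch(const float *firstLaneAddr,
                                     const int mask[4], float out[4]) {
    for (int i = 0; i < 4; ++i)
        if (mask[i])
            out[i] = firstLaneAddr[i];
}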
@@ -2067,29 +2067,29 @@ lGSToLoadStore(llvm::CallInst *callInst) {
    };

    ScatterImpInfo sInfo[] = {
        ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_masked_store_i8",
                       LLVMTypes::Int8VectorPointerType, 1),
        ScatterImpInfo("__pseudo_scatter_base_offsets32_i16", "__pseudo_masked_store_i16",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_masked_store_i16",
                       LLVMTypes::Int16VectorPointerType, 2),
        ScatterImpInfo("__pseudo_scatter_base_offsets32_i32", "__pseudo_masked_store_i32",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_masked_store_i32",
                       LLVMTypes::Int32VectorPointerType, 4),
        ScatterImpInfo("__pseudo_scatter_base_offsets32_float", "__pseudo_masked_store_float",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_float", "__pseudo_masked_store_float",
                       LLVMTypes::FloatVectorPointerType, 4),
        ScatterImpInfo("__pseudo_scatter_base_offsets32_i64", "__pseudo_masked_store_i64",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_masked_store_i64",
                       LLVMTypes::Int64VectorPointerType, 8),
        ScatterImpInfo("__pseudo_scatter_base_offsets32_double", "__pseudo_masked_store_double",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_double", "__pseudo_masked_store_double",
                       LLVMTypes::DoubleVectorPointerType, 8),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_i8", "__pseudo_masked_store_i8",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i8", "__pseudo_masked_store_i8",
                       LLVMTypes::Int8VectorPointerType, 1),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_i16", "__pseudo_masked_store_i16",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i16", "__pseudo_masked_store_i16",
                       LLVMTypes::Int16VectorPointerType, 2),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_i32", "__pseudo_masked_store_i32",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i32", "__pseudo_masked_store_i32",
                       LLVMTypes::Int32VectorPointerType, 4),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_float", "__pseudo_masked_store_float",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_float", "__pseudo_masked_store_float",
                       LLVMTypes::FloatVectorPointerType, 4),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_i64", "__pseudo_masked_store_i64",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i64", "__pseudo_masked_store_i64",
                       LLVMTypes::Int64VectorPointerType, 8),
        ScatterImpInfo("__pseudo_scatter_base_offsets64_double", "__pseudo_masked_store_double",
        ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_double", "__pseudo_masked_store_double",
                       LLVMTypes::DoubleVectorPointerType, 8)
    };
@@ -3354,10 +3354,10 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
    DEBUG_START_PASS("GatherCoalescePass");

    llvm::Function *gatherFuncs[] = {
        m->module->getFunction("__pseudo_gather_base_offsets32_i32"),
        m->module->getFunction("__pseudo_gather_base_offsets32_float"),
        m->module->getFunction("__pseudo_gather_base_offsets64_i32"),
        m->module->getFunction("__pseudo_gather_base_offsets64_float"),
        m->module->getFunction("__pseudo_gather_factored_base_offsets32_i32"),
        m->module->getFunction("__pseudo_gather_factored_base_offsets32_float"),
        m->module->getFunction("__pseudo_gather_factored_base_offsets64_i32"),
        m->module->getFunction("__pseudo_gather_factored_base_offsets64_float"),
    };
    int nGatherFuncs = sizeof(gatherFuncs) / sizeof(gatherFuncs[0]);

@@ -3367,7 +3367,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e;
         ++iter) {
        // Iterate over all of the instructions and look for calls to
        // __pseudo_gather_base_offsets{32,64}_{i32,float} calls.
        // __pseudo_gather_factored_base_offsets{32,64}_{i32,float} calls.
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
        if (callInst == NULL)
            continue;
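A minimal sketch of the idea behind gather coalescing, assuming two 4-wide gathers whose lanes jointly cover a contiguous run of floats (illustrative only, not the pass's actual rewriting):

#include <string.h>

/* Assumed example: two gathers whose lanes together read base[0..7] can be
   served by one contiguous 8-element load followed by a split of the block. */
static void coalesced_gather_sketch(const float *base, float out0[4], float out1[4]) {
    float block[8];
    memcpy(block, base, sizeof(block));          /* one contiguous wide load */
    memcpy(out0, block + 0, 4 * sizeof(float));
    memcpy(out1, block + 4, 4 * sizeof(float));
}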
@@ -3639,19 +3639,19 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
    };

    LowerGSInfo lgsInfo[] = {
        LowerGSInfo("__pseudo_gather_base_offsets32_i8", "__gather_base_offsets32_i8", true),
        LowerGSInfo("__pseudo_gather_base_offsets32_i16", "__gather_base_offsets32_i16", true),
        LowerGSInfo("__pseudo_gather_base_offsets32_i32", "__gather_base_offsets32_i32", true),
        LowerGSInfo("__pseudo_gather_base_offsets32_float", "__gather_base_offsets32_float", true),
        LowerGSInfo("__pseudo_gather_base_offsets32_i64", "__gather_base_offsets32_i64", true),
        LowerGSInfo("__pseudo_gather_base_offsets32_double", "__gather_base_offsets32_double", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i8", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16", "__gather_factored_base_offsets32_i16", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i32", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_float", "__gather_factored_base_offsets32_float", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64", "__gather_factored_base_offsets32_i64", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets32_double", "__gather_factored_base_offsets32_double", true),

        LowerGSInfo("__pseudo_gather_base_offsets64_i8", "__gather_base_offsets64_i8", true),
        LowerGSInfo("__pseudo_gather_base_offsets64_i16", "__gather_base_offsets64_i16", true),
        LowerGSInfo("__pseudo_gather_base_offsets64_i32", "__gather_base_offsets64_i32", true),
        LowerGSInfo("__pseudo_gather_base_offsets64_float", "__gather_base_offsets64_float", true),
        LowerGSInfo("__pseudo_gather_base_offsets64_i64", "__gather_base_offsets64_i64", true),
        LowerGSInfo("__pseudo_gather_base_offsets64_double", "__gather_base_offsets64_double", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i8", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16", "__gather_factored_base_offsets64_i16", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i32", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_float", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64", "__gather_factored_base_offsets64_i64", true),
        LowerGSInfo("__pseudo_gather_factored_base_offsets64_double", "__gather_factored_base_offsets64_double", true),

        LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true),
        LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true),
@@ -3667,19 +3667,19 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
        LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true),
        LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true),

        LowerGSInfo("__pseudo_scatter_base_offsets32_i8", "__scatter_base_offsets32_i8", false),
        LowerGSInfo("__pseudo_scatter_base_offsets32_i16", "__scatter_base_offsets32_i16", false),
        LowerGSInfo("__pseudo_scatter_base_offsets32_i32", "__scatter_base_offsets32_i32", false),
        LowerGSInfo("__pseudo_scatter_base_offsets32_float", "__scatter_base_offsets32_float", false),
        LowerGSInfo("__pseudo_scatter_base_offsets32_i64", "__scatter_base_offsets32_i64", false),
        LowerGSInfo("__pseudo_scatter_base_offsets32_double", "__scatter_base_offsets32_double", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8", "__scatter_factored_base_offsets32_i8", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16", "__scatter_factored_base_offsets32_i16", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32", "__scatter_factored_base_offsets32_i32", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float", "__scatter_factored_base_offsets32_float", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64", "__scatter_factored_base_offsets32_i64", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double", "__scatter_factored_base_offsets32_double", false),

        LowerGSInfo("__pseudo_scatter_base_offsets64_i8", "__scatter_base_offsets64_i8", false),
        LowerGSInfo("__pseudo_scatter_base_offsets64_i16", "__scatter_base_offsets64_i16", false),
        LowerGSInfo("__pseudo_scatter_base_offsets64_i32", "__scatter_base_offsets64_i32", false),
        LowerGSInfo("__pseudo_scatter_base_offsets64_float", "__scatter_base_offsets64_float", false),
        LowerGSInfo("__pseudo_scatter_base_offsets64_i64", "__scatter_base_offsets64_i64", false),
        LowerGSInfo("__pseudo_scatter_base_offsets64_double", "__scatter_base_offsets64_double", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i8", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16", "__scatter_factored_base_offsets64_i16", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i32", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_float", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64", "__scatter_factored_base_offsets64_i64", false),
        LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double", "__scatter_factored_base_offsets64_double", false),

        LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false),
        LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false),
@@ -3899,12 +3899,12 @@ bool
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
    const char *names[] = {
        "__fast_masked_vload",
        "__gather_base_offsets32_i8", "__gather_base_offsets32_i16",
        "__gather_base_offsets32_i32", "__gather_base_offsets32_i64",
        "__gather_base_offsets32_float", "__gather_base_offsets32_double",
        "__gather_base_offsets64_i8", "__gather_base_offsets64_i16",
        "__gather_base_offsets64_i32", "__gather_base_offsets64_i64",
        "__gather_base_offsets64_float", "__gather_base_offsets64_double",
        "__gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i16",
        "__gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i64",
        "__gather_factored_base_offsets32_float", "__gather_factored_base_offsets32_double",
        "__gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i16",
        "__gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i64",
        "__gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_double",
        "__gather32_i8", "__gather32_i16",
        "__gather32_i32", "__gather32_i64",
        "__gather32_float", "__gather32_double",
@@ -3926,12 +3926,12 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
        "__masked_store_blend_i8", "__masked_store_blend_i16",
        "__masked_store_blend_i32", "__masked_store_blend_i64",
        "__masked_store_blend_float", "__masked_store_blend_double",
        "__scatter_base_offsets32_i8", "__scatter_base_offsets32_i16",
        "__scatter_base_offsets32_i32", "__scatter_base_offsets32_i64",
        "__scatter_base_offsets32_float", "__scatter_base_offsets32_double",
        "__scatter_base_offsets64_i8", "__scatter_base_offsets64_i16",
        "__scatter_base_offsets64_i32", "__scatter_base_offsets64_i64",
        "__scatter_base_offsets64_float", "__scatter_base_offsets64_double",
        "__scatter_factored_base_offsets32_i8", "__scatter_factored_base_offsets32_i16",
        "__scatter_factored_base_offsets32_i32", "__scatter_factored_base_offsets32_i64",
        "__scatter_factored_base_offsets32_float", "__scatter_factored_base_offsets32_double",
        "__scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i16",
        "__scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i64",
        "__scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_double",
        "__scatter_elt32_i8", "__scatter_elt32_i16",
        "__scatter_elt32_i32", "__scatter_elt32_i64",
        "__scatter_elt32_float", "__scatter_elt32_double",