Rename gather/scatter_base_offsets functions to *factored_base_offsets*.

No functional change; just preparation for having a path that doesn't
factor the offsets into constant and varying parts, which will be better
for AVX2 and KNC.
Matt Pharr
2012-07-11 11:06:30 -07:00
parent 8e19d54e75
commit ec0280be11
8 changed files with 361 additions and 361 deletions
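For context on the naming: in the "factored" form, each lane's byte offset is split into a varying part that is multiplied by a uniform scale, plus a constant part that is already in bytes. A minimal C++ sketch of the per-lane address computation (the helper is illustrative only, not part of this commit; the parameter names follow the GATHER_FACTORED_BASE_OFFSETS macros in the diff below):

    #include <cstdint>

    // Sketch only: the effective address for one lane of a factored
    // gather/scatter. varyingOffset is scaled; constOffset is not.
    static inline unsigned char *
    lane_address(unsigned char *b, int64_t varyingOffset,
                 uint32_t scale, int64_t constOffset) {
        return b + (int64_t)scale * varyingOffset + constOffset;
    }

An unfactored path would presumably pass a single pre-combined offset vector per lane instead, which maps more directly onto hardware gather/scatter instructions (such as AVX2's, which take one index operand plus an immediate scale).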

@@ -334,18 +334,18 @@ define void @__masked_store_blend_double(<WIDTH x double>* nocapture,
 ;; gather/scatter
 define(`gather_scatter', `
-declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
+declare <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
                                                  i32, <WIDTH x i32>, <WIDTH x i1>) nounwind readonly
-declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
+declare <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
                                                  i32, <WIDTH x i64>, <WIDTH x i1>) nounwind readonly
 declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32>,
                                     <WIDTH x i1>) nounwind readonly
 declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64>,
                                     <WIDTH x i1>) nounwind readonly
-declare void @__scatter_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
+declare void @__scatter_factored_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
                                           i32, <WIDTH x i32>, <WIDTH x $1>, <WIDTH x i1>) nounwind
-declare void @__scatter_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
+declare void @__scatter_factored_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
                                           i32, <WIDTH x i64>, <WIDTH x $1>, <WIDTH x i1>) nounwind
 declare void @__scatter32_$1(<WIDTH x i32>, <WIDTH x $1>,
                              <WIDTH x i1>) nounwind

@@ -1599,7 +1599,7 @@ declare void @__pseudo_masked_store_double(<WIDTH x double> * nocapture, <WIDTH
 ; offset_delta feeds into the free offset calculation.
 ;
 ; varying int{8,16,32,float,64,double}
-; __pseudo_gather_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
+; __pseudo_gather_factored_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
 ;                 int{32,64} offsets, uniform int32 offset_scale,
 ;                 int{32,64} offset_delta, mask)
 ;
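Read as scalar code, a factored pseudo-gather like the one described above behaves roughly like the following per-lane loop. This is a sketch, assuming (as the generic implementations later in this commit do) that offset_scale multiplies only the varying offsets while offset_delta is added unscaled:

    #include <cstdint>

    // Per-lane reference semantics for something like
    // __pseudo_gather_factored_base_offsets32_float, over WIDTH lanes.
    static void gather_factored_ref(const uint8_t *base, const int32_t *offsets,
                                    uint32_t offset_scale,
                                    const int32_t *offset_delta,
                                    const bool *mask, float *result, int width) {
        for (int i = 0; i < width; ++i)
            if (mask[i])  // inactive lanes are left untouched
                result[i] = *(const float *)
                    (base + (int64_t)offset_scale * offsets[i] + offset_delta[i]);
    }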
@@ -1621,30 +1621,30 @@ declare <WIDTH x float> @__pseudo_gather64_float(<WIDTH x i64>, <WIDTH x MASK>)
 declare <WIDTH x i64> @__pseudo_gather64_i64(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
 declare <WIDTH x double> @__pseudo_gather64_double(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i8> @__pseudo_gather_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i8> @__pseudo_gather_factored_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                         <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i16> @__pseudo_gather_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i16> @__pseudo_gather_factored_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i32> @__pseudo_gather_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i32> @__pseudo_gather_factored_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x float> @__pseudo_gather_base_offsets32_float(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x float> @__pseudo_gather_factored_base_offsets32_float(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                               <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i64> @__pseudo_gather_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x i64> @__pseudo_gather_factored_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x double> @__pseudo_gather_base_offsets32_double(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare <WIDTH x double> @__pseudo_gather_factored_base_offsets32_double(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                                 <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i8> @__pseudo_gather_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i8> @__pseudo_gather_factored_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                         <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i16> @__pseudo_gather_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i16> @__pseudo_gather_factored_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i32> @__pseudo_gather_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i32> @__pseudo_gather_factored_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x float> @__pseudo_gather_base_offsets64_float(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x float> @__pseudo_gather_factored_base_offsets64_float(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                               <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x i64> @__pseudo_gather_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x i64> @__pseudo_gather_factored_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                           <WIDTH x MASK>) nounwind readonly
-declare <WIDTH x double> @__pseudo_gather_base_offsets64_double(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare <WIDTH x double> @__pseudo_gather_factored_base_offsets64_double(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                                 <WIDTH x MASK>) nounwind readonly
 ; Similarly to the pseudo-gathers defined above, we also declare undefined
@@ -1660,7 +1660,7 @@ declare <WIDTH x double> @__pseudo_gather_base_offsets64_double(i8 *, <WIDTH x i
 ; The GatherScatterFlattenOpt optimization pass also finds these and
 ; transforms them to scatters like:
 ;
-; void __pseudo_scatter_base_offsets{32,64}_i8(uniform int8 *base,
+; void __pseudo_scatter_factored_base_offsets{32,64}_i8(uniform int8 *base,
 ;            varying int32 offsets, uniform int32 offset_scale,
 ;            varying int{32,64} offset_delta, varying int8 values, mask)
 ; (and similarly for 16/32/64 bit values)
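The scatter side is symmetric: each active lane stores a value instead of loading one. A matching sketch for the int8 variant described above, under the same scaling assumption as the gather sketch earlier:

    #include <cstdint>

    // Per-lane reference semantics for something like
    // __pseudo_scatter_factored_base_offsets32_i8, over WIDTH lanes.
    static void scatter_factored_ref(uint8_t *base, const int32_t *offsets,
                                     uint32_t offset_scale,
                                     const int32_t *offset_delta,
                                     const int8_t *values, const bool *mask,
                                     int width) {
        for (int i = 0; i < width; ++i)
            if (mask[i])  // masked-off lanes write nothing
                *(int8_t *)(base + (int64_t)offset_scale * offsets[i]
                            + offset_delta[i]) = values[i];
    }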
@@ -1682,30 +1682,30 @@ declare void @__pseudo_scatter64_float(<WIDTH x i64>, <WIDTH x float>, <WIDTH x
 declare void @__pseudo_scatter64_i64(<WIDTH x i64>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
 declare void @__pseudo_scatter64_double(<WIDTH x i64>, <WIDTH x double>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                  <WIDTH x i8>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                   <WIDTH x i16>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                   <WIDTH x i32>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_float(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_float(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                     <WIDTH x float>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                   <WIDTH x i64>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets32_double(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
+declare void @__pseudo_scatter_factored_base_offsets32_double(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                      <WIDTH x double>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                  <WIDTH x i8>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                   <WIDTH x i16>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                   <WIDTH x i32>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_float(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_float(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                     <WIDTH x float>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                   <WIDTH x i64>, <WIDTH x MASK>) nounwind
-declare void @__pseudo_scatter_base_offsets64_double(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
+declare void @__pseudo_scatter_factored_base_offsets64_double(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                      <WIDTH x double>, <WIDTH x MASK>) nounwind
 declare float @__log_uniform_float(float) nounwind readnone
@@ -1872,103 +1872,103 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
 call void @__usedouble(<WIDTH x double> %g64_d)
 %pgbo32_8 = call <WIDTH x i8>
-    @__pseudo_gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                        <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %pgbo32_8)
 %pgbo32_16 = call <WIDTH x i16>
-    @__pseudo_gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %pgbo32_16)
 %pgbo32_32 = call <WIDTH x i32>
-    @__pseudo_gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %pgbo32_32)
 %pgbo32_f = call <WIDTH x float>
-    @__pseudo_gather_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                           <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %pgbo32_f)
 %pgbo32_64 = call <WIDTH x i64>
-    @__pseudo_gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %pgbo32_64)
 %pgbo32_d = call <WIDTH x double>
-    @__pseudo_gather_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__pseudo_gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                            <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %pgbo32_d)
 %gbo32_8 = call <WIDTH x i8>
-    @__gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                 <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %gbo32_8)
 %gbo32_16 = call <WIDTH x i16>
-    @__gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                  <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %gbo32_16)
 %gbo32_32 = call <WIDTH x i32>
-    @__gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                  <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %gbo32_32)
 %gbo32_f = call <WIDTH x float>
-    @__gather_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                    <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %gbo32_f)
 %gbo32_64 = call <WIDTH x i64>
-    @__gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                  <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %gbo32_64)
 %gbo32_d = call <WIDTH x double>
-    @__gather_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
+    @__gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                     <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %gbo32_d)
 %pgbo64_8 = call <WIDTH x i8>
-    @__pseudo_gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                        <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %pgbo64_8)
 %pgbo64_16 = call <WIDTH x i16>
-    @__pseudo_gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %pgbo64_16)
 %pgbo64_32 = call <WIDTH x i32>
-    @__pseudo_gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %pgbo64_32)
 %pgbo64_f = call <WIDTH x float>
-    @__pseudo_gather_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                           <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %pgbo64_f)
 %pgbo64_64 = call <WIDTH x i64>
-    @__pseudo_gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %pgbo64_64)
 %pgbo64_d = call <WIDTH x double>
-    @__pseudo_gather_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__pseudo_gather_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                            <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %pgbo64_d)
 %gbo64_8 = call <WIDTH x i8>
-    @__gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                 <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use8(<WIDTH x i8> %gbo64_8)
 %gbo64_16 = call <WIDTH x i16>
-    @__gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                  <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use16(<WIDTH x i16> %gbo64_16)
 %gbo64_32 = call <WIDTH x i32>
-    @__gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                  <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use32(<WIDTH x i32> %gbo64_32)
 %gbo64_f = call <WIDTH x float>
-    @__gather_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                    <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usefloat(<WIDTH x float> %gbo64_f)
 %gbo64_64 = call <WIDTH x i64>
-    @__gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                  <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__use64(<WIDTH x i64> %gbo64_64)
 %gbo64_d = call <WIDTH x double>
-    @__gather_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
+    @__gather_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                     <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__usedouble(<WIDTH x double> %gbo64_d)
@@ -2003,56 +2003,56 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
 call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
 call void @__scatter64_double(<WIDTH x i64> %v64, <WIDTH x double> %vd, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                               <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                  <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__pseudo_scatter_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                   <WIDTH x double> %vd, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                               <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                  <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__pseudo_scatter_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__pseudo_scatter_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                   <WIDTH x double> %vd, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                        <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                         <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                           <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
+call void @__scatter_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                            <WIDTH x double> %vd, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                        <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                         <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_float(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                           <WIDTH x float> %vf, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
-call void @__scatter_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
+call void @__scatter_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                            <WIDTH x double> %vd, <WIDTH x MASK> %mask)
 ret void
@@ -3245,7 +3245,7 @@ define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %o
 }
-define <WIDTH x $1> @__gather_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
+define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
                                                 <WIDTH x i32> %offset_delta,
                                                 <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
   ; We can be clever and avoid the per-lane stuff for gathers if we are willing
@@ -3276,7 +3276,7 @@ define <WIDTH x $1> @__gather_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offset
   ret <WIDTH x $1> %ret`'eval(WIDTH-1)
 }
-define <WIDTH x $1> @__gather_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
+define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
                                                 <WIDTH x i64> %offset_delta,
                                                 <WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
   ; We can be clever and avoid the per-lane stuff for gathers if we are willing
@@ -3391,7 +3391,7 @@ define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_s
   ret void
 }
-define void @__scatter_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
+define void @__scatter_factored_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
                                          <WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
                                          <WIDTH x i32> %mask) nounwind alwaysinline {
   ;; And use the `per_lane' macro to do all of the per-lane work for scatter...
@@ -3401,7 +3401,7 @@ define void @__scatter_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32
   ret void
 }
-define void @__scatter_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
+define void @__scatter_factored_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
                                          <WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
                                          <WIDTH x i32> %mask) nounwind alwaysinline {
   ;; And use the `per_lane' macro to do all of the per-lane work for scatter...

@@ -1306,7 +1306,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec16_d val,
 // offsets * offsetScale is in bytes (for all of these)
-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
                               uint32_t scale, OTYPE constOffset, \
                               __vec16_i1 mask) { \
@@ -1322,18 +1322,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_factored_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_factored_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_factored_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_factored_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __gather_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __gather_factored_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_factored_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __gather_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __gather_factored_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __gather_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __gather_factored_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_factored_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_factored_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __gather_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __gather_factored_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __gather_factored_base_offsets64_double)
 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec16_i1 mask) { \
@@ -1361,7 +1361,7 @@ GATHER_GENERAL(__vec16_d, double, __vec16_i64, __gather64_double)
 // scatter
-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
                              uint32_t scale, OTYPE constOffset, \
                              VTYPE val, __vec16_i1 mask) { \
@@ -1375,18 +1375,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_factored_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_factored_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_factored_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_factored_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __scatter_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i32, __scatter_factored_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_factored_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __scatter_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i32, __scatter_factored_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __scatter_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_f, float, __vec16_i64, __scatter_factored_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_factored_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_factored_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __scatter_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i32, __scatter_factored_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_d, double, __vec16_i64, __scatter_factored_base_offsets64_double)
 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec16_i1 mask) { \

@@ -1374,7 +1374,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec32_d val,
 // offsets * offsetScale is in bytes (for all of these)
-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
                               uint32_t scale, OTYPE constOffset, \
                               __vec32_i1 mask) { \
@@ -1390,18 +1390,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __gather_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __gather_factored_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __gather_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __gather_factored_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __gather_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __gather_factored_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __gather_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __gather_factored_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __gather_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __gather_factored_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __gather_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __gather_factored_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __gather_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __gather_factored_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __gather_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __gather_factored_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __gather_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __gather_factored_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __gather_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __gather_factored_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __gather_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __gather_factored_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __gather_factored_base_offsets64_double)
 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec32_i1 mask) { \
@@ -1429,7 +1429,7 @@ GATHER_GENERAL(__vec32_d, double, __vec32_i64, __gather64_double)
 // scatter
-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
                              uint32_t scale, OTYPE constOffset, \
                              VTYPE val, __vec32_i1 mask) { \
@@ -1443,18 +1443,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __scatter_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i32, __scatter_factored_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __scatter_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i8, int8_t, __vec32_i64, __scatter_factored_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __scatter_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i32, __scatter_factored_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __scatter_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i16, int16_t, __vec32_i64, __scatter_factored_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __scatter_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i32, __scatter_factored_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __scatter_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i32, int32_t, __vec32_i64, __scatter_factored_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __scatter_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i32, __scatter_factored_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __scatter_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_f, float, __vec32_i64, __scatter_factored_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __scatter_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i32, __scatter_factored_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __scatter_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_i64, int64_t, __vec32_i64, __scatter_factored_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __scatter_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i32, __scatter_factored_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec32_d, double, __vec32_i64, __scatter_factored_base_offsets64_double)
 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec32_i1 mask) { \

@@ -1507,7 +1507,7 @@ static FORCEINLINE void __masked_store_blend_double(void *p, __vec64_d val,
 // offsets * offsetScale is in bytes (for all of these)
-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
                               uint32_t scale, OTYPE constOffset, \
                               __vec64_i1 mask) { \
@@ -1523,18 +1523,18 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __gather_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __gather_factored_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __gather_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __gather_factored_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __gather_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __gather_factored_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __gather_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __gather_factored_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __gather_base_offsets32_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __gather_factored_base_offsets32_i32)
-GATHER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __gather_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __gather_factored_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __gather_base_offsets32_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __gather_factored_base_offsets32_float)
-GATHER_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __gather_base_offsets64_float)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __gather_factored_base_offsets64_float)
-GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __gather_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __gather_factored_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __gather_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __gather_factored_base_offsets64_i64)
-GATHER_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __gather_base_offsets32_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __gather_factored_base_offsets32_double)
-GATHER_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __gather_base_offsets64_double)
+GATHER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __gather_factored_base_offsets64_double)
 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE VTYPE FUNC(PTRTYPE ptrs, __vec64_i1 mask) { \
@@ -1562,7 +1562,7 @@ GATHER_GENERAL(__vec64_d, double, __vec64_i64, __gather64_double)
 // scatter
-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
                              uint32_t scale, OTYPE constOffset, \
                              VTYPE val, __vec64_i1 mask) { \
@@ -1576,18 +1576,18 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
-SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __scatter_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i32, __scatter_factored_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __scatter_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i8, int8_t, __vec64_i64, __scatter_factored_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __scatter_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i32, __scatter_factored_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __scatter_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i16, int16_t, __vec64_i64, __scatter_factored_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __scatter_base_offsets32_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i32, __scatter_factored_base_offsets32_i32)
-SCATTER_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __scatter_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i32, int32_t, __vec64_i64, __scatter_factored_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __scatter_base_offsets32_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i32, __scatter_factored_base_offsets32_float)
-SCATTER_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __scatter_base_offsets64_float)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_f, float, __vec64_i64, __scatter_factored_base_offsets64_float)
-SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __scatter_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i32, __scatter_factored_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __scatter_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_i64, int64_t, __vec64_i64, __scatter_factored_base_offsets64_i64)
-SCATTER_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __scatter_base_offsets32_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i32, __scatter_factored_base_offsets32_double)
-SCATTER_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __scatter_base_offsets64_double)
+SCATTER_FACTORED_BASE_OFFSETS(__vec64_d, double, __vec64_i64, __scatter_factored_base_offsets64_double)
 #define SCATTER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC) \
 static FORCEINLINE void FUNC(PTRTYPE ptrs, VTYPE val, __vec64_i1 mask) { \

@@ -1940,7 +1940,7 @@ static FORCEINLINE void __masked_store_blend_float(void *p, __vec16_f val,
 // offsets * offsetScale is in bytes (for all of these)
-#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
+#define GATHER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
 /*
 static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
                               uint32_t scale, OTYPE constOffset, \
@@ -1958,7 +1958,7 @@ static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
 */
 static FORCEINLINE __vec16_i32
-__gather_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
+__gather_factored_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
                             uint32_t scale, __vec16_i32 constOffset,
                             __vec16_i1 mask) {
     __vec16_i32 vscale = _mm512_extload_epi32(&scale, _MM_UPCONV_EPI32_NONE, _MM_BROADCAST_1X16, _MM_HINT_NONE);
@@ -1973,7 +1973,7 @@ __gather_base_offsets32_i32(uint8_t *base, __vec16_i32 varyingOffset,
 }
 static FORCEINLINE __vec16_f
-__gather_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
+__gather_factored_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
                               uint32_t scale, __vec16_i32 constOffset,
                               __vec16_i1 mask) {
     __vec16_i32 vscale = _mm512_extload_epi32(&scale, _MM_UPCONV_EPI32_NONE, _MM_BROADCAST_1X16, _MM_HINT_NONE);
@@ -1987,13 +1987,13 @@ __gather_base_offsets32_float(uint8_t *base, __vec16_i32 varyingOffset,
     return ret;
 }
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_base_offsets32_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __gather_factored_base_offsets32_i8)
-GATHER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_base_offsets64_i8)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __gather_factored_base_offsets64_i8)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_base_offsets32_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __gather_factored_base_offsets32_i16)
-GATHER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_base_offsets64_i16)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __gather_factored_base_offsets64_i16)
-GATHER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_base_offsets64_i32)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __gather_factored_base_offsets64_i32)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_base_offsets32_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __gather_factored_base_offsets32_i64)
-GATHER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_base_offsets64_i64)
+GATHER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __gather_factored_base_offsets64_i64)
 #define GATHER_GENERAL(VTYPE, STYPE, PTRTYPE, FUNC)
 /*
@@ -2039,7 +2039,7 @@ static FORCEINLINE __vec16_i32 __gather64_i32(__vec16_i64 ptrs, __vec16_i1 mask)
 */
 // scatter
-#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
+#define SCATTER_FACTORED_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC)
 /*
 static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
                              uint32_t scale, OTYPE constOffset, \
@@ -2054,16 +2054,16 @@ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
 }
 */
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_base_offsets32_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i32, __scatter_factored_base_offsets32_i8)
-SCATTER_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_base_offsets64_i8)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i8, int8_t, __vec16_i64, __scatter_factored_base_offsets64_i8)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_base_offsets32_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i32, __scatter_factored_base_offsets32_i16)
-SCATTER_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_base_offsets64_i16)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i16, int16_t, __vec16_i64, __scatter_factored_base_offsets64_i16)
-SCATTER_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_base_offsets64_i32)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i32, int32_t, __vec16_i64, __scatter_factored_base_offsets64_i32)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_base_offsets32_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i32, __scatter_factored_base_offsets32_i64)
-SCATTER_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_base_offsets64_i64)
+SCATTER_FACTORED_BASE_OFFSETS(__vec16_i64, int64_t, __vec16_i64, __scatter_factored_base_offsets64_i64)
 static FORCEINLINE void
-__scatter_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
+__scatter_factored_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
                              uint32_t scale, __vec16_i32 constOffset,
                              __vec16_i32 val, __vec16_i1 mask)
 {
@@ -2072,7 +2072,7 @@ __scatter_base_offsets32_i32(uint8_t *b, __vec16_i32 varyingOffset,
 }
 static FORCEINLINE void
-__scatter_base_offsets32_float(void *base, const __vec16_i32 &varyingOffset,
+__scatter_factored_base_offsets32_float(void *base, const __vec16_i32 &varyingOffset,
                                uint32_t scale, const __vec16_i32 &constOffset,
                                const __vec16_f &val, const __vec16_i1 mask)
 {

@@ -3007,84 +3007,84 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets,
 }
 static FORCEINLINE __vec4_i8
-__gather_base_offsets32_i8(unsigned char *b, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i8(unsigned char *b, __vec4_i32 offsets,
                            uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_i8(), uint8_t(), b, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i8
-__gather_base_offsets64_i8(unsigned char *b, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i8(unsigned char *b, __vec4_i64 offsets,
                            uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_i8(), uint8_t(), b, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i16
-__gather_base_offsets32_i16(unsigned char *b, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i16(unsigned char *b, __vec4_i32 offsets,
                             uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_i16(), uint16_t(), b, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i16
-__gather_base_offsets64_i16(unsigned char *b, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i16(unsigned char *b, __vec4_i64 offsets,
                             uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_i16(), uint16_t(), b, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i32
-__gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
+__gather_factored_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
                             __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_i32(), uint32_t(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i32
-__gather_base_offsets64_i32(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i32(unsigned char *p, __vec4_i64 offsets,
                             uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_i32(), uint32_t(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_f
-__gather_base_offsets32_float(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
+__gather_factored_base_offsets32_float(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
                               __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_f(), float(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_f
-__gather_base_offsets64_float(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_float(unsigned char *p, __vec4_i64 offsets,
                               uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_f(), float(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i64
-__gather_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
+__gather_factored_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
                             uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_i64(), uint64_t(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_i64
-__gather_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
                             uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_i64(), uint64_t(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_d
-__gather_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
+__gather_factored_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
                                uint32_t scale, __vec4_i32 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets32(__vec4_d(), double(), p, offsets, scale,
                                 constOffset, mask);
 }
 static FORCEINLINE __vec4_d
-__gather_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
+__gather_factored_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
                                uint32_t scale, __vec4_i64 constOffset, __vec4_i1 mask) {
     return lGatherBaseOffsets64(__vec4_d(), double(), p, offsets, scale,
                                 constOffset, mask);
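
All of the wrappers above differ only in element type and offset width; each forwards to an lGatherBaseOffsets32/64 helper. A sketch of the per-lane load such a helper plausibly performs, using four lanes to match the __vec4_* wrappers (the array-based signature is an assumption for illustration):

    #include <stdint.h>

    // For each active lane, load one element of type T from
    // base + scale*offsets[i] + constOffset[i]; inactive lanes are untouched.
    template <typename T>
    static void gather_factored_base_offsets32_sketch(
            T out[4], const unsigned char *base, const int32_t offsets[4],
            uint32_t scale, const int32_t constOffset[4], uint8_t mask) {
        for (int i = 0; i < 4; ++i)
            if (mask & (1u << i))
                out[i] = *(const T *)(base + (int64_t)scale * offsets[i] +
                                      constOffset[i]);
    }
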
@@ -3252,7 +3252,7 @@ static FORCEINLINE __vec4_d __gather64_double(__vec4_i64 ptrs, __vec4_i1 mask) {
 #define SCATTER32_64(SUFFIX, VEC_SUFFIX, TYPE, EXTRACT) \
 static FORCEINLINE void \
-__scatter_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
+__scatter_factored_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
                                    uint32_t scale, __vec4_i32 constOffset, \
                                    __vec4_##VEC_SUFFIX val, __vec4_i1 mask) { \
     uint32_t m = _mm_extract_ps(mask.v, 0); \
@@ -3281,7 +3281,7 @@ __scatter_base_offsets32_##SUFFIX (unsigned char *b, __vec4_i32 offsets, \
 } \
 } \
 static FORCEINLINE void \
-__scatter_base_offsets64_##SUFFIX(unsigned char *p, __vec4_i64 offsets, \
+__scatter_factored_base_offsets64_##SUFFIX(unsigned char *p, __vec4_i64 offsets, \
                                   uint32_t scale, __vec4_i64 constOffset, \
                                   __vec4_##VEC_SUFFIX val, __vec4_i1 mask) { \
     uint32_t m = _mm_extract_ps(mask.v, 0); \
@@ -3322,7 +3322,7 @@ SCATTER32_64(float, f, float, _mm_extract_ps_as_float)
 static FORCEINLINE void
-__scatter_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
+__scatter_factored_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
                              uint32_t scale, __vec4_i32 constOffset, __vec4_i64 val,
                              __vec4_i1 mask) {
     uint32_t m = _mm_extract_ps(mask.v, 0);
@@ -3359,7 +3359,7 @@ __scatter_base_offsets32_i64(unsigned char *p, __vec4_i32 offsets,
 }
 static FORCEINLINE void
-__scatter_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
+__scatter_factored_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
                              uint32_t scale, __vec4_i64 constOffset,
                              __vec4_i64 val, __vec4_i1 mask) {
     uint32_t m = _mm_extract_ps(mask.v, 0);
@@ -3396,17 +3396,17 @@ __scatter_base_offsets64_i64(unsigned char *p, __vec4_i64 offsets,
 }
 static FORCEINLINE void
-__scatter_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
+__scatter_factored_base_offsets32_double(unsigned char *p, __vec4_i32 offsets,
                                 uint32_t scale, __vec4_i32 constOffset, __vec4_d val,
                                 __vec4_i1 mask) {
-    __scatter_base_offsets32_i64(p, offsets, scale, constOffset, val, mask);
+    __scatter_factored_base_offsets32_i64(p, offsets, scale, constOffset, val, mask);
 }
 static FORCEINLINE void
-__scatter_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
+__scatter_factored_base_offsets64_double(unsigned char *p, __vec4_i64 offsets,
                                 uint32_t scale, __vec4_i64 constOffset, __vec4_d val,
                                 __vec4_i1 mask) {
-    __scatter_base_offsets64_i64(p, offsets, scale, constOffset, val, mask);
+    __scatter_factored_base_offsets64_i64(p, offsets, scale, constOffset, val, mask);
 }
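
Note that the double scatters above simply forward to the i64 versions: a 64-bit store moves the same bits whatever the element type, so each double lane can travel as its bit pattern. A sketch of the reinterpretation this relies on (the conversion path inside the actual __vec4 types is not shown in this hunk):

    #include <stdint.h>
    #include <string.h>

    // Reuse the 64-bit integer store path for a double lane by copying its bits.
    static uint64_t double_bits_sketch(double d) {
        uint64_t bits;
        memcpy(&bits, &d, sizeof bits);   // well-defined type punning
        return bits;
    }
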

opt.cpp

@@ -1689,57 +1689,57 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
 };
 GSInfo gsFuncs[] = {
-    GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
-           "__pseudo_gather_base_offsets32_i8", true),
+    GSInfo("__pseudo_gather32_i8", "__pseudo_gather_factored_base_offsets32_i8",
+           "__pseudo_gather_factored_base_offsets32_i8", true),
-    GSInfo("__pseudo_gather32_i16", "__pseudo_gather_base_offsets32_i16",
-           "__pseudo_gather_base_offsets32_i16", true),
+    GSInfo("__pseudo_gather32_i16", "__pseudo_gather_factored_base_offsets32_i16",
+           "__pseudo_gather_factored_base_offsets32_i16", true),
-    GSInfo("__pseudo_gather32_i32", "__pseudo_gather_base_offsets32_i32",
-           "__pseudo_gather_base_offsets32_i32", true),
+    GSInfo("__pseudo_gather32_i32", "__pseudo_gather_factored_base_offsets32_i32",
+           "__pseudo_gather_factored_base_offsets32_i32", true),
-    GSInfo("__pseudo_gather32_float", "__pseudo_gather_base_offsets32_float",
-           "__pseudo_gather_base_offsets32_float", true),
+    GSInfo("__pseudo_gather32_float", "__pseudo_gather_factored_base_offsets32_float",
+           "__pseudo_gather_factored_base_offsets32_float", true),
-    GSInfo("__pseudo_gather32_i64", "__pseudo_gather_base_offsets32_i64",
-           "__pseudo_gather_base_offsets32_i64", true),
+    GSInfo("__pseudo_gather32_i64", "__pseudo_gather_factored_base_offsets32_i64",
+           "__pseudo_gather_factored_base_offsets32_i64", true),
-    GSInfo("__pseudo_gather32_double", "__pseudo_gather_base_offsets32_double",
-           "__pseudo_gather_base_offsets32_double", true),
+    GSInfo("__pseudo_gather32_double", "__pseudo_gather_factored_base_offsets32_double",
+           "__pseudo_gather_factored_base_offsets32_double", true),
-    GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_base_offsets32_i8",
-           "__pseudo_scatter_base_offsets32_i8", false),
+    GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
+           "__pseudo_scatter_factored_base_offsets32_i8", false),
-    GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_base_offsets32_i16",
-           "__pseudo_scatter_base_offsets32_i16", false),
+    GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
+           "__pseudo_scatter_factored_base_offsets32_i16", false),
-    GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_base_offsets32_i32",
-           "__pseudo_scatter_base_offsets32_i32", false),
+    GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
+           "__pseudo_scatter_factored_base_offsets32_i32", false),
-    GSInfo("__pseudo_scatter32_float", "__pseudo_scatter_base_offsets32_float",
-           "__pseudo_scatter_base_offsets32_float", false),
+    GSInfo("__pseudo_scatter32_float", "__pseudo_scatter_factored_base_offsets32_float",
+           "__pseudo_scatter_factored_base_offsets32_float", false),
-    GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_base_offsets32_i64",
-           "__pseudo_scatter_base_offsets32_i64", false),
+    GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
+           "__pseudo_scatter_factored_base_offsets32_i64", false),
-    GSInfo("__pseudo_scatter32_double", "__pseudo_scatter_base_offsets32_double",
-           "__pseudo_scatter_base_offsets32_double", false),
+    GSInfo("__pseudo_scatter32_double", "__pseudo_scatter_factored_base_offsets32_double",
+           "__pseudo_scatter_factored_base_offsets32_double", false),
-    GSInfo("__pseudo_gather64_i8", "__pseudo_gather_base_offsets64_i8",
-           "__pseudo_gather_base_offsets32_i8", true),
+    GSInfo("__pseudo_gather64_i8", "__pseudo_gather_factored_base_offsets64_i8",
+           "__pseudo_gather_factored_base_offsets32_i8", true),
-    GSInfo("__pseudo_gather64_i16", "__pseudo_gather_base_offsets64_i16",
-           "__pseudo_gather_base_offsets32_i16", true),
+    GSInfo("__pseudo_gather64_i16", "__pseudo_gather_factored_base_offsets64_i16",
+           "__pseudo_gather_factored_base_offsets32_i16", true),
-    GSInfo("__pseudo_gather64_i32", "__pseudo_gather_base_offsets64_i32",
-           "__pseudo_gather_base_offsets32_i32", true),
+    GSInfo("__pseudo_gather64_i32", "__pseudo_gather_factored_base_offsets64_i32",
+           "__pseudo_gather_factored_base_offsets32_i32", true),
-    GSInfo("__pseudo_gather64_float", "__pseudo_gather_base_offsets64_float",
-           "__pseudo_gather_base_offsets32_float", true),
+    GSInfo("__pseudo_gather64_float", "__pseudo_gather_factored_base_offsets64_float",
+           "__pseudo_gather_factored_base_offsets32_float", true),
-    GSInfo("__pseudo_gather64_i64", "__pseudo_gather_base_offsets64_i64",
-           "__pseudo_gather_base_offsets32_i64", true),
+    GSInfo("__pseudo_gather64_i64", "__pseudo_gather_factored_base_offsets64_i64",
+           "__pseudo_gather_factored_base_offsets32_i64", true),
-    GSInfo("__pseudo_gather64_double", "__pseudo_gather_base_offsets64_double",
-           "__pseudo_gather_base_offsets32_double", true),
+    GSInfo("__pseudo_gather64_double", "__pseudo_gather_factored_base_offsets64_double",
+           "__pseudo_gather_factored_base_offsets32_double", true),
-    GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_base_offsets64_i8",
-           "__pseudo_scatter_base_offsets32_i8", false),
+    GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_factored_base_offsets64_i8",
+           "__pseudo_scatter_factored_base_offsets32_i8", false),
-    GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_base_offsets64_i16",
-           "__pseudo_scatter_base_offsets32_i16", false),
+    GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_factored_base_offsets64_i16",
+           "__pseudo_scatter_factored_base_offsets32_i16", false),
-    GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_base_offsets64_i32",
-           "__pseudo_scatter_base_offsets32_i32", false),
+    GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_factored_base_offsets64_i32",
+           "__pseudo_scatter_factored_base_offsets32_i32", false),
-    GSInfo("__pseudo_scatter64_float", "__pseudo_scatter_base_offsets64_float",
-           "__pseudo_scatter_base_offsets32_float", false),
+    GSInfo("__pseudo_scatter64_float", "__pseudo_scatter_factored_base_offsets64_float",
+           "__pseudo_scatter_factored_base_offsets32_float", false),
-    GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_base_offsets64_i64",
-           "__pseudo_scatter_base_offsets32_i64", false),
+    GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_factored_base_offsets64_i64",
+           "__pseudo_scatter_factored_base_offsets32_i64", false),
-    GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_base_offsets64_double",
-           "__pseudo_scatter_base_offsets32_double", false),
+    GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_factored_base_offsets64_double",
+           "__pseudo_scatter_factored_base_offsets32_double", false),
 };
 int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
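
The gsFuncs table above is, in effect, a name-keyed rewrite map: the pass looks up each call's callee and, on a hit, swaps in the factored base-plus-offsets replacement. A sketch of that lookup shape (plain structs and strings; the real pass operates on llvm::CallInst and llvm::Function objects):

    #include <string.h>

    // Mirror of the table rows: pseudo name, replacement name, gather-vs-scatter flag.
    struct GSInfoSketch { const char *pseudoName; const char *replacementName; bool isGather; };

    // Return the replacement callee name, or NULL if the call is not a pseudo
    // gather/scatter and should be left alone.
    static const char *lookup_replacement_sketch(const GSInfoSketch *table, int n,
                                                 const char *calleeName) {
        for (int i = 0; i < n; ++i)
            if (strcmp(table[i].pseudoName, calleeName) == 0)
                return table[i].replacementName;
        return NULL;
    }
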
@@ -1858,57 +1858,57 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
 };
 GSBOInfo gsFuncs[] = {
-    GSBOInfo("__pseudo_gather_base_offsets32_i8",
-             "__pseudo_gather_base_offsets32_i8", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_i8",
+             "__pseudo_gather_factored_base_offsets32_i8", true),
-    GSBOInfo("__pseudo_gather_base_offsets32_i16",
-             "__pseudo_gather_base_offsets32_i16", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_i16",
+             "__pseudo_gather_factored_base_offsets32_i16", true),
-    GSBOInfo("__pseudo_gather_base_offsets32_i32",
-             "__pseudo_gather_base_offsets32_i32", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_i32",
+             "__pseudo_gather_factored_base_offsets32_i32", true),
-    GSBOInfo("__pseudo_gather_base_offsets32_float",
-             "__pseudo_gather_base_offsets32_float", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_float",
+             "__pseudo_gather_factored_base_offsets32_float", true),
-    GSBOInfo("__pseudo_gather_base_offsets32_i64",
-             "__pseudo_gather_base_offsets32_i64", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_i64",
+             "__pseudo_gather_factored_base_offsets32_i64", true),
-    GSBOInfo("__pseudo_gather_base_offsets32_double",
-             "__pseudo_gather_base_offsets32_double", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets32_double",
+             "__pseudo_gather_factored_base_offsets32_double", true),
-    GSBOInfo( "__pseudo_scatter_base_offsets32_i8",
-             "__pseudo_scatter_base_offsets32_i8", false),
+    GSBOInfo( "__pseudo_scatter_factored_base_offsets32_i8",
+             "__pseudo_scatter_factored_base_offsets32_i8", false),
-    GSBOInfo("__pseudo_scatter_base_offsets32_i16",
-             "__pseudo_scatter_base_offsets32_i16", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets32_i16",
+             "__pseudo_scatter_factored_base_offsets32_i16", false),
-    GSBOInfo("__pseudo_scatter_base_offsets32_i32",
-             "__pseudo_scatter_base_offsets32_i32", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets32_i32",
+             "__pseudo_scatter_factored_base_offsets32_i32", false),
-    GSBOInfo("__pseudo_scatter_base_offsets32_float",
-             "__pseudo_scatter_base_offsets32_float", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets32_float",
+             "__pseudo_scatter_factored_base_offsets32_float", false),
-    GSBOInfo("__pseudo_scatter_base_offsets32_i64",
-             "__pseudo_scatter_base_offsets32_i64", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets32_i64",
+             "__pseudo_scatter_factored_base_offsets32_i64", false),
-    GSBOInfo("__pseudo_scatter_base_offsets32_double",
-             "__pseudo_scatter_base_offsets32_double", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets32_double",
+             "__pseudo_scatter_factored_base_offsets32_double", false),
-    GSBOInfo( "__pseudo_gather_base_offsets64_i8",
-             "__pseudo_gather_base_offsets32_i8", true),
+    GSBOInfo( "__pseudo_gather_factored_base_offsets64_i8",
+             "__pseudo_gather_factored_base_offsets32_i8", true),
-    GSBOInfo("__pseudo_gather_base_offsets64_i16",
-             "__pseudo_gather_base_offsets32_i16", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets64_i16",
+             "__pseudo_gather_factored_base_offsets32_i16", true),
-    GSBOInfo("__pseudo_gather_base_offsets64_i32",
-             "__pseudo_gather_base_offsets32_i32", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets64_i32",
+             "__pseudo_gather_factored_base_offsets32_i32", true),
-    GSBOInfo("__pseudo_gather_base_offsets64_float",
-             "__pseudo_gather_base_offsets32_float", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets64_float",
+             "__pseudo_gather_factored_base_offsets32_float", true),
-    GSBOInfo("__pseudo_gather_base_offsets64_i64",
-             "__pseudo_gather_base_offsets32_i64", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets64_i64",
+             "__pseudo_gather_factored_base_offsets32_i64", true),
-    GSBOInfo("__pseudo_gather_base_offsets64_double",
-             "__pseudo_gather_base_offsets32_double", true),
+    GSBOInfo("__pseudo_gather_factored_base_offsets64_double",
+             "__pseudo_gather_factored_base_offsets32_double", true),
-    GSBOInfo( "__pseudo_scatter_base_offsets64_i8",
-             "__pseudo_scatter_base_offsets32_i8", false),
+    GSBOInfo( "__pseudo_scatter_factored_base_offsets64_i8",
+             "__pseudo_scatter_factored_base_offsets32_i8", false),
-    GSBOInfo("__pseudo_scatter_base_offsets64_i16",
-             "__pseudo_scatter_base_offsets32_i16", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets64_i16",
+             "__pseudo_scatter_factored_base_offsets32_i16", false),
-    GSBOInfo("__pseudo_scatter_base_offsets64_i32",
-             "__pseudo_scatter_base_offsets32_i32", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets64_i32",
+             "__pseudo_scatter_factored_base_offsets32_i32", false),
-    GSBOInfo("__pseudo_scatter_base_offsets64_float",
-             "__pseudo_scatter_base_offsets32_float", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets64_float",
+             "__pseudo_scatter_factored_base_offsets32_float", false),
-    GSBOInfo("__pseudo_scatter_base_offsets64_i64",
-             "__pseudo_scatter_base_offsets32_i64", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets64_i64",
+             "__pseudo_scatter_factored_base_offsets32_i64", false),
-    GSBOInfo("__pseudo_scatter_base_offsets64_double",
-             "__pseudo_scatter_base_offsets32_double", false),
+    GSBOInfo("__pseudo_scatter_factored_base_offsets64_double",
+             "__pseudo_scatter_factored_base_offsets32_double", false),
 };
 int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
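
This second table drives lGSBaseOffsetsGetMoreConst, which, as its name suggests, tries to move the statically known part of each lane's offset into the separate constant-offset operand so it can be folded into the base address at compile time. A sketch of the split, under the assumption that each offset decomposes into a varying part plus a known constant (array illustration of the idea only; the pass reasons about IR values):

    #include <stdint.h>

    // offsets[i] == varying[i] + delta[i], with delta statically known.
    static void factor_offsets_sketch(const int32_t offsets[4], const int32_t knownConst[4],
                                      int32_t varying[4], int32_t delta[4]) {
        for (int i = 0; i < 4; ++i) {
            delta[i] = knownConst[i];              // compile-time-constant component
            varying[i] = offsets[i] - delta[i];    // remainder computed at run time
        }
    }
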
@@ -2025,29 +2025,29 @@ lGSToLoadStore(llvm::CallInst *callInst) {
 };
 GatherImpInfo gInfo[] = {
-    GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__masked_load_i8",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_i8", "__masked_load_i8",
                   LLVMTypes::Int8Type, 1),
-    GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__masked_load_i16",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_i16", "__masked_load_i16",
                   LLVMTypes::Int16Type, 2),
-    GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__masked_load_i32",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_i32", "__masked_load_i32",
                   LLVMTypes::Int32Type, 4),
-    GatherImpInfo("__pseudo_gather_base_offsets32_float", "__masked_load_float",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_float", "__masked_load_float",
                   LLVMTypes::FloatType, 4),
-    GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__masked_load_i64",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_i64", "__masked_load_i64",
                   LLVMTypes::Int64Type, 8),
-    GatherImpInfo("__pseudo_gather_base_offsets32_double", "__masked_load_double",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets32_double", "__masked_load_double",
                   LLVMTypes::DoubleType, 8),
-    GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__masked_load_i8",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_i8", "__masked_load_i8",
                   LLVMTypes::Int8Type, 1),
-    GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__masked_load_i16",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_i16", "__masked_load_i16",
                   LLVMTypes::Int16Type, 2),
-    GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__masked_load_i32",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_i32", "__masked_load_i32",
                   LLVMTypes::Int32Type, 4),
-    GatherImpInfo("__pseudo_gather_base_offsets64_float", "__masked_load_float",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_float", "__masked_load_float",
                   LLVMTypes::FloatType, 4),
-    GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__masked_load_i64",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_i64", "__masked_load_i64",
                   LLVMTypes::Int64Type, 8),
-    GatherImpInfo("__pseudo_gather_base_offsets64_double", "__masked_load_double",
+    GatherImpInfo("__pseudo_gather_factored_base_offsets64_double", "__masked_load_double",
                   LLVMTypes::DoubleType, 8)
 };
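
The gInfo table pairs each factored gather with a masked load plus the element size (the trailing 1/2/4/8). The point of the pairing: when the effective per-lane offsets work out to i * elemSize, the gather is reading contiguous memory and can be demoted to a single masked vector load. A sketch of that check on plain arrays (the actual pass reasons about IR values, not runtime data):

    #include <stdint.h>

    // True when lane i reads from offsets[0] + i*elemSize, i.e. a contiguous run.
    static bool offsets_are_consecutive_sketch(const int64_t offsets[4], int64_t elemSize) {
        for (int i = 0; i < 4; ++i)
            if (offsets[i] != i * elemSize)
                return false;
        return true;   // safe to replace the gather with one masked load
    }
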
@@ -2067,29 +2067,29 @@ lGSToLoadStore(llvm::CallInst *callInst) {
 };
 ScatterImpInfo sInfo[] = {
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_masked_store_i8",
                    LLVMTypes::Int8VectorPointerType, 1),
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_i16", "__pseudo_masked_store_i16",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_masked_store_i16",
                    LLVMTypes::Int16VectorPointerType, 2),
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_i32", "__pseudo_masked_store_i32",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_masked_store_i32",
                    LLVMTypes::Int32VectorPointerType, 4),
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_float", "__pseudo_masked_store_float",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_float", "__pseudo_masked_store_float",
                    LLVMTypes::FloatVectorPointerType, 4),
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_i64", "__pseudo_masked_store_i64",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_masked_store_i64",
                    LLVMTypes::Int64VectorPointerType, 8),
-    ScatterImpInfo("__pseudo_scatter_base_offsets32_double", "__pseudo_masked_store_double",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_double", "__pseudo_masked_store_double",
                    LLVMTypes::DoubleVectorPointerType, 8),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_i8", "__pseudo_masked_store_i8",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i8", "__pseudo_masked_store_i8",
                    LLVMTypes::Int8VectorPointerType, 1),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_i16", "__pseudo_masked_store_i16",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i16", "__pseudo_masked_store_i16",
                    LLVMTypes::Int16VectorPointerType, 2),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_i32", "__pseudo_masked_store_i32",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i32", "__pseudo_masked_store_i32",
                    LLVMTypes::Int32VectorPointerType, 4),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_float", "__pseudo_masked_store_float",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_float", "__pseudo_masked_store_float",
                    LLVMTypes::FloatVectorPointerType, 4),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_i64", "__pseudo_masked_store_i64",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i64", "__pseudo_masked_store_i64",
                    LLVMTypes::Int64VectorPointerType, 8),
-    ScatterImpInfo("__pseudo_scatter_base_offsets64_double", "__pseudo_masked_store_double",
+    ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_double", "__pseudo_masked_store_double",
                    LLVMTypes::DoubleVectorPointerType, 8)
 };
@@ -3354,10 +3354,10 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
 DEBUG_START_PASS("GatherCoalescePass");
 llvm::Function *gatherFuncs[] = {
-    m->module->getFunction("__pseudo_gather_base_offsets32_i32"),
+    m->module->getFunction("__pseudo_gather_factored_base_offsets32_i32"),
-    m->module->getFunction("__pseudo_gather_base_offsets32_float"),
+    m->module->getFunction("__pseudo_gather_factored_base_offsets32_float"),
-    m->module->getFunction("__pseudo_gather_base_offsets64_i32"),
+    m->module->getFunction("__pseudo_gather_factored_base_offsets64_i32"),
-    m->module->getFunction("__pseudo_gather_base_offsets64_float"),
+    m->module->getFunction("__pseudo_gather_factored_base_offsets64_float"),
 };
 int nGatherFuncs = sizeof(gatherFuncs) / sizeof(gatherFuncs[0]);
@@ -3367,7 +3367,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
 for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e;
      ++iter) {
     // Iterate over all of the instructions and look for calls to
-    // __pseudo_gather_base_offsets{32,64}_{i32,float} calls.
+    // __pseudo_gather_factored_base_offsets{32,64}_{i32,float} calls.
     llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
     if (callInst == NULL)
         continue;
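
For context on the four functions this pass watches: several gathers from one base whose offsets land in a small window can be served by fewer wide loads plus shuffles. A sketch of the kind of window test involved, on plain arrays and with an assumed window size (the pass itself analyzes the offsets symbolically in IR):

    #include <stdint.h>

    // True when all four lane offsets fall within one wide-load-sized window.
    static bool fits_in_one_load_sketch(const int64_t offs[4], int64_t windowBytes) {
        int64_t lo = offs[0], hi = offs[0];
        for (int i = 1; i < 4; ++i) {
            if (offs[i] < lo) lo = offs[i];
            if (offs[i] > hi) hi = offs[i];
        }
        return hi - lo <= windowBytes;   // one load can cover every lane
    }
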
@@ -3639,19 +3639,19 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
 };
 LowerGSInfo lgsInfo[] = {
-    LowerGSInfo("__pseudo_gather_base_offsets32_i8", "__gather_base_offsets32_i8", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i8", true),
-    LowerGSInfo("__pseudo_gather_base_offsets32_i16", "__gather_base_offsets32_i16", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16", "__gather_factored_base_offsets32_i16", true),
-    LowerGSInfo("__pseudo_gather_base_offsets32_i32", "__gather_base_offsets32_i32", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i32", true),
-    LowerGSInfo("__pseudo_gather_base_offsets32_float", "__gather_base_offsets32_float", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_float", "__gather_factored_base_offsets32_float", true),
-    LowerGSInfo("__pseudo_gather_base_offsets32_i64", "__gather_base_offsets32_i64", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64", "__gather_factored_base_offsets32_i64", true),
-    LowerGSInfo("__pseudo_gather_base_offsets32_double", "__gather_base_offsets32_double", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets32_double", "__gather_factored_base_offsets32_double", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_i8", "__gather_base_offsets64_i8", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i8", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_i16", "__gather_base_offsets64_i16", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16", "__gather_factored_base_offsets64_i16", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_i32", "__gather_base_offsets64_i32", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i32", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_float", "__gather_base_offsets64_float", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_float", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_i64", "__gather_base_offsets64_i64", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64", "__gather_factored_base_offsets64_i64", true),
-    LowerGSInfo("__pseudo_gather_base_offsets64_double", "__gather_base_offsets64_double", true),
+    LowerGSInfo("__pseudo_gather_factored_base_offsets64_double", "__gather_factored_base_offsets64_double", true),
     LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true),
     LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true),
@@ -3667,19 +3667,19 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true), LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true),
LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true), LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true),
LowerGSInfo("__pseudo_scatter_base_offsets32_i8", "__scatter_base_offsets32_i8", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8", "__scatter_factored_base_offsets32_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i16", "__scatter_base_offsets32_i16", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16", "__scatter_factored_base_offsets32_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i32", "__scatter_base_offsets32_i32", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32", "__scatter_factored_base_offsets32_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_float", "__scatter_base_offsets32_float", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float", "__scatter_factored_base_offsets32_float", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i64", "__scatter_base_offsets32_i64", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64", "__scatter_factored_base_offsets32_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_double", "__scatter_base_offsets32_double", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double", "__scatter_factored_base_offsets32_double", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i8", "__scatter_base_offsets64_i8", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i16", "__scatter_base_offsets64_i16", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16", "__scatter_factored_base_offsets64_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i32", "__scatter_base_offsets64_i32", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_float", "__scatter_base_offsets64_float", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_float", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i64", "__scatter_base_offsets64_i64", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64", "__scatter_factored_base_offsets64_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_double", "__scatter_base_offsets64_double", false), LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double", "__scatter_factored_base_offsets64_double", false),
LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false), LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false),
LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false), LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false),
@@ -3899,12 +3899,12 @@ bool
 MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
 const char *names[] = {
     "__fast_masked_vload",
-    "__gather_base_offsets32_i8", "__gather_base_offsets32_i16",
+    "__gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i16",
-    "__gather_base_offsets32_i32", "__gather_base_offsets32_i64",
+    "__gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i64",
-    "__gather_base_offsets32_float", "__gather_base_offsets32_double",
+    "__gather_factored_base_offsets32_float", "__gather_factored_base_offsets32_double",
-    "__gather_base_offsets64_i8", "__gather_base_offsets64_i16",
+    "__gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i16",
-    "__gather_base_offsets64_i32", "__gather_base_offsets64_i64",
+    "__gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i64",
-    "__gather_base_offsets64_float", "__gather_base_offsets64_double",
+    "__gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_double",
     "__gather32_i8", "__gather32_i16",
     "__gather32_i32", "__gather32_i64",
     "__gather32_float", "__gather32_double",
@@ -3926,12 +3926,12 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
"__masked_store_blend_i8", "__masked_store_blend_i16", "__masked_store_blend_i8", "__masked_store_blend_i16",
"__masked_store_blend_i32", "__masked_store_blend_i64", "__masked_store_blend_i32", "__masked_store_blend_i64",
"__masked_store_blend_float", "__masked_store_blend_double", "__masked_store_blend_float", "__masked_store_blend_double",
"__scatter_base_offsets32_i8", "__scatter_base_offsets32_i16", "__scatter_factored_base_offsets32_i8", "__scatter_factored_base_offsets32_i16",
"__scatter_base_offsets32_i32", "__scatter_base_offsets32_i64", "__scatter_factored_base_offsets32_i32", "__scatter_factored_base_offsets32_i64",
"__scatter_base_offsets32_float", "__scatter_base_offsets32_double", "__scatter_factored_base_offsets32_float", "__scatter_factored_base_offsets32_double",
"__scatter_base_offsets64_i8", "__scatter_base_offsets64_i16", "__scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i16",
"__scatter_base_offsets64_i32", "__scatter_base_offsets64_i64", "__scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i64",
"__scatter_base_offsets64_float", "__scatter_base_offsets64_double", "__scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_double",
"__scatter_elt32_i8", "__scatter_elt32_i16", "__scatter_elt32_i8", "__scatter_elt32_i16",
"__scatter_elt32_i32", "__scatter_elt32_i64", "__scatter_elt32_i32", "__scatter_elt32_i64",
"__scatter_elt32_float", "__scatter_elt32_double", "__scatter_elt32_float", "__scatter_elt32_double",