Gather/scatter function improvements in builtins.
More naming consistency: function-name suffixes are now _i32 rather than _32 (and likewise _i8, _i16, and _i64). Also improved the m4 macros that generate these sequences so that they no longer require as many parameters.
This commit is contained in:
@@ -598,10 +598,10 @@ define void @__masked_store_blend_i64(<16 x i64>* nocapture %ptr, <16 x i64> %ne
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; scatter
|
;; scatter
|
||||||
|
|
||||||
gen_scatter(16, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(16, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(16, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(16, i64)
|
gen_scatter(i64)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; double precision sqrt
|
;; double precision sqrt
|
||||||
|
|||||||
@@ -492,10 +492,10 @@ define void @__masked_store_blend_i64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; scatter
|
;; scatter
|
||||||
|
|
||||||
gen_scatter(8, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(8, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(8, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(8, i64)
|
gen_scatter(i64)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; double precision sqrt
|
;; double precision sqrt
|
||||||
|
|||||||
@@ -69,9 +69,7 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
gen_gather(16, i8)
|
gen_gather(i8)
|
||||||
gen_gather(16, i16)
|
gen_gather(i16)
|
||||||
gen_gather(16, i32)
|
gen_gather(i32)
|
||||||
gen_gather(16, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
gen_gather(8, i8)
|
gen_gather(i8)
|
||||||
gen_gather(8, i16)
|
gen_gather(i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(i64)
|
||||||
|
|||||||
@@ -121,9 +121,7 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
gen_gather(16, i8)
|
gen_gather(i8)
|
||||||
gen_gather(16, i16)
|
gen_gather(i16)
|
||||||
gen_gather(16, i32)
|
gen_gather(i32)
|
||||||
gen_gather(16, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
gen_gather(8, i8)
|
gen_gather(i8)
|
||||||
gen_gather(8, i16)
|
gen_gather(i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(i64)
|
||||||
|
|||||||
@@ -36,15 +36,15 @@ masked_load(i64, 8)
|
|||||||
|
|
||||||
; define these with the macros from stdlib.m4
|
; define these with the macros from stdlib.m4
|
||||||
|
|
||||||
gen_gather(1, i8)
|
gen_gather(i8)
|
||||||
gen_gather(1, i16)
|
gen_gather(i16)
|
||||||
gen_gather(1, i32)
|
gen_gather(i32)
|
||||||
gen_gather(1, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
gen_scatter(1, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(1, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(1, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(1, i64)
|
gen_scatter(i64)
|
||||||
|
|
||||||
|
|
||||||
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
||||||
|
|||||||
@@ -446,15 +446,15 @@ masked_load(i64, 8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather/scatter
|
;; gather/scatter
|
||||||
|
|
||||||
gen_gather(8, i8)
|
gen_gather(i8)
|
||||||
gen_gather(8, i16)
|
gen_gather(i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
gen_scatter(8, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(8, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(8, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(8, i64)
|
gen_scatter(i64)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; float rounding
|
;; float rounding
|
||||||
|
|||||||
@@ -575,12 +575,12 @@ masked_load(i64, 8)
|
|||||||
|
|
||||||
; define these with the macros from stdlib.m4
|
; define these with the macros from stdlib.m4
|
||||||
|
|
||||||
gen_gather(4, i8)
|
gen_gather(i8)
|
||||||
gen_gather(4, i16)
|
gen_gather(i16)
|
||||||
gen_gather(4, i32)
|
gen_gather(i32)
|
||||||
gen_gather(4, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
gen_scatter(4, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(4, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(4, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(4, i64)
|
gen_scatter(i64)
|
||||||
|
|||||||
@@ -372,15 +372,15 @@ masked_load(i64, 8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather/scatter
|
;; gather/scatter
|
||||||
|
|
||||||
gen_gather(8, i8)
|
gen_gather(i8)
|
||||||
gen_gather(8, i16)
|
gen_gather(i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
gen_scatter(8, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(8, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(8, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(8, i64)
|
gen_scatter(i64)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; float rounding
|
;; float rounding
|
||||||
|
|||||||
@@ -474,12 +474,12 @@ masked_load(i64, 8)
|
|||||||
|
|
||||||
; define these with the macros from stdlib.m4
|
; define these with the macros from stdlib.m4
|
||||||
|
|
||||||
gen_gather(4, i8)
|
gen_gather(i8)
|
||||||
gen_gather(4, i16)
|
gen_gather(i16)
|
||||||
gen_gather(4, i32)
|
gen_gather(i32)
|
||||||
gen_gather(4, i64)
|
gen_gather(i64)
|
||||||
|
|
||||||
gen_scatter(4, i8)
|
gen_scatter(i8)
|
||||||
gen_scatter(4, i16)
|
gen_scatter(i16)
|
||||||
gen_scatter(4, i32)
|
gen_scatter(i32)
|
||||||
gen_scatter(4, i64)
|
gen_scatter(i64)
|
||||||
|
|||||||
392
builtins/util.m4
392
builtins/util.m4
@@ -1567,10 +1567,10 @@ declare void @__pseudo_masked_store_i64(<WIDTH x i64> * nocapture, <WIDTH x i64>
|
|||||||
; to perform a gather, it generates a call to one of these functions,
|
; to perform a gather, it generates a call to one of these functions,
|
||||||
; which have signatures:
|
; which have signatures:
|
||||||
;
|
;
|
||||||
; varying int8 __pseudo_gather(varying int8 *, mask)
|
; varying int8 __pseudo_gather_i8(varying int8 *, mask)
|
||||||
; varying int16 __pseudo_gather(varying int16 *, mask)
|
; varying int16 __pseudo_gather_i16(varying int16 *, mask)
|
||||||
; varying int32 __pseudo_gather(varying int32 *, mask)
|
; varying int32 __pseudo_gather_i32(varying int32 *, mask)
|
||||||
; varying int64 __pseudo_gather(varying int64 *, mask)
|
; varying int64 __pseudo_gather_i64(varying int64 *, mask)
|
||||||
;
|
;
|
||||||
; The GatherScatterFlattenOpt optimization pass finds these calls and then
|
; The GatherScatterFlattenOpt optimization pass finds these calls and then
|
||||||
; converts them to make calls to the following functions (when appropriate);
|
; converts them to make calls to the following functions (when appropriate);
|
||||||
@@ -1591,46 +1591,46 @@ declare void @__pseudo_masked_store_i64(<WIDTH x i64> * nocapture, <WIDTH x i64>
|
|||||||
; converts them to native gather functions or converts them to vector
|
; converts them to native gather functions or converts them to vector
|
||||||
; loads, if equivalent.
|
; loads, if equivalent.
|
||||||
|
|
||||||
declare <WIDTH x i8> @__pseudo_gather32_8(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i8> @__pseudo_gather32_i8(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i16> @__pseudo_gather32_16(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i16> @__pseudo_gather32_i16(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i32> @__pseudo_gather32_32(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i32> @__pseudo_gather32_i32(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i64> @__pseudo_gather32_64(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i64> @__pseudo_gather32_i64(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
|
||||||
|
|
||||||
declare <WIDTH x i8> @__pseudo_gather64_8(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i8> @__pseudo_gather64_i8(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i16> @__pseudo_gather64_16(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i16> @__pseudo_gather64_i16(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i32> @__pseudo_gather64_32(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i32> @__pseudo_gather64_i32(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i64> @__pseudo_gather64_64(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
declare <WIDTH x i64> @__pseudo_gather64_i64(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
|
||||||
|
|
||||||
declare <WIDTH x i8> @__pseudo_gather_base_offsets32_8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare <WIDTH x i8> @__pseudo_gather_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i16> @__pseudo_gather_base_offsets32_16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare <WIDTH x i16> @__pseudo_gather_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i32> @__pseudo_gather_base_offsets32_32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare <WIDTH x i32> @__pseudo_gather_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i64> @__pseudo_gather_base_offsets32_64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare <WIDTH x i64> @__pseudo_gather_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
|
|
||||||
declare <WIDTH x i8> @__pseudo_gather_base_offsets64_8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare <WIDTH x i8> @__pseudo_gather_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i16> @__pseudo_gather_base_offsets64_16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare <WIDTH x i16> @__pseudo_gather_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i32> @__pseudo_gather_base_offsets64_32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare <WIDTH x i32> @__pseudo_gather_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
declare <WIDTH x i64> @__pseudo_gather_base_offsets64_64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare <WIDTH x i64> @__pseudo_gather_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x MASK>) nounwind readonly
|
<WIDTH x MASK>) nounwind readonly
|
||||||
|
|
||||||
; Similarly to the pseudo-gathers defined above, we also declare undefined
|
; Similarly to the pseudo-gathers defined above, we also declare undefined
|
||||||
; pseudo-scatter instructions with signatures:
|
; pseudo-scatter instructions with signatures:
|
||||||
;
|
;
|
||||||
; void __pseudo_scatter_8 (varying int8 *, varying int8 values, mask)
|
; void __pseudo_scatter_i8 (varying int8 *, varying int8 values, mask)
|
||||||
; void __pseudo_scatter_16(varying int16 *, varying int16 values, mask)
|
; void __pseudo_scatter_i16(varying int16 *, varying int16 values, mask)
|
||||||
; void __pseudo_scatter_32(varying int32 *, varying int32 values, mask)
|
; void __pseudo_scatter_i32(varying int32 *, varying int32 values, mask)
|
||||||
; void __pseudo_scatter_64(varying int64 *, varying int64 values, mask)
|
; void __pseudo_scatter_i64(varying int64 *, varying int64 values, mask)
|
||||||
;
|
;
|
||||||
; The GatherScatterFlattenOpt optimization pass also finds these and
|
; The GatherScatterFlattenOpt optimization pass also finds these and
|
||||||
; transforms them to scatters like:
|
; transforms them to scatters like:
|
||||||
;
|
;
|
||||||
; void __pseudo_scatter_base_offsets{32,64}_8(uniform int8 *base,
|
; void __pseudo_scatter_base_offsets{32,64}_i8(uniform int8 *base,
|
||||||
; varying int32 offsets, uniform int32 offset_scale,
|
; varying int32 offsets, uniform int32 offset_scale,
|
||||||
; varying int{32,64} offset_delta, varying int8 values, mask)
|
; varying int{32,64} offset_delta, varying int8 values, mask)
|
||||||
; (and similarly for 16/32/64 bit values)
|
; (and similarly for 16/32/64 bit values)
|
||||||
@@ -1638,32 +1638,32 @@ declare <WIDTH x i64> @__pseudo_gather_base_offsets64_64(i8 *, <WIDTH x i64>, i3
|
|||||||
; And the GSImprovementsPass in turn converts these to actual native
|
; And the GSImprovementsPass in turn converts these to actual native
|
||||||
; scatters or masked stores.
|
; scatters or masked stores.
|
||||||
|
|
||||||
declare void @__pseudo_scatter32_8(<WIDTH x i32>, <WIDTH x i8>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter32_i8(<WIDTH x i32>, <WIDTH x i8>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter32_16(<WIDTH x i32>, <WIDTH x i16>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter32_i16(<WIDTH x i32>, <WIDTH x i16>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter32_32(<WIDTH x i32>, <WIDTH x i32>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter32_i32(<WIDTH x i32>, <WIDTH x i32>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter32_64(<WIDTH x i32>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter32_i64(<WIDTH x i32>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
|
||||||
|
|
||||||
declare void @__pseudo_scatter64_8(<WIDTH x i64>, <WIDTH x i8>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter64_i8(<WIDTH x i64>, <WIDTH x i8>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter64_16(<WIDTH x i64>, <WIDTH x i16>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter64_i16(<WIDTH x i64>, <WIDTH x i16>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter64_32(<WIDTH x i64>, <WIDTH x i32>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter64_i32(<WIDTH x i64>, <WIDTH x i32>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter64_64(<WIDTH x i64>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
|
declare void @__pseudo_scatter64_i64(<WIDTH x i64>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
|
||||||
|
|
||||||
declare void @__pseudo_scatter_base_offsets32_8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare void @__pseudo_scatter_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x i8>, <WIDTH x MASK>) nounwind
|
<WIDTH x i8>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets32_16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare void @__pseudo_scatter_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x i16>, <WIDTH x MASK>) nounwind
|
<WIDTH x i16>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets32_32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare void @__pseudo_scatter_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x i32>, <WIDTH x MASK>) nounwind
|
<WIDTH x i32>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets32_64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
declare void @__pseudo_scatter_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
|
||||||
<WIDTH x i64>, <WIDTH x MASK>) nounwind
|
<WIDTH x i64>, <WIDTH x MASK>) nounwind
|
||||||
|
|
||||||
declare void @__pseudo_scatter_base_offsets64_8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare void @__pseudo_scatter_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x i8>, <WIDTH x MASK>) nounwind
|
<WIDTH x i8>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets64_16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare void @__pseudo_scatter_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x i16>, <WIDTH x MASK>) nounwind
|
<WIDTH x i16>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare void @__pseudo_scatter_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x i32>, <WIDTH x MASK>) nounwind
|
<WIDTH x i32>, <WIDTH x MASK>) nounwind
|
||||||
declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
declare void @__pseudo_scatter_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||||
<WIDTH x i64>, <WIDTH x MASK>) nounwind
|
<WIDTH x i64>, <WIDTH x MASK>) nounwind
|
||||||
|
|
||||||
declare float @__log_uniform_float(float) nounwind readnone
|
declare float @__log_uniform_float(float) nounwind readnone
|
||||||
@@ -1740,29 +1740,29 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gathers
|
;; gathers
|
||||||
|
|
||||||
%pg32_8 = call <WIDTH x i8> @__pseudo_gather32_8(<WIDTH x i32> %v32,
|
%pg32_8 = call <WIDTH x i8> @__pseudo_gather32_i8(<WIDTH x i32> %v32,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use8(<WIDTH x i8> %pg32_8)
|
call void @__use8(<WIDTH x i8> %pg32_8)
|
||||||
%pg32_16 = call <WIDTH x i16> @__pseudo_gather32_16(<WIDTH x i32> %v32,
|
%pg32_16 = call <WIDTH x i16> @__pseudo_gather32_i16(<WIDTH x i32> %v32,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use16(<WIDTH x i16> %pg32_16)
|
call void @__use16(<WIDTH x i16> %pg32_16)
|
||||||
%pg32_32 = call <WIDTH x i32> @__pseudo_gather32_32(<WIDTH x i32> %v32,
|
%pg32_32 = call <WIDTH x i32> @__pseudo_gather32_i32(<WIDTH x i32> %v32,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use32(<WIDTH x i32> %pg32_32)
|
call void @__use32(<WIDTH x i32> %pg32_32)
|
||||||
%pg32_64 = call <WIDTH x i64> @__pseudo_gather32_64(<WIDTH x i32> %v32,
|
%pg32_64 = call <WIDTH x i64> @__pseudo_gather32_i64(<WIDTH x i32> %v32,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use64(<WIDTH x i64> %pg32_64)
|
call void @__use64(<WIDTH x i64> %pg32_64)
|
||||||
|
|
||||||
%pg64_8 = call <WIDTH x i8> @__pseudo_gather64_8(<WIDTH x i64> %v64,
|
%pg64_8 = call <WIDTH x i8> @__pseudo_gather64_i8(<WIDTH x i64> %v64,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use8(<WIDTH x i8> %pg64_8)
|
call void @__use8(<WIDTH x i8> %pg64_8)
|
||||||
%pg64_16 = call <WIDTH x i16> @__pseudo_gather64_16(<WIDTH x i64> %v64,
|
%pg64_16 = call <WIDTH x i16> @__pseudo_gather64_i16(<WIDTH x i64> %v64,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use16(<WIDTH x i16> %pg64_16)
|
call void @__use16(<WIDTH x i16> %pg64_16)
|
||||||
%pg64_32 = call <WIDTH x i32> @__pseudo_gather64_32(<WIDTH x i64> %v64,
|
%pg64_32 = call <WIDTH x i32> @__pseudo_gather64_i32(<WIDTH x i64> %v64,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use32(<WIDTH x i32> %pg64_32)
|
call void @__use32(<WIDTH x i32> %pg64_32)
|
||||||
%pg64_64 = call <WIDTH x i64> @__pseudo_gather64_64(<WIDTH x i64> %v64,
|
%pg64_64 = call <WIDTH x i64> @__pseudo_gather64_i64(<WIDTH x i64> %v64,
|
||||||
<WIDTH x MASK> %mask)
|
<WIDTH x MASK> %mask)
|
||||||
call void @__use64(<WIDTH x i64> %pg64_64)
|
call void @__use64(<WIDTH x i64> %pg64_64)
|
||||||
|
|
||||||
@@ -1793,19 +1793,19 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
call void @__use64(<WIDTH x i64> %g64_64)
|
call void @__use64(<WIDTH x i64> %g64_64)
|
||||||
|
|
||||||
%pgbo32_8 = call <WIDTH x i8>
|
%pgbo32_8 = call <WIDTH x i8>
|
||||||
@__pseudo_gather_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
@__pseudo_gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__use8(<WIDTH x i8> %pgbo32_8)
|
call void @__use8(<WIDTH x i8> %pgbo32_8)
|
||||||
%pgbo32_16 = call <WIDTH x i16>
|
%pgbo32_16 = call <WIDTH x i16>
|
||||||
@__pseudo_gather_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
@__pseudo_gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__use16(<WIDTH x i16> %pgbo32_16)
|
call void @__use16(<WIDTH x i16> %pgbo32_16)
|
||||||
%pgbo32_32 = call <WIDTH x i32>
|
%pgbo32_32 = call <WIDTH x i32>
|
||||||
@__pseudo_gather_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
@__pseudo_gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__use32(<WIDTH x i32> %pgbo32_32)
|
call void @__use32(<WIDTH x i32> %pgbo32_32)
|
||||||
%pgbo32_64 = call <WIDTH x i64>
|
%pgbo32_64 = call <WIDTH x i64>
|
||||||
@__pseudo_gather_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
@__pseudo_gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__use64(<WIDTH x i64> %pgbo32_64)
|
call void @__use64(<WIDTH x i64> %pgbo32_64)
|
||||||
|
|
||||||
@@ -1828,19 +1828,19 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
|
|
||||||
|
|
||||||
%pgbo64_8 = call <WIDTH x i8>
|
%pgbo64_8 = call <WIDTH x i8>
|
||||||
@__pseudo_gather_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
@__pseudo_gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
call void @__use8(<WIDTH x i8> %pgbo64_8)
|
call void @__use8(<WIDTH x i8> %pgbo64_8)
|
||||||
%pgbo64_16 = call <WIDTH x i16>
|
%pgbo64_16 = call <WIDTH x i16>
|
||||||
@__pseudo_gather_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
@__pseudo_gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
call void @__use16(<WIDTH x i16> %pgbo64_16)
|
call void @__use16(<WIDTH x i16> %pgbo64_16)
|
||||||
%pgbo64_32 = call <WIDTH x i32>
|
%pgbo64_32 = call <WIDTH x i32>
|
||||||
@__pseudo_gather_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
@__pseudo_gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
call void @__use32(<WIDTH x i32> %pgbo64_32)
|
call void @__use32(<WIDTH x i32> %pgbo64_32)
|
||||||
%pgbo64_64 = call <WIDTH x i64>
|
%pgbo64_64 = call <WIDTH x i64>
|
||||||
@__pseudo_gather_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
@__pseudo_gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
call void @__use64(<WIDTH x i64> %pgbo64_64)
|
call void @__use64(<WIDTH x i64> %pgbo64_64)
|
||||||
|
|
||||||
@@ -1864,15 +1864,15 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; scatters
|
;; scatters
|
||||||
|
|
||||||
call void @__pseudo_scatter32_8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter32_i8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter32_16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter32_i16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter32_32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter32_i32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter32_64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter32_i64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
|
|
||||||
call void @__pseudo_scatter64_8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter64_i8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter64_16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter64_i16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter64_32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter64_i32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter64_64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
call void @__pseudo_scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
|
|
||||||
call void @__scatter32_i8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
call void @__scatter32_i8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
||||||
call void @__scatter32_i16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
call void @__scatter32_i16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
||||||
@@ -1884,22 +1884,22 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
call void @__scatter64_i32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
call void @__scatter64_i32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
|
|
||||||
call void @__pseudo_scatter_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
call void @__pseudo_scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
||||||
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
call void @__pseudo_scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
||||||
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
call void @__pseudo_scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
call void @__pseudo_scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
|
|
||||||
call void @__pseudo_scatter_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
call void @__pseudo_scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
||||||
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
call void @__pseudo_scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
||||||
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
call void @__pseudo_scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
||||||
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
|
||||||
call void @__pseudo_scatter_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
call void @__pseudo_scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
|
||||||
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
|
||||||
|
|
||||||
call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
|
||||||
@@ -3076,18 +3076,17 @@ pl_done:
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
;;
|
;;
|
||||||
;; $1: vector width of the target
|
;; $1: scalar type for which to generate functions to do gathers
|
||||||
;; $2: scalar type for which to generate functions to do gathers
|
|
||||||
|
|
||||||
; vec width, type
|
; type (the vector width is taken from the WIDTH macro)
|
||||||
define(`gen_gather', `
|
define(`gen_gather', `
|
||||||
;; Define the utility function to do the gather operation for a single element
|
;; Define the utility function to do the gather operation for a single element
|
||||||
;; of the type
|
;; of the type
|
||||||
define <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %offset_scale,
|
define <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
|
||||||
<$1 x i32> %offset_delta, <$1 x $2> %ret,
|
<WIDTH x i32> %offset_delta, <WIDTH x $1> %ret,
|
||||||
i32 %lane) nounwind readonly alwaysinline {
|
i32 %lane) nounwind readonly alwaysinline {
|
||||||
; compute address for this one from the base
|
; compute address for this one from the base
|
||||||
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
%offset32 = extractelement <WIDTH x i32> %offsets, i32 %lane
|
||||||
; the order and details of the next 4 lines are important--they match LLVMs
|
; the order and details of the next 4 lines are important--they match LLVMs
|
||||||
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
||||||
%offset64 = sext i32 %offset32 to i64
|
%offset64 = sext i32 %offset32 to i64
|
||||||
@@ -3095,131 +3094,131 @@ define <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %offset_
|
|||||||
%offset = mul i64 %offset64, %scale64
|
%offset = mul i64 %offset64, %scale64
|
||||||
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
||||||
|
|
||||||
%delta = extractelement <$1 x i32> %offset_delta, i32 %lane
|
%delta = extractelement <WIDTH x i32> %offset_delta, i32 %lane
|
||||||
%delta64 = sext i32 %delta to i64
|
%delta64 = sext i32 %delta to i64
|
||||||
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
||||||
|
|
||||||
; load value and insert into returned value
|
; load value and insert into returned value
|
||||||
%ptrcast = bitcast i8 * %finalptr to $2 *
|
%ptrcast = bitcast i8 * %finalptr to $1 *
|
||||||
%val = load $2 *%ptrcast
|
%val = load $1 *%ptrcast
|
||||||
%updatedret = insertelement <$1 x $2> %ret, $2 %val, i32 %lane
|
%updatedret = insertelement <WIDTH x $1> %ret, $1 %val, i32 %lane
|
||||||
ret <$1 x $2> %updatedret
|
ret <WIDTH x $1> %updatedret
|
||||||
}
|
}
|
||||||
|
|
||||||
define <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %offsets, i32 %offset_scale,
|
define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
|
||||||
<$1 x i64> %offset_delta, <$1 x $2> %ret,
|
<WIDTH x i64> %offset_delta, <WIDTH x $1> %ret,
|
||||||
i32 %lane) nounwind readonly alwaysinline {
|
i32 %lane) nounwind readonly alwaysinline {
|
||||||
; compute address for this one from the base
|
; compute address for this one from the base
|
||||||
%offset64 = extractelement <$1 x i64> %offsets, i32 %lane
|
%offset64 = extractelement <WIDTH x i64> %offsets, i32 %lane
|
||||||
; the order and details of the next 4 lines are important--they match LLVMs
|
; the order and details of the next 4 lines are important--they match LLVMs
|
||||||
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
||||||
%offset_scale64 = sext i32 %offset_scale to i64
|
%offset_scale64 = sext i32 %offset_scale to i64
|
||||||
%offset = mul i64 %offset64, %offset_scale64
|
%offset = mul i64 %offset64, %offset_scale64
|
||||||
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
||||||
|
|
||||||
%delta64 = extractelement <$1 x i64> %offset_delta, i32 %lane
|
%delta64 = extractelement <WIDTH x i64> %offset_delta, i32 %lane
|
||||||
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
||||||
|
|
||||||
; load value and insert into returned value
|
; load value and insert into returned value
|
||||||
%ptrcast = bitcast i8 * %finalptr to $2 *
|
%ptrcast = bitcast i8 * %finalptr to $1 *
|
||||||
%val = load $2 *%ptrcast
|
%val = load $1 *%ptrcast
|
||||||
%updatedret = insertelement <$1 x $2> %ret, $2 %val, i32 %lane
|
%updatedret = insertelement <WIDTH x $1> %ret, $1 %val, i32 %lane
|
||||||
ret <$1 x $2> %updatedret
|
ret <WIDTH x $1> %updatedret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %offset_scale,
|
define <WIDTH x $1> @__gather_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
|
||||||
<$1 x i32> %offset_delta,
|
<WIDTH x i32> %offset_delta,
|
||||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
|
||||||
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
||||||
; to require that the 0th element of the array being gathered from is always
|
; to require that the 0th element of the array being gathered from is always
|
||||||
; legal to read from (and we do indeed require that, given the benefits!)
|
; legal to read from (and we do indeed require that, given the benefits!)
|
||||||
;
|
;
|
||||||
; Set the offset to zero for lanes that are off
|
; Set the offset to zero for lanes that are off
|
||||||
%offsetsPtr = alloca <$1 x i32>
|
%offsetsPtr = alloca <WIDTH x i32>
|
||||||
store <$1 x i32> zeroinitializer, <$1 x i32> * %offsetsPtr
|
store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %offsetsPtr
|
||||||
call void @__masked_store_blend_i32(<$1 x i32> * %offsetsPtr, <$1 x i32> %offsets,
|
call void @__masked_store_blend_i32(<WIDTH x i32> * %offsetsPtr, <WIDTH x i32> %offsets,
|
||||||
<$1 x i32> %vecmask)
|
<WIDTH x i32> %vecmask)
|
||||||
%newOffsets = load <$1 x i32> * %offsetsPtr
|
%newOffsets = load <WIDTH x i32> * %offsetsPtr
|
||||||
|
|
||||||
%deltaPtr = alloca <$1 x i32>
|
%deltaPtr = alloca <WIDTH x i32>
|
||||||
store <$1 x i32> zeroinitializer, <$1 x i32> * %deltaPtr
|
store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %deltaPtr
|
||||||
call void @__masked_store_blend_i32(<$1 x i32> * %deltaPtr, <$1 x i32> %offset_delta,
|
call void @__masked_store_blend_i32(<WIDTH x i32> * %deltaPtr, <WIDTH x i32> %offset_delta,
|
||||||
<$1 x i32> %vecmask)
|
<WIDTH x i32> %vecmask)
|
||||||
%newDelta = load <$1 x i32> * %deltaPtr
|
%newDelta = load <WIDTH x i32> * %deltaPtr
|
||||||
|
|
||||||
%ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
|
%ret0 = call <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %newOffsets,
|
||||||
i32 %offset_scale, <$1 x i32> %newDelta,
|
i32 %offset_scale, <WIDTH x i32> %newDelta,
|
||||||
<$1 x $2> undef, i32 0)
|
<WIDTH x $1> undef, i32 0)
|
||||||
forloop(lane, 1, eval($1-1),
|
forloop(lane, 1, eval(WIDTH-1),
|
||||||
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr,
|
`patsubst(patsubst(`%retLANE = call <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr,
|
||||||
<$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %newDelta,
|
<WIDTH x i32> %newOffsets, i32 %offset_scale, <WIDTH x i32> %newDelta,
|
||||||
<$1 x $2> %retPREV, i32 LANE)
|
<WIDTH x $1> %retPREV, i32 LANE)
|
||||||
', `LANE', lane), `PREV', eval(lane-1))')
|
', `LANE', lane), `PREV', eval(lane-1))')
|
||||||
ret <$1 x $2> %ret`'eval($1-1)
|
ret <WIDTH x $1> %ret`'eval(WIDTH-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
define <$1 x $2> @__gather_base_offsets64_$2(i8 * %ptr, <$1 x i64> %offsets, i32 %offset_scale,
|
define <WIDTH x $1> @__gather_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
|
||||||
<$1 x i64> %offset_delta,
|
<WIDTH x i64> %offset_delta,
|
||||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
|
||||||
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
||||||
; to require that the 0th element of the array being gathered from is always
|
; to require that the 0th element of the array being gathered from is always
|
||||||
; legal to read from (and we do indeed require that, given the benefits!)
|
; legal to read from (and we do indeed require that, given the benefits!)
|
||||||
;
|
;
|
||||||
; Set the offset to zero for lanes that are off
|
; Set the offset to zero for lanes that are off
|
||||||
%offsetsPtr = alloca <$1 x i64>
|
%offsetsPtr = alloca <WIDTH x i64>
|
||||||
store <$1 x i64> zeroinitializer, <$1 x i64> * %offsetsPtr
|
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %offsetsPtr
|
||||||
call void @__masked_store_blend_i64(<$1 x i64> * %offsetsPtr, <$1 x i64> %offsets,
|
call void @__masked_store_blend_i64(<WIDTH x i64> * %offsetsPtr, <WIDTH x i64> %offsets,
|
||||||
<$1 x i32> %vecmask)
|
<WIDTH x i32> %vecmask)
|
||||||
%newOffsets = load <$1 x i64> * %offsetsPtr
|
%newOffsets = load <WIDTH x i64> * %offsetsPtr
|
||||||
|
|
||||||
%deltaPtr = alloca <$1 x i64>
|
%deltaPtr = alloca <WIDTH x i64>
|
||||||
store <$1 x i64> zeroinitializer, <$1 x i64> * %deltaPtr
|
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %deltaPtr
|
||||||
call void @__masked_store_blend_i64(<$1 x i64> * %deltaPtr, <$1 x i64> %offset_delta,
|
call void @__masked_store_blend_i64(<WIDTH x i64> * %deltaPtr, <WIDTH x i64> %offset_delta,
|
||||||
<$1 x i32> %vecmask)
|
<WIDTH x i32> %vecmask)
|
||||||
%newDelta = load <$1 x i64> * %deltaPtr
|
%newDelta = load <WIDTH x i64> * %deltaPtr
|
||||||
|
|
||||||
%ret0 = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr, <$1 x i64> %newOffsets,
|
%ret0 = call <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %newOffsets,
|
||||||
i32 %offset_scale, <$1 x i64> %newDelta,
|
i32 %offset_scale, <WIDTH x i64> %newDelta,
|
||||||
<$1 x $2> undef, i32 0)
|
<WIDTH x $1> undef, i32 0)
|
||||||
forloop(lane, 1, eval($1-1),
|
forloop(lane, 1, eval(WIDTH-1),
|
||||||
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt64_$2(i8 * %ptr,
|
`patsubst(patsubst(`%retLANE = call <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr,
|
||||||
<$1 x i64> %newOffsets, i32 %offset_scale, <$1 x i64> %newDelta,
|
<WIDTH x i64> %newOffsets, i32 %offset_scale, <WIDTH x i64> %newDelta,
|
||||||
<$1 x $2> %retPREV, i32 LANE)
|
<WIDTH x $1> %retPREV, i32 LANE)
|
||||||
', `LANE', lane), `PREV', eval(lane-1))')
|
', `LANE', lane), `PREV', eval(lane-1))')
|
||||||
ret <$1 x $2> %ret`'eval($1-1)
|
ret <WIDTH x $1> %ret`'eval(WIDTH-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
|
; fully general 32-bit gather, takes array of pointers encoded as vector of i32s
|
||||||
define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
|
define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
|
||||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
|
||||||
%ret_ptr = alloca <$1 x $2>
|
%ret_ptr = alloca <WIDTH x $1>
|
||||||
per_lane($1, <$1 x i32> %vecmask, `
|
per_lane(WIDTH, <WIDTH x i32> %vecmask, `
|
||||||
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
%iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
|
||||||
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
|
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
|
||||||
%val_LANE_ID = load $2 * %ptr_LANE_ID
|
%val_LANE_ID = load $1 * %ptr_LANE_ID
|
||||||
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
%store_ptr_LANE_ID = getelementptr <WIDTH x $1> * %ret_ptr, i32 0, i32 LANE
|
||||||
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
|
store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID
|
||||||
')
|
')
|
||||||
|
|
||||||
%ret = load <$1 x $2> * %ret_ptr
|
%ret = load <WIDTH x $1> * %ret_ptr
|
||||||
ret <$1 x $2> %ret
|
ret <WIDTH x $1> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
; fully general 64-bit gather, takes array of pointers encoded as vector of i64s
|
; fully general 64-bit gather, takes array of pointers encoded as vector of i64s
|
||||||
define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
|
define <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs,
|
||||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
<WIDTH x i32> %vecmask) nounwind readonly alwaysinline {
|
||||||
%ret_ptr = alloca <$1 x $2>
|
%ret_ptr = alloca <WIDTH x $1>
|
||||||
per_lane($1, <$1 x i32> %vecmask, `
|
per_lane(WIDTH, <WIDTH x i32> %vecmask, `
|
||||||
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
|
||||||
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
|
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
|
||||||
%val_LANE_ID = load $2 * %ptr_LANE_ID
|
%val_LANE_ID = load $1 * %ptr_LANE_ID
|
||||||
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
%store_ptr_LANE_ID = getelementptr <WIDTH x $1> * %ret_ptr, i32 0, i32 LANE
|
||||||
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
|
store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID
|
||||||
')
|
')
|
||||||
|
|
||||||
%ret = load <$1 x $2> * %ret_ptr
|
%ret = load <WIDTH x $1> * %ret_ptr
|
||||||
ret <$1 x $2> %ret
|
ret <WIDTH x $1> %ret
|
||||||
}
|
}
|
||||||
'
|
'
|
||||||
)
|
)
|
||||||
@@ -3229,16 +3228,15 @@ define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
|
|||||||
;; gen_scatter
|
;; gen_scatter
|
||||||
;; Emit a function declaration for a scalarized scatter.
|
;; Emit a function declaration for a scalarized scatter.
|
||||||
;;
|
;;
|
||||||
;; $1: target vector width
|
;; $1: scalar type for which we want to generate code to scatter
|
||||||
;; $2: scalar type for which we want to generate code to scatter
|
|
||||||
|
|
||||||
define(`gen_scatter', `
|
define(`gen_scatter', `
|
||||||
;; Define the function that describes the work to do to scatter a single
|
;; Define the function that describes the work to do to scatter a single
|
||||||
;; value
|
;; value
|
||||||
define void @__scatter_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %offset_scale,
|
define void @__scatter_elt32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
|
||||||
<$1 x i32> %offset_delta, <$1 x $2> %values,
|
<WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
|
||||||
i32 %lane) nounwind alwaysinline {
|
i32 %lane) nounwind alwaysinline {
|
||||||
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
%offset32 = extractelement <WIDTH x i32> %offsets, i32 %lane
|
||||||
; the order and details of the next 4 lines are important--they match LLVMs
|
; the order and details of the next 4 lines are important--they match LLVMs
|
||||||
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
||||||
%offset64 = sext i32 %offset32 to i64
|
%offset64 = sext i32 %offset32 to i64
|
||||||
@@ -3246,75 +3244,75 @@ define void @__scatter_elt32_$2(i8 * %ptr, <$1 x i32> %offsets, i32 %offset_scal
|
|||||||
%offset = mul i64 %offset64, %scale64
|
%offset = mul i64 %offset64, %scale64
|
||||||
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
||||||
|
|
||||||
%delta = extractelement <$1 x i32> %offset_delta, i32 %lane
|
%delta = extractelement <WIDTH x i32> %offset_delta, i32 %lane
|
||||||
%delta64 = sext i32 %delta to i64
|
%delta64 = sext i32 %delta to i64
|
||||||
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
||||||
|
|
||||||
%ptrcast = bitcast i8 * %finalptr to $2 *
|
%ptrcast = bitcast i8 * %finalptr to $1 *
|
||||||
%storeval = extractelement <$1 x $2> %values, i32 %lane
|
%storeval = extractelement <WIDTH x $1> %values, i32 %lane
|
||||||
store $2 %storeval, $2 * %ptrcast
|
store $1 %storeval, $1 * %ptrcast
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @__scatter_elt64_$2(i8 * %ptr, <$1 x i64> %offsets, i32 %offset_scale,
|
define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
|
||||||
<$1 x i64> %offset_delta, <$1 x $2> %values,
|
<WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
|
||||||
i32 %lane) nounwind alwaysinline {
|
i32 %lane) nounwind alwaysinline {
|
||||||
%offset64 = extractelement <$1 x i64> %offsets, i32 %lane
|
%offset64 = extractelement <WIDTH x i64> %offsets, i32 %lane
|
||||||
; the order and details of the next 4 lines are important--they match LLVMs
|
; the order and details of the next 4 lines are important--they match LLVMs
|
||||||
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
|
||||||
%scale64 = sext i32 %offset_scale to i64
|
%scale64 = sext i32 %offset_scale to i64
|
||||||
%offset = mul i64 %offset64, %scale64
|
%offset = mul i64 %offset64, %scale64
|
||||||
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
%ptroffset = getelementptr i8 * %ptr, i64 %offset
|
||||||
|
|
||||||
%delta64 = extractelement <$1 x i64> %offset_delta, i32 %lane
|
%delta64 = extractelement <WIDTH x i64> %offset_delta, i32 %lane
|
||||||
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
%finalptr = getelementptr i8 * %ptroffset, i64 %delta64
|
||||||
|
|
||||||
%ptrcast = bitcast i8 * %finalptr to $2 *
|
%ptrcast = bitcast i8 * %finalptr to $1 *
|
||||||
%storeval = extractelement <$1 x $2> %values, i32 %lane
|
%storeval = extractelement <WIDTH x $1> %values, i32 %lane
|
||||||
store $2 %storeval, $2 * %ptrcast
|
store $1 %storeval, $1 * %ptrcast
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @__scatter_base_offsets32_$2(i8* %base, <$1 x i32> %offsets, i32 %offset_scale,
|
define void @__scatter_base_offsets32_$1(i8* %base, <WIDTH x i32> %offsets, i32 %offset_scale,
|
||||||
<$1 x i32> %offset_delta, <$1 x $2> %values,
|
<WIDTH x i32> %offset_delta, <WIDTH x $1> %values,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<WIDTH x i32> %mask) nounwind alwaysinline {
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||||
per_lane($1, <$1 x i32> %mask, `
|
per_lane(WIDTH, <WIDTH x i32> %mask, `
|
||||||
call void @__scatter_elt32_$2(i8 * %base, <$1 x i32> %offsets, i32 %offset_scale,
|
call void @__scatter_elt32_$1(i8 * %base, <WIDTH x i32> %offsets, i32 %offset_scale,
|
||||||
<$1 x i32> %offset_delta, <$1 x $2> %values, i32 LANE)')
|
<WIDTH x i32> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, i32 %offset_scale,
|
define void @__scatter_base_offsets64_$1(i8* %base, <WIDTH x i64> %offsets, i32 %offset_scale,
|
||||||
<$1 x i64> %offset_delta, <$1 x $2> %values,
|
<WIDTH x i64> %offset_delta, <WIDTH x $1> %values,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<WIDTH x i32> %mask) nounwind alwaysinline {
|
||||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||||
per_lane($1, <$1 x i32> %mask, `
|
per_lane(WIDTH, <WIDTH x i32> %mask, `
|
||||||
call void @__scatter_elt64_$2(i8 * %base, <$1 x i64> %offsets, i32 %offset_scale,
|
call void @__scatter_elt64_$1(i8 * %base, <WIDTH x i64> %offsets, i32 %offset_scale,
|
||||||
<$1 x i64> %offset_delta, <$1 x $2> %values, i32 LANE)')
|
<WIDTH x i64> %offset_delta, <WIDTH x $1> %values, i32 LANE)')
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
|
; fully general 32-bit scatter, takes array of pointers encoded as vector of i32s
|
||||||
define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
|
define void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<WIDTH x i32> %mask) nounwind alwaysinline {
|
||||||
per_lane($1, <$1 x i32> %mask, `
|
per_lane(WIDTH, <WIDTH x i32> %mask, `
|
||||||
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
%iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
|
||||||
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
|
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
|
||||||
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
|
%val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE
|
||||||
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
|
store $1 %val_LANE_ID, $1 * %ptr_LANE_ID
|
||||||
')
|
')
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
|
; fully general 64-bit scatter, takes array of pointers encoded as vector of i64s
|
||||||
define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
|
define void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<WIDTH x i32> %mask) nounwind alwaysinline {
|
||||||
per_lane($1, <$1 x i32> %mask, `
|
per_lane(WIDTH, <WIDTH x i32> %mask, `
|
||||||
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
|
||||||
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
|
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
|
||||||
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
|
%val_LANE_ID = extractelement <WIDTH x $1> %values, i32 LANE
|
||||||
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
|
store $1 %val_LANE_ID, $1 * %ptr_LANE_ID
|
||||||
')
|
')
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|||||||
40
ctx.cpp
40
ctx.cpp
@@ -2516,23 +2516,23 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
|
|||||||
const PointerType *pt = CastType<PointerType>(returnType);
|
const PointerType *pt = CastType<PointerType>(returnType);
|
||||||
const char *funcName = NULL;
|
const char *funcName = NULL;
|
||||||
if (pt != NULL)
|
if (pt != NULL)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
|
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
|
||||||
"__pseudo_gather64_64";
|
"__pseudo_gather64_i64";
|
||||||
else if (llvmReturnType == LLVMTypes::DoubleVectorType ||
|
else if (llvmReturnType == LLVMTypes::DoubleVectorType ||
|
||||||
llvmReturnType == LLVMTypes::Int64VectorType)
|
llvmReturnType == LLVMTypes::Int64VectorType)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_gather32_64" :
|
funcName = g->target.is32Bit ? "__pseudo_gather32_i64" :
|
||||||
"__pseudo_gather64_64";
|
"__pseudo_gather64_i64";
|
||||||
else if (llvmReturnType == LLVMTypes::FloatVectorType ||
|
else if (llvmReturnType == LLVMTypes::FloatVectorType ||
|
||||||
llvmReturnType == LLVMTypes::Int32VectorType)
|
llvmReturnType == LLVMTypes::Int32VectorType)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
|
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
|
||||||
"__pseudo_gather64_32";
|
"__pseudo_gather64_i32";
|
||||||
else if (llvmReturnType == LLVMTypes::Int16VectorType)
|
else if (llvmReturnType == LLVMTypes::Int16VectorType)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_gather32_16" :
|
funcName = g->target.is32Bit ? "__pseudo_gather32_i16" :
|
||||||
"__pseudo_gather64_16";
|
"__pseudo_gather64_i16";
|
||||||
else {
|
else {
|
||||||
AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
|
AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
|
||||||
funcName = g->target.is32Bit ? "__pseudo_gather32_8" :
|
funcName = g->target.is32Bit ? "__pseudo_gather32_i8" :
|
||||||
"__pseudo_gather64_8";
|
"__pseudo_gather64_i8";
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Function *gatherFunc = m->module->getFunction(funcName);
|
llvm::Function *gatherFunc = m->module->getFunction(funcName);
|
||||||
@@ -2828,26 +2828,26 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
|
|||||||
llvm::Type *type = value->getType();
|
llvm::Type *type = value->getType();
|
||||||
const char *funcName = NULL;
|
const char *funcName = NULL;
|
||||||
if (pt != NULL)
|
if (pt != NULL)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_scatter32_32" :
|
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
|
||||||
"__pseudo_scatter64_64";
|
"__pseudo_scatter64_i64";
|
||||||
else if (type == LLVMTypes::DoubleVectorType ||
|
else if (type == LLVMTypes::DoubleVectorType ||
|
||||||
type == LLVMTypes::Int64VectorType) {
|
type == LLVMTypes::Int64VectorType) {
|
||||||
funcName = g->target.is32Bit ? "__pseudo_scatter32_64" :
|
funcName = g->target.is32Bit ? "__pseudo_scatter32_i64" :
|
||||||
"__pseudo_scatter64_64";
|
"__pseudo_scatter64_i64";
|
||||||
value = BitCastInst(value, LLVMTypes::Int64VectorType, "value2int");
|
value = BitCastInst(value, LLVMTypes::Int64VectorType, "value2int");
|
||||||
}
|
}
|
||||||
else if (type == LLVMTypes::FloatVectorType ||
|
else if (type == LLVMTypes::FloatVectorType ||
|
||||||
type == LLVMTypes::Int32VectorType) {
|
type == LLVMTypes::Int32VectorType) {
|
||||||
funcName = g->target.is32Bit ? "__pseudo_scatter32_32" :
|
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
|
||||||
"__pseudo_scatter64_32";
|
"__pseudo_scatter64_i32";
|
||||||
value = BitCastInst(value, LLVMTypes::Int32VectorType, "value2int");
|
value = BitCastInst(value, LLVMTypes::Int32VectorType, "value2int");
|
||||||
}
|
}
|
||||||
else if (type == LLVMTypes::Int16VectorType)
|
else if (type == LLVMTypes::Int16VectorType)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_scatter32_16" :
|
funcName = g->target.is32Bit ? "__pseudo_scatter32_i16" :
|
||||||
"__pseudo_scatter64_16";
|
"__pseudo_scatter64_i16";
|
||||||
else if (type == LLVMTypes::Int8VectorType)
|
else if (type == LLVMTypes::Int8VectorType)
|
||||||
funcName = g->target.is32Bit ? "__pseudo_scatter32_8" :
|
funcName = g->target.is32Bit ? "__pseudo_scatter32_i8" :
|
||||||
"__pseudo_scatter64_8";
|
"__pseudo_scatter64_i8";
|
||||||
|
|
||||||
llvm::Function *scatterFunc = m->module->getFunction(funcName);
|
llvm::Function *scatterFunc = m->module->getFunction(funcName);
|
||||||
AssertPos(currentPos, scatterFunc != NULL);
|
AssertPos(currentPos, scatterFunc != NULL);
|
||||||
|
|||||||
171
opt.cpp
171
opt.cpp
@@ -1689,38 +1689,41 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
DEBUG_START_PASS("DetectGSBaseOffsets");
|
DEBUG_START_PASS("DetectGSBaseOffsets");
|
||||||
|
|
||||||
GSInfo gsFuncs[] = {
|
GSInfo gsFuncs[] = {
|
||||||
GSInfo("__pseudo_gather32_8", "__pseudo_gather_base_offsets32_8",
|
GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
|
||||||
"__pseudo_gather_base_offsets32_8", true),
|
"__pseudo_gather_base_offsets32_i8", true),
|
||||||
GSInfo("__pseudo_gather32_16", "__pseudo_gather_base_offsets32_16",
|
GSInfo("__pseudo_gather32_i16", "__pseudo_gather_base_offsets32_i16",
|
||||||
"__pseudo_gather_base_offsets32_16", true),
|
"__pseudo_gather_base_offsets32_i16", true),
|
||||||
GSInfo("__pseudo_gather32_32", "__pseudo_gather_base_offsets32_32",
|
GSInfo("__pseudo_gather32_i32", "__pseudo_gather_base_offsets32_i32",
|
||||||
"__pseudo_gather_base_offsets32_32", true),
|
"__pseudo_gather_base_offsets32_i32", true),
|
||||||
GSInfo("__pseudo_gather32_64", "__pseudo_gather_base_offsets32_64",
|
GSInfo("__pseudo_gather32_i64", "__pseudo_gather_base_offsets32_i64",
|
||||||
"__pseudo_gather_base_offsets32_64", true),
|
"__pseudo_gather_base_offsets32_i64", true),
|
||||||
GSInfo("__pseudo_scatter32_8", "__pseudo_scatter_base_offsets32_8",
|
|
||||||
"__pseudo_scatter_base_offsets32_8", false),
|
GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_base_offsets32_i8",
|
||||||
GSInfo("__pseudo_scatter32_16", "__pseudo_scatter_base_offsets32_16",
|
"__pseudo_scatter_base_offsets32_i8", false),
|
||||||
"__pseudo_scatter_base_offsets32_16", false),
|
GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_base_offsets32_i16",
|
||||||
GSInfo("__pseudo_scatter32_32", "__pseudo_scatter_base_offsets32_32",
|
"__pseudo_scatter_base_offsets32_i16", false),
|
||||||
"__pseudo_scatter_base_offsets32_32", false),
|
GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_base_offsets32_i32",
|
||||||
GSInfo("__pseudo_scatter32_64", "__pseudo_scatter_base_offsets32_64",
|
"__pseudo_scatter_base_offsets32_i32", false),
|
||||||
"__pseudo_scatter_base_offsets32_64", false),
|
GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_base_offsets32_i64",
|
||||||
GSInfo("__pseudo_gather64_8", "__pseudo_gather_base_offsets64_8",
|
"__pseudo_scatter_base_offsets32_i64", false),
|
||||||
"__pseudo_gather_base_offsets32_8", true),
|
|
||||||
GSInfo("__pseudo_gather64_16", "__pseudo_gather_base_offsets64_16",
|
GSInfo("__pseudo_gather64_i8", "__pseudo_gather_base_offsets64_i8",
|
||||||
"__pseudo_gather_base_offsets32_16", true),
|
"__pseudo_gather_base_offsets32_i8", true),
|
||||||
GSInfo("__pseudo_gather64_32", "__pseudo_gather_base_offsets64_32",
|
GSInfo("__pseudo_gather64_i16", "__pseudo_gather_base_offsets64_i16",
|
||||||
"__pseudo_gather_base_offsets32_32", true),
|
"__pseudo_gather_base_offsets32_i16", true),
|
||||||
GSInfo("__pseudo_gather64_64", "__pseudo_gather_base_offsets64_64",
|
GSInfo("__pseudo_gather64_i32", "__pseudo_gather_base_offsets64_i32",
|
||||||
"__pseudo_gather_base_offsets32_64", true),
|
"__pseudo_gather_base_offsets32_i32", true),
|
||||||
GSInfo("__pseudo_scatter64_8", "__pseudo_scatter_base_offsets64_8",
|
GSInfo("__pseudo_gather64_i64", "__pseudo_gather_base_offsets64_i64",
|
||||||
"__pseudo_scatter_base_offsets32_8", false),
|
"__pseudo_gather_base_offsets32_i64", true),
|
||||||
GSInfo("__pseudo_scatter64_16", "__pseudo_scatter_base_offsets64_16",
|
|
||||||
"__pseudo_scatter_base_offsets32_16", false),
|
GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_base_offsets64_i8",
|
||||||
GSInfo("__pseudo_scatter64_32", "__pseudo_scatter_base_offsets64_32",
|
"__pseudo_scatter_base_offsets32_i8", false),
|
||||||
"__pseudo_scatter_base_offsets32_32", false),
|
GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_base_offsets64_i16",
|
||||||
GSInfo("__pseudo_scatter64_64", "__pseudo_scatter_base_offsets64_64",
|
"__pseudo_scatter_base_offsets32_i16", false),
|
||||||
"__pseudo_scatter_base_offsets32_64", false),
|
GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_base_offsets64_i32",
|
||||||
|
"__pseudo_scatter_base_offsets32_i32", false),
|
||||||
|
GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_base_offsets64_i64",
|
||||||
|
"__pseudo_scatter_base_offsets32_i64", false),
|
||||||
};
|
};
|
||||||
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
|
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
|
||||||
for (int i = 0; i < numGSFuncs; ++i)
|
for (int i = 0; i < numGSFuncs; ++i)
|
||||||
@@ -2281,40 +2284,40 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
DEBUG_START_PASS("GSToLoadStorePass");
|
DEBUG_START_PASS("GSToLoadStorePass");
|
||||||
|
|
||||||
GatherImpInfo gInfo[] = {
|
GatherImpInfo gInfo[] = {
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_i8",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__load_and_broadcast_i8",
|
||||||
"__masked_load_i8", 1),
|
"__masked_load_i8", 1),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_16", "__load_and_broadcast_i16",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__load_and_broadcast_i16",
|
||||||
"__masked_load_i16", 2),
|
"__masked_load_i16", 2),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_32", "__load_and_broadcast_i32",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__load_and_broadcast_i32",
|
||||||
"__masked_load_i32", 4),
|
"__masked_load_i32", 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_64", "__load_and_broadcast_i64",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__load_and_broadcast_i64",
|
||||||
"__masked_load_i64", 8),
|
"__masked_load_i64", 8),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_8", "__load_and_broadcast_i8",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__load_and_broadcast_i8",
|
||||||
"__masked_load_i8", 1),
|
"__masked_load_i8", 1),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_16", "__load_and_broadcast_i16",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__load_and_broadcast_i16",
|
||||||
"__masked_load_i16", 2),
|
"__masked_load_i16", 2),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_32", "__load_and_broadcast_i32",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__load_and_broadcast_i32",
|
||||||
"__masked_load_i32", 4),
|
"__masked_load_i32", 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_64", "__load_and_broadcast_i64",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__load_and_broadcast_i64",
|
||||||
"__masked_load_i64", 8)
|
"__masked_load_i64", 8),
|
||||||
};
|
};
|
||||||
ScatterImpInfo sInfo[] = {
|
ScatterImpInfo sInfo[] = {
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets32_8", "__pseudo_masked_store_i8",
|
ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
|
||||||
LLVMTypes::Int8VectorPointerType, 1),
|
LLVMTypes::Int8VectorPointerType, 1),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets32_16", "__pseudo_masked_store_i16",
|
ScatterImpInfo("__pseudo_scatter_base_offsets32_i16", "__pseudo_masked_store_i16",
|
||||||
LLVMTypes::Int16VectorPointerType, 2),
|
LLVMTypes::Int16VectorPointerType, 2),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets32_32", "__pseudo_masked_store_i32",
|
ScatterImpInfo("__pseudo_scatter_base_offsets32_i32", "__pseudo_masked_store_i32",
|
||||||
LLVMTypes::Int32VectorPointerType, 4),
|
LLVMTypes::Int32VectorPointerType, 4),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets32_64", "__pseudo_masked_store_i64",
|
ScatterImpInfo("__pseudo_scatter_base_offsets32_i64", "__pseudo_masked_store_i64",
|
||||||
LLVMTypes::Int64VectorPointerType, 8),
|
LLVMTypes::Int64VectorPointerType, 8),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets64_8", "__pseudo_masked_store_i8",
|
ScatterImpInfo("__pseudo_scatter_base_offsets64_i8", "__pseudo_masked_store_i8",
|
||||||
LLVMTypes::Int8VectorPointerType, 1),
|
LLVMTypes::Int8VectorPointerType, 1),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets64_16", "__pseudo_masked_store_i16",
|
ScatterImpInfo("__pseudo_scatter_base_offsets64_i16", "__pseudo_masked_store_i16",
|
||||||
LLVMTypes::Int16VectorPointerType, 2),
|
LLVMTypes::Int16VectorPointerType, 2),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets64_32", "__pseudo_masked_store_i32",
|
ScatterImpInfo("__pseudo_scatter_base_offsets64_i32", "__pseudo_masked_store_i32",
|
||||||
LLVMTypes::Int32VectorPointerType, 4),
|
LLVMTypes::Int32VectorPointerType, 4),
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets64_64", "__pseudo_masked_store_i64",
|
ScatterImpInfo("__pseudo_scatter_base_offsets64_i64", "__pseudo_masked_store_i64",
|
||||||
LLVMTypes::Int64VectorPointerType, 8)
|
LLVMTypes::Int64VectorPointerType, 8),
|
||||||
};
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
@@ -3387,8 +3390,8 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
DEBUG_START_PASS("GatherCoalescePass");
|
DEBUG_START_PASS("GatherCoalescePass");
|
||||||
|
|
||||||
llvm::Function *gatherFuncs[] = {
|
llvm::Function *gatherFuncs[] = {
|
||||||
m->module->getFunction("__pseudo_gather_base_offsets32_32"),
|
m->module->getFunction("__pseudo_gather_base_offsets32_i32"),
|
||||||
m->module->getFunction("__pseudo_gather_base_offsets64_32"),
|
m->module->getFunction("__pseudo_gather_base_offsets64_i32"),
|
||||||
};
|
};
|
||||||
int nGatherFuncs = sizeof(gatherFuncs) / sizeof(gatherFuncs[0]);
|
int nGatherFuncs = sizeof(gatherFuncs) / sizeof(gatherFuncs[0]);
|
||||||
|
|
||||||
@@ -3570,45 +3573,45 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
DEBUG_START_PASS("PseudoGSToGSPass");
|
DEBUG_START_PASS("PseudoGSToGSPass");
|
||||||
|
|
||||||
LowerGSInfo lgsInfo[] = {
|
LowerGSInfo lgsInfo[] = {
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets32_8", "__gather_base_offsets32_i8", true),
|
LowerGSInfo("__pseudo_gather_base_offsets32_i8", "__gather_base_offsets32_i8", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets32_16", "__gather_base_offsets32_i16", true),
|
LowerGSInfo("__pseudo_gather_base_offsets32_i16", "__gather_base_offsets32_i16", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets32_32", "__gather_base_offsets32_i32", true),
|
LowerGSInfo("__pseudo_gather_base_offsets32_i32", "__gather_base_offsets32_i32", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets32_64", "__gather_base_offsets32_i64", true),
|
LowerGSInfo("__pseudo_gather_base_offsets32_i64", "__gather_base_offsets32_i64", true),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets64_8", "__gather_base_offsets64_i8", true),
|
LowerGSInfo("__pseudo_gather_base_offsets64_i8", "__gather_base_offsets64_i8", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets64_16", "__gather_base_offsets64_i16", true),
|
LowerGSInfo("__pseudo_gather_base_offsets64_i16", "__gather_base_offsets64_i16", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets64_32", "__gather_base_offsets64_i32", true),
|
LowerGSInfo("__pseudo_gather_base_offsets64_i32", "__gather_base_offsets64_i32", true),
|
||||||
LowerGSInfo("__pseudo_gather_base_offsets64_64", "__gather_base_offsets64_i64", true),
|
LowerGSInfo("__pseudo_gather_base_offsets64_i64", "__gather_base_offsets64_i64", true),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_gather32_8", "__gather32_i8", true),
|
LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true),
|
||||||
LowerGSInfo("__pseudo_gather32_16", "__gather32_i16", true),
|
LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true),
|
||||||
LowerGSInfo("__pseudo_gather32_32", "__gather32_i32", true),
|
LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true),
|
||||||
LowerGSInfo("__pseudo_gather32_64", "__gather32_i64", true),
|
LowerGSInfo("__pseudo_gather32_i64", "__gather32_i64", true),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_gather64_8", "__gather64_i8", true),
|
LowerGSInfo("__pseudo_gather64_i8", "__gather64_i8", true),
|
||||||
LowerGSInfo("__pseudo_gather64_16", "__gather64_i16", true),
|
LowerGSInfo("__pseudo_gather64_i16", "__gather64_i16", true),
|
||||||
LowerGSInfo("__pseudo_gather64_32", "__gather64_i32", true),
|
LowerGSInfo("__pseudo_gather64_i32", "__gather64_i32", true),
|
||||||
LowerGSInfo("__pseudo_gather64_64", "__gather64_i64", true),
|
LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets32_8", "__scatter_base_offsets32_i8", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets32_i8", "__scatter_base_offsets32_i8", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets32_16", "__scatter_base_offsets32_i16", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets32_i16", "__scatter_base_offsets32_i16", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets32_32", "__scatter_base_offsets32_i32", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets32_i32", "__scatter_base_offsets32_i32", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets32_64", "__scatter_base_offsets32_i64", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets32_i64", "__scatter_base_offsets32_i64", false),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets64_8", "__scatter_base_offsets64_i8", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets64_i8", "__scatter_base_offsets64_i8", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets64_16", "__scatter_base_offsets64_i16", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets64_i16", "__scatter_base_offsets64_i16", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets64_32", "__scatter_base_offsets64_i32", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets64_i32", "__scatter_base_offsets64_i32", false),
|
||||||
LowerGSInfo("__pseudo_scatter_base_offsets64_64", "__scatter_base_offsets64_i64", false),
|
LowerGSInfo("__pseudo_scatter_base_offsets64_i64", "__scatter_base_offsets64_i64", false),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_scatter32_8", "__scatter32_i8", false),
|
LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false),
|
||||||
LowerGSInfo("__pseudo_scatter32_16", "__scatter32_i16", false),
|
LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false),
|
||||||
LowerGSInfo("__pseudo_scatter32_32", "__scatter32_i32", false),
|
LowerGSInfo("__pseudo_scatter32_i32", "__scatter32_i32", false),
|
||||||
LowerGSInfo("__pseudo_scatter32_64", "__scatter32_i64", false),
|
LowerGSInfo("__pseudo_scatter32_i64", "__scatter32_i64", false),
|
||||||
|
|
||||||
LowerGSInfo("__pseudo_scatter64_8", "__scatter64_i8", false),
|
LowerGSInfo("__pseudo_scatter64_i8", "__scatter64_i8", false),
|
||||||
LowerGSInfo("__pseudo_scatter64_16", "__scatter64_i16", false),
|
LowerGSInfo("__pseudo_scatter64_i16", "__scatter64_i16", false),
|
||||||
LowerGSInfo("__pseudo_scatter64_32", "__scatter64_i32", false),
|
LowerGSInfo("__pseudo_scatter64_i32", "__scatter64_i32", false),
|
||||||
LowerGSInfo("__pseudo_scatter64_64", "__scatter64_i64", false),
|
LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false),
|
||||||
};
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
|
|||||||
Reference in New Issue
Block a user