Add support for non-factored variants of gather/scatter functions.

We now have two ways of handling gathers/scatters that use a common base
pointer plus a vector of offsets.  For targets with a native gather/scatter
instruction, we just turn those into base + {1/2/4/8}*offsets.  For targets
without one, we turn those into base + {1/2/4/8}*varying_offsets + const_offsets,
where const_offsets is a vector of compile-time constants.

Infrastructure for issue #325.
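
For orientation, here is a minimal scalar sketch of the per-lane address
computation in the two forms described above. It is illustrative only and not
part of the commit; the function and parameter names are placeholders, not
ispc identifiers.

    // Sketch only; hypothetical helper names, not actual ispc symbols.
    #include <cstdint>

    // Non-factored form (targets with a native gather/scatter instruction):
    //   lane address = base + offset_scale * offsets[lane]
    static inline uint8_t *laneAddrNonFactored(uint8_t *base, int32_t offset_scale,
                                               int64_t offset) {
        return base + offset_scale * offset;
    }

    // Factored form (targets without native gather/scatter):
    //   lane address = base + offset_scale * varying_offsets[lane] + const_offsets[lane]
    // where const_offsets[lane] is a compile-time constant, so the scalar
    // load/store emitted per lane can fold it (and the 1/2/4/8 scale) into an
    // x86 addressing mode for free.
    static inline uint8_t *laneAddrFactored(uint8_t *base, int32_t offset_scale,
                                            int64_t varying_offset, int64_t const_offset) {
        return base + offset_scale * varying_offset + const_offset;
    }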
Matt Pharr
2012-07-11 14:09:06 -07:00
parent ec0280be11
commit 10b79fb41b
4 changed files with 965 additions and 356 deletions
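
The factored path depends on splitting each offset expression into a
compile-time-constant part and a varying part. A simplified sketch of what
that decomposition produces (hypothetical types and names; the real pass in
opt.cpp operates on LLVM IR values rather than plain integers):

    #include <cstdint>

    // offset == offset_scale * varying + constant, with offset_scale restricted
    // to 1, 2, 4, or 8 so it can map onto x86 scaled addressing.
    struct FactoredOffset {
        int32_t offset_scale;   // 1, 2, 4, or 8
        int64_t varying;        // not known at compile time
        int64_t constant;       // compile-time constant (the "offset_delta")
    };

    // Example: an offset of the form 8*i + 48 factors as
    //   offset_scale = 8, varying = i, constant = 48.
    static int64_t applyFactoredOffset(const FactoredOffset &f) {
        return (int64_t)f.offset_scale * f.varying + f.constant;
    }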


@@ -1579,7 +1579,7 @@ declare void @__pseudo_masked_store_double(<WIDTH x double> * nocapture, <WIDTH
; Declare the pseudo-gather functions. When the ispc front-end needs
; to perform a gather, it generates a call to one of these functions,
; which ideally have these signatures:
;
; varying int8 __pseudo_gather_i8(varying int8 *, mask)
; varying int16 __pseudo_gather_i16(varying int16 *, mask)
@@ -1588,24 +1588,9 @@ declare void @__pseudo_masked_store_double(<WIDTH x double> * nocapture, <WIDTH
; varying int64 __pseudo_gather_i64(varying int64 *, mask)
; varying double __pseudo_gather_double(varying double *, mask)
;
-; The GatherScatterFlattenOpt optimization pass finds these calls and then
-; converts them to make calls to the following functions (when appropriate);
-; these represent gathers from a common base pointer with offsets. The
-; offset_scale factor scales the offsets before they are added to the base
-; pointer--it should have the value 1, 2, 4, or 8. (It can always just be 1.)
-; Then, the offset delta_value (guaranteed to be a compile-time constant value),
-; is added to the final address. The 2, 4, 8 scales are used to match LLVM patterns
-; that use the free 2/4/8 scaling available in x86 addressing calculations, and
-; offset_delta feeds into the free offset calculation.
-;
-; varying int{8,16,32,float,64,double}
-; __pseudo_gather_factored_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
-;       int{32,64} offsets, uniform int32 offset_scale,
-;       int{32,64} offset_delta, mask)
-;
-; Then, the GSImprovementsPass optimizations finds these and either
-; converts them to native gather functions or converts them to vector
-; loads, if equivalent.
; However, vectors of pointers were not legal in LLVM until recently, so
; instead, it emits calls to functions that either take vectors of int32s
; or int64s, depending on the compilation target.
declare <WIDTH x i8> @__pseudo_gather32_i8(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i16> @__pseudo_gather32_i16(<WIDTH x i32>, <WIDTH x MASK>) nounwind readonly
@@ -1621,30 +1606,105 @@ declare <WIDTH x float> @__pseudo_gather64_float(<WIDTH x i64>, <WIDTH x MASK>)
declare <WIDTH x i64> @__pseudo_gather64_i64(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
declare <WIDTH x double> @__pseudo_gather64_double(<WIDTH x i64>, <WIDTH x MASK>) nounwind readonly
; The ImproveMemoryOps optimization pass finds these calls and then
; tries to convert them to be calls to gather functions that take a uniform
; base pointer and then a varying integer offset, when possible.
;
; For targets without a native gather instruction, it is best to factor the
; integer offsets like "{1/2/4/8} * varying_offset + constant_offset",
; where varying_offset includes non-compile time constant values, and
; constant_offset includes compile-time constant values. (The scalar loads
; generated in turn can then take advantage of the free offsetting and scale by
; 1/2/4/8 that is offered by the x86 addressing modes.)
;
; varying int{8,16,32,float,64,double}
; __pseudo_gather_factored_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
;       int{32,64} offsets, uniform int32 offset_scale,
;       int{32,64} offset_delta, mask)
;
; For targets with a gather instruction, it is better to just factor them into
; a gather from a uniform base pointer and then "{1/2/4/8} * offsets", where the
; offsets are int32/64 vectors.
;
; varying int{8,16,32,float,64,double}
; __pseudo_gather_base_offsets{32,64}_{i8,i16,i32,float,i64,double}(uniform int8 *base,
;       uniform int32 offset_scale, int{32,64} offsets, mask)
declare <WIDTH x i8>
@__pseudo_gather_factored_base_offsets32_i8(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                            <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i16>
@__pseudo_gather_factored_base_offsets32_i16(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i32>
@__pseudo_gather_factored_base_offsets32_i32(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x float>
@__pseudo_gather_factored_base_offsets32_float(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                               <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i64>
@__pseudo_gather_factored_base_offsets32_i64(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x double>
@__pseudo_gather_factored_base_offsets32_double(i8 *, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i8>
@__pseudo_gather_factored_base_offsets64_i8(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                            <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i16>
@__pseudo_gather_factored_base_offsets64_i16(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i32>
@__pseudo_gather_factored_base_offsets64_i32(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x float>
@__pseudo_gather_factored_base_offsets64_float(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                               <WIDTH x MASK>) nounwind readonly
declare <WIDTH x i64>
@__pseudo_gather_factored_base_offsets64_i64(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
                                             <WIDTH x MASK>) nounwind readonly
declare <WIDTH x double>
@__pseudo_gather_factored_base_offsets64_double(i8 *, <WIDTH x i64>, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i8>
@__pseudo_gather_base_offsets32_i8(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i16>
@__pseudo_gather_base_offsets32_i16(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i32>
@__pseudo_gather_base_offsets32_i32(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x float>
@__pseudo_gather_base_offsets32_float(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i64>
@__pseudo_gather_base_offsets32_i64(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x double>
@__pseudo_gather_base_offsets32_double(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i8>
@__pseudo_gather_base_offsets64_i8(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i16>
@__pseudo_gather_base_offsets64_i16(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i32>
@__pseudo_gather_base_offsets64_i32(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x float>
@__pseudo_gather_base_offsets64_float(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x i64>
@__pseudo_gather_base_offsets64_i64(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
declare <WIDTH x double>
@__pseudo_gather_base_offsets64_double(i8 *, i32, <WIDTH x i64>,
<WIDTH x MASK>) nounwind readonly
; Similarly to the pseudo-gathers defined above, we also declare undefined
@@ -1657,16 +1717,6 @@ declare <WIDTH x double> @__pseudo_gather_factored_base_offsets64_double(i8 *, <
; void __pseudo_scatter_i64(varying int64 *, varying int64 values, mask)
; void __pseudo_scatter_double(varying double *, varying double values, mask)
;
-; The GatherScatterFlattenOpt optimization pass also finds these and
-; transforms them to scatters like:
-;
-; void __pseudo_scatter_factored_base_offsets{32,64}_i8(uniform int8 *base,
-;             varying int32 offsets, uniform int32 offset_scale,
-;             varying int{32,64} offset_delta, varying int8 values, mask)
-; (and similarly for 16/32/64 bit values)
-;
-; And the GSImprovementsPass in turn converts these to actual native
-; scatters or masked stores.
declare void @__pseudo_scatter32_i8(<WIDTH x i32>, <WIDTH x i8>, <WIDTH x MASK>) nounwind
declare void @__pseudo_scatter32_i16(<WIDTH x i32>, <WIDTH x i16>, <WIDTH x MASK>) nounwind
@@ -1682,30 +1732,95 @@ declare void @__pseudo_scatter64_float(<WIDTH x i64>, <WIDTH x float>, <WIDTH x
declare void @__pseudo_scatter64_i64(<WIDTH x i64>, <WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void @__pseudo_scatter64_double(<WIDTH x i64>, <WIDTH x double>, <WIDTH x MASK>) nounwind
; And the ImproveMemoryOps optimization pass also finds these and
; either transforms them to scatters like:
;
; void __pseudo_scatter_factored_base_offsets{32,64}_i8(uniform int8 *base,
;             varying int32 offsets, uniform int32 offset_scale,
;             varying int{32,64} offset_delta, varying int8 values, mask)
; (and similarly for 16/32/64 bit values)
;
; Or, if the target has a native scatter instruction:
;
; void __pseudo_scatter_base_offsets{32,64}_i8(uniform int8 *base,
;             uniform int32 offset_scale, varying int{32,64} offsets,
;             varying int8 values, mask)
; (and similarly for 16/32/64 bit values)
declare void
@__pseudo_scatter_factored_base_offsets32_i8(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                             <WIDTH x i8>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets32_i16(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                              <WIDTH x i16>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets32_i32(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                              <WIDTH x i32>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets32_float(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                <WIDTH x float>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets32_i64(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                              <WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets32_double(i8 * nocapture, <WIDTH x i32>, i32, <WIDTH x i32>,
                                                 <WIDTH x double>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_i8(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                             <WIDTH x i8>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_i16(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                              <WIDTH x i16>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_i32(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                              <WIDTH x i32>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_float(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                                <WIDTH x float>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_i64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
                                              <WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_factored_base_offsets64_double(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
<WIDTH x double>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_i8(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x i8>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_i16(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x i16>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_i32(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x i32>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_float(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x float>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_i64(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets32_double(i8 * nocapture, i32, <WIDTH x i32>,
<WIDTH x double>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_i8(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x i8>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_i16(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x i16>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_i32(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x i32>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_float(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x float>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_i64(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x double>, <WIDTH x MASK>) nounwind
declare float @__log_uniform_float(float) nounwind readnone
@@ -1871,6 +1986,109 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
<WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %g64_d)
ifelse(HAVE_GATHER, `1',
`
%nfpgbo32_8 = call <WIDTH x i8>
@__pseudo_gather_base_offsets32_i8(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %nfpgbo32_8)
%nfpgbo32_16 = call <WIDTH x i16>
@__pseudo_gather_base_offsets32_i16(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %nfpgbo32_16)
%nfpgbo32_32 = call <WIDTH x i32>
@__pseudo_gather_base_offsets32_i32(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %nfpgbo32_32)
%nfpgbo32_f = call <WIDTH x float>
@__pseudo_gather_base_offsets32_float(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usefloat(<WIDTH x float> %nfpgbo32_f)
%nfpgbo32_64 = call <WIDTH x i64>
@__pseudo_gather_base_offsets32_i64(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %nfpgbo32_64)
%nfpgbo32_d = call <WIDTH x double>
@__pseudo_gather_base_offsets32_double(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %nfpgbo32_d)
%nfpgbo64_8 = call <WIDTH x i8>
@__pseudo_gather_base_offsets64_i8(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %nfpgbo64_8)
%nfpgbo64_16 = call <WIDTH x i16>
@__pseudo_gather_base_offsets64_i16(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %nfpgbo64_16)
%nfpgbo64_32 = call <WIDTH x i32>
@__pseudo_gather_base_offsets64_i32(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %nfpgbo64_32)
%nfpgbo64_f = call <WIDTH x float>
@__pseudo_gather_base_offsets64_float(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usefloat(<WIDTH x float> %nfpgbo64_f)
%nfpgbo64_64 = call <WIDTH x i64>
@__pseudo_gather_base_offsets64_i64(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %nfpgbo64_64)
%nfpgbo64_d = call <WIDTH x double>
@__pseudo_gather_base_offsets64_double(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %nfpgbo64_d)
%nfgbo32_8 = call <WIDTH x i8>
@__gather_base_offsets32_i8(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %nfgbo32_8)
%nfgbo32_16 = call <WIDTH x i16>
@__gather_base_offsets32_i16(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %nfgbo32_16)
%nfgbo32_32 = call <WIDTH x i32>
@__gather_base_offsets32_i32(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %nfgbo32_32)
%nfgbo32_f = call <WIDTH x float>
@__gather_base_offsets32_float(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usefloat(<WIDTH x float> %nfgbo32_f)
%nfgbo32_64 = call <WIDTH x i64>
@__gather_base_offsets32_i64(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %nfgbo32_64)
%nfgbo32_d = call <WIDTH x double>
@__gather_base_offsets32_double(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %nfgbo32_d)
%nfgbo64_8 = call <WIDTH x i8>
@__gather_base_offsets64_i8(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %nfgbo64_8)
%nfgbo64_16 = call <WIDTH x i16>
@__gather_base_offsets64_i16(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %nfgbo64_16)
%nfgbo64_32 = call <WIDTH x i32>
@__gather_base_offsets64_i32(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %nfgbo64_32)
%nfgbo64_f = call <WIDTH x float>
@__gather_base_offsets64_float(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usefloat(<WIDTH x float> %nfgbo64_f)
%nfgbo64_64 = call <WIDTH x i64>
@__gather_base_offsets64_i64(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %nfgbo64_64)
%nfgbo64_d = call <WIDTH x double>
@__gather_base_offsets64_double(i8 * %ptr, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %nfgbo64_d)
',
`
%pgbo32_8 = call <WIDTH x i8>
@__pseudo_gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
@@ -1896,32 +2114,6 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %pgbo32_d)
-%gbo32_8 = call <WIDTH x i8>
-@__gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__use8(<WIDTH x i8> %gbo32_8)
-%gbo32_16 = call <WIDTH x i16>
-@__gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__use16(<WIDTH x i16> %gbo32_16)
-%gbo32_32 = call <WIDTH x i32>
-@__gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__use32(<WIDTH x i32> %gbo32_32)
-%gbo32_f = call <WIDTH x float>
-@__gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__usefloat(<WIDTH x float> %gbo32_f)
-%gbo32_64 = call <WIDTH x i64>
-@__gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__use64(<WIDTH x i64> %gbo32_64)
-%gbo32_d = call <WIDTH x double>
-@__gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
-<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
-call void @__usedouble(<WIDTH x double> %gbo32_d)
%pgbo64_8 = call <WIDTH x i8>
@__pseudo_gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
@@ -1947,6 +2139,31 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %pgbo64_d)
%gbo32_8 = call <WIDTH x i8>
@__gather_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %gbo32_8)
%gbo32_16 = call <WIDTH x i16>
@__gather_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %gbo32_16)
%gbo32_32 = call <WIDTH x i32>
@__gather_factored_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %gbo32_32)
%gbo32_f = call <WIDTH x float>
@__gather_factored_base_offsets32_float(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usefloat(<WIDTH x float> %gbo32_f)
%gbo32_64 = call <WIDTH x i64>
@__gather_factored_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %gbo32_64)
%gbo32_d = call <WIDTH x double>
@__gather_factored_base_offsets32_double(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %gbo32_d)
%gbo64_8 = call <WIDTH x i8>
@__gather_factored_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
@@ -1970,7 +2187,8 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
%gbo64_d = call <WIDTH x double>
@__gather_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__usedouble(<WIDTH x double> %pgbo64_d)
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; scatters
@@ -2003,6 +2221,61 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter64_double(<WIDTH x i64> %v64, <WIDTH x double> %vd, <WIDTH x MASK> %mask)
ifelse(HAVE_SCATTER, `1',
`
call void @__pseudo_scatter_base_offsets32_i8(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_i16(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_i32(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_float(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x float> %vf, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_i64(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_double(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x double> %vd, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_i8(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_i16(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_i32(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_float(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x float> %vf, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_i64(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_double(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x double> %vd, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i8(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i16(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i32(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_float(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x float> %vf, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i64(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_double(i8 * %ptr, i32 0, <WIDTH x i32> %v32,
<WIDTH x double> %vd, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i8(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i16(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i32(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_float(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x float> %vf, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i64(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_double(i8 * %ptr, i32 0, <WIDTH x i64> %v64,
<WIDTH x double> %vd, <WIDTH x MASK> %mask)
',
`
call void @__pseudo_scatter_factored_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_factored_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
@@ -2054,6 +2327,7 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter_factored_base_offsets64_double(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x double> %vd, <WIDTH x MASK> %mask)
')
ret void
}
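
The opt.cpp changes below select between these two families of functions at
compile time, based on the new hasGather / hasScatter target flags. A
simplified, illustrative C++ sketch of that selection (not the actual ispc
code; the helper below is hypothetical, though the function names it builds
are the real ones declared above):

    #include <string>

    // Native targets get the non-factored "__pseudo_*_base_offsets*" form;
    // everything else keeps the factored form with a separate constant delta.
    static std::string baseOffsetsVariant(const std::string &kind,   // "gather" or "scatter"
                                          const std::string &suffix, // e.g. "32_i8"
                                          bool hasNativeInstruction) {
        return hasNativeInstruction
                   ? "__pseudo_" + kind + "_base_offsets" + suffix
                   : "__pseudo_" + kind + "_factored_base_offsets" + suffix;
    }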


@@ -212,6 +212,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
    // This is the case for most of them
    t->hasHalf = t->hasRand = t->hasTranscendentals = false;
    t->hasGather = t->hasScatter = false;
    if (!strcasecmp(isa, "sse2")) {
        t->isa = Target::SSE2;

ispc.h

@@ -252,9 +252,15 @@ struct Target {
conversions. */
bool hasHalf;
/** Indicates whether there is an ISA random number instruction. */
bool hasRand;
/** Indicates whether the target has a native gather instruction */
bool hasGather;
/** Indicates whether the target has a native scatter instruction */
bool hasScatter;
/** Indicates whether the target has support for transcendentals (beyond
sqrt, which we assume that all of them handle). */
bool hasTranscendentals;

opt.cpp

@@ -225,7 +225,6 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
}
-#if 0
static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
          llvm::Value *arg2, llvm::Value *arg3, const char *name,
@@ -234,7 +233,6 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
    llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]);
    return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
}
-#endif
static llvm::Instruction *
lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
@@ -1673,6 +1671,39 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr,
}
/** Check to see if the single offset vector can safely be represented with
32-bit values. If so, return true and update the pointed-to
llvm::Value * to be the 32-bit equivalent. */
static bool
lOffsets32BitSafe(llvm::Value **offsetPtr,
llvm::Instruction *insertBefore) {
llvm::Value *offset = *offsetPtr;
if (offset->getType() == LLVMTypes::Int32VectorType)
return true;
llvm::SExtInst *sext = llvm::dyn_cast<llvm::SExtInst>(offset);
if (sext != NULL &&
sext->getOperand(0)->getType() == LLVMTypes::Int32VectorType) {
// sext of a 32-bit vector -> the 32-bit vector is good
*offsetPtr = sext->getOperand(0);
return true;
}
else if (lVectorIs32BitInts(offset)) {
// The only constant vector we should have here is a vector of
// all zeros (i.e. a ConstantAggregateZero, but just in case,
// do the more general check with lVectorIs32BitInts().
*offsetPtr =
new llvm::TruncInst(offset, LLVMTypes::Int32VectorType,
LLVMGetName(offset, "_trunc"),
insertBefore);
return true;
}
else
return false;
}
static bool
lGSToGSBaseOffsets(llvm::CallInst *callInst) {
struct GSInfo { struct GSInfo {
@@ -1689,57 +1720,153 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
    };
    GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather32_i8", "__pseudo_gather_factored_base_offsets32_i8", GSInfo("__pseudo_gather32_i8",
"__pseudo_gather_factored_base_offsets32_i8", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GSInfo("__pseudo_gather32_i16", "__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i8",
"__pseudo_gather_factored_base_offsets32_i16", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GSInfo("__pseudo_gather32_i32", "__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i8",
"__pseudo_gather_factored_base_offsets32_i32", true), true),
GSInfo("__pseudo_gather32_float", "__pseudo_gather_factored_base_offsets32_float", GSInfo("__pseudo_gather32_i16",
"__pseudo_gather_factored_base_offsets32_float", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GSInfo("__pseudo_gather32_i64", "__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i16",
"__pseudo_gather_factored_base_offsets32_i64", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GSInfo("__pseudo_gather32_double", "__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_i16",
"__pseudo_gather_factored_base_offsets32_double", true), true),
GSInfo("__pseudo_gather32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_factored_base_offsets32_i8", GSInfo("__pseudo_scatter32_i8",
"__pseudo_scatter_factored_base_offsets32_i8", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i8",
"__pseudo_scatter_factored_base_offsets32_i16", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i8",
"__pseudo_scatter_factored_base_offsets32_i32", false), false),
GSInfo("__pseudo_scatter32_float", "__pseudo_scatter_factored_base_offsets32_float", GSInfo("__pseudo_scatter32_i16",
"__pseudo_scatter_factored_base_offsets32_float", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i16",
"__pseudo_scatter_factored_base_offsets32_i64", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
GSInfo("__pseudo_scatter32_double", "__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_i16",
"__pseudo_scatter_factored_base_offsets32_double", false), false),
GSInfo("__pseudo_scatter32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
GSInfo("__pseudo_gather64_i8", "__pseudo_gather_factored_base_offsets64_i8", GSInfo("__pseudo_gather64_i8",
"__pseudo_gather_factored_base_offsets32_i8", true), g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
GSInfo("__pseudo_gather64_i16", "__pseudo_gather_factored_base_offsets64_i16", "__pseudo_gather_factored_base_offsets64_i8",
"__pseudo_gather_factored_base_offsets32_i16", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GSInfo("__pseudo_gather64_i32", "__pseudo_gather_factored_base_offsets64_i32", "__pseudo_gather_factored_base_offsets32_i8",
"__pseudo_gather_factored_base_offsets32_i32", true), true),
GSInfo("__pseudo_gather64_float", "__pseudo_gather_factored_base_offsets64_float", GSInfo("__pseudo_gather64_i16",
"__pseudo_gather_factored_base_offsets32_float", true), g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
GSInfo("__pseudo_gather64_i64", "__pseudo_gather_factored_base_offsets64_i64", "__pseudo_gather_factored_base_offsets64_i16",
"__pseudo_gather_factored_base_offsets32_i64", true), g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GSInfo("__pseudo_gather64_double", "__pseudo_gather_factored_base_offsets64_double", "__pseudo_gather_factored_base_offsets32_i16",
"__pseudo_gather_factored_base_offsets32_double", true), true),
GSInfo("__pseudo_gather64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_factored_base_offsets64_i8", GSInfo("__pseudo_scatter64_i8",
"__pseudo_scatter_factored_base_offsets32_i8", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_factored_base_offsets64_i16", "__pseudo_scatter_factored_base_offsets64_i8",
"__pseudo_scatter_factored_base_offsets32_i16", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_factored_base_offsets64_i32", "__pseudo_scatter_factored_base_offsets32_i8",
"__pseudo_scatter_factored_base_offsets32_i32", false), false),
GSInfo("__pseudo_scatter64_float", "__pseudo_scatter_factored_base_offsets64_float", GSInfo("__pseudo_scatter64_i16",
"__pseudo_scatter_factored_base_offsets32_float", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_factored_base_offsets64_i64", "__pseudo_scatter_factored_base_offsets64_i16",
"__pseudo_scatter_factored_base_offsets32_i64", false), g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_factored_base_offsets64_double", "__pseudo_scatter_factored_base_offsets32_i16",
"__pseudo_scatter_factored_base_offsets32_double", false), false),
GSInfo("__pseudo_scatter64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
    };
    int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
@@ -1771,6 +1898,59 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
// to the next instruction...
return false;
// Cast the base pointer to a void *, since that's what the
// __pseudo_*_base_offsets_* functions want.
basePtr = new llvm::IntToPtrInst(basePtr, LLVMTypes::VoidPointerType,
LLVMGetName(basePtr, "_2void"), callInst);
lCopyMetadata(basePtr, callInst);
llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
if ((info->isGather == true && g->target.hasGather) ||
(info->isGather == false && g->target.hasScatter)) {
// See if the offsets are scaled by 2, 4, or 8. If so,
// extract that scale factor and rewrite the offsets to remove
// it.
llvm::Value *offsetScale = lExtractOffsetVector248Scale(&offsetVector);
// If we're doing 32-bit addressing on a 64-bit target, here we
// will see if we can call one of the 32-bit variants of the pseudo
// gather/scatter functions.
if (g->opt.force32BitAddressing &&
lOffsets32BitSafe(&offsetVector, callInst)) {
gatherScatterFunc = info->baseOffsets32Func;
}
if (info->isGather) {
llvm::Value *mask = callInst->getArgOperand(1);
// Generate a new function call to the next pseudo gather
// base+offsets instruction. Note that we're passing a NULL
// llvm::Instruction to llvm::CallInst::Create; this means that
// the instruction isn't inserted into a basic block and that
// way we can then call ReplaceInstWithInst().
llvm::Instruction *newCall =
lCallInst(gatherScatterFunc, basePtr, offsetScale, offsetVector,
mask, callInst->getName().str().c_str(),
NULL);
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
llvm::Value *storeValue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
// Generate a new function call to the next pseudo scatter
// base+offsets instruction. See above for why passing NULL
// for the Instruction * is intended.
llvm::Instruction *newCall =
lCallInst(gatherScatterFunc, basePtr, offsetScale,
offsetVector, storeValue, mask, "", NULL);
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
}
else {
// Try to decompose the offset vector into a compile time constant
// component and a varying component.  The constant component is
// passed as a separate parameter to the gather/scatter functions,
@@ -1790,14 +1970,6 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
// 2/4/8 offered by x86 addressing operators.)
llvm::Value *offsetScale = lExtractOffsetVector248Scale(&variableOffset);
-// Cast the base pointer to a void *, since that's what the
-// __pseudo_*_base_offsets_* functions want.
-basePtr = new llvm::IntToPtrInst(basePtr, LLVMTypes::VoidPointerType,
-                                 LLVMGetName(basePtr, "_2void"), callInst);
-lCopyMetadata(basePtr, callInst);
-llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
// If we're doing 32-bit addressing on a 64-bit target, here we
// will see if we can call one of the 32-bit variants of the pseudo
// gather/scatter functions.
@@ -1834,7 +2006,7 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
}
return true;
}
@@ -1858,57 +2030,67 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
    };
    GSBOInfo gsFuncs[] = {
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
                                       "__pseudo_gather_factored_base_offsets32_i8",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
                                       "__pseudo_gather_factored_base_offsets32_i8",
                 true),
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
                                       "__pseudo_gather_factored_base_offsets32_i16",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
                                       "__pseudo_gather_factored_base_offsets32_i16",
                 true),
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
                                       "__pseudo_gather_factored_base_offsets32_i32",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
                                       "__pseudo_gather_factored_base_offsets32_i32",
                 true),
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
                                       "__pseudo_gather_factored_base_offsets32_float",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
                                       "__pseudo_gather_factored_base_offsets32_float",
                 true),
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
                                       "__pseudo_gather_factored_base_offsets32_i64",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
                                       "__pseudo_gather_factored_base_offsets32_i64",
                 true),
        GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
                                       "__pseudo_gather_factored_base_offsets32_double",
                 g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
                                       "__pseudo_gather_factored_base_offsets32_double",
                 true),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
                                        "__pseudo_scatter_factored_base_offsets32_i8",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
                                        "__pseudo_scatter_factored_base_offsets32_i8",
                 false),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
                                        "__pseudo_scatter_factored_base_offsets32_i16",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
                                        "__pseudo_scatter_factored_base_offsets32_i16",
                 false),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
                                        "__pseudo_scatter_factored_base_offsets32_i32",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
                                        "__pseudo_scatter_factored_base_offsets32_i32",
                 false),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
                                        "__pseudo_scatter_factored_base_offsets32_float",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
                                        "__pseudo_scatter_factored_base_offsets32_float",
                 false),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
                                        "__pseudo_scatter_factored_base_offsets32_i64",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
                                        "__pseudo_scatter_factored_base_offsets32_i64",
                 false),
        GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
                                        "__pseudo_scatter_factored_base_offsets32_double",
                 g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
                                        "__pseudo_scatter_factored_base_offsets32_double",
                 false),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_i8",
-                 "__pseudo_gather_factored_base_offsets32_i8", true),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_i16",
-                 "__pseudo_gather_factored_base_offsets32_i16", true),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_i32",
-                 "__pseudo_gather_factored_base_offsets32_i32", true),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_float",
-                 "__pseudo_gather_factored_base_offsets32_float", true),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_i64",
-                 "__pseudo_gather_factored_base_offsets32_i64", true),
-        GSBOInfo("__pseudo_gather_factored_base_offsets64_double",
-                 "__pseudo_gather_factored_base_offsets32_double", true),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i8",
-                 "__pseudo_scatter_factored_base_offsets32_i8", false),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i16",
-                 "__pseudo_scatter_factored_base_offsets32_i16", false),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i32",
-                 "__pseudo_scatter_factored_base_offsets32_i32", false),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_float",
-                 "__pseudo_scatter_factored_base_offsets32_float", false),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_i64",
-                 "__pseudo_scatter_factored_base_offsets32_i64", false),
-        GSBOInfo("__pseudo_scatter_factored_base_offsets64_double",
-                 "__pseudo_scatter_factored_base_offsets32_double", false),
    };
    int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
@@ -1991,6 +2173,26 @@ lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
}
static llvm::Constant *
lGetOffsetScaleVec(llvm::Value *offsetScale, llvm::Type *vecType) {
llvm::ConstantInt *offsetScaleInt =
llvm::dyn_cast<llvm::ConstantInt>(offsetScale);
Assert(offsetScaleInt != NULL);
uint64_t scaleValue = offsetScaleInt->getZExtValue();
std::vector<llvm::Constant *> scales;
for (int i = 0; i < g->target.vectorWidth; ++i) {
if (vecType == LLVMTypes::Int64VectorType)
scales.push_back(LLVMInt64(scaleValue));
else {
Assert(vecType == LLVMTypes::Int32VectorType);
scales.push_back(LLVMInt32((int32_t)scaleValue));
}
}
return llvm::ConstantVector::get(scales);
}
/** After earlier optimization passes have run, we are sometimes able to
    determine that gathers/scatters are actually accessing memory in a more
    regular fashion and then change the operation to something simpler and
@@ -2011,7 +2213,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
    struct GatherImpInfo {
        GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
                      int a)
            : align(a), isFactored(!g->target.hasGather) {
            pseudoFunc = m->module->getFunction(pName);
            loadMaskedFunc = m->module->getFunction(lmName);
            Assert(pseudoFunc != NULL && loadMaskedFunc != NULL);
@@ -2022,39 +2224,52 @@ lGSToLoadStore(llvm::CallInst *callInst) {
        llvm::Function *loadMaskedFunc;
        llvm::Type *scalarType;
        const int align;
        const bool isFactored;
    };
    GatherImpInfo gInfo[] = {
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
                                            "__pseudo_gather_factored_base_offsets32_i8",
                      "__masked_load_i8", LLVMTypes::Int8Type, 1),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
                                            "__pseudo_gather_factored_base_offsets32_i16",
                      "__masked_load_i16", LLVMTypes::Int16Type, 2),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
                                            "__pseudo_gather_factored_base_offsets32_i32",
                      "__masked_load_i32", LLVMTypes::Int32Type, 4),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
                                            "__pseudo_gather_factored_base_offsets32_float",
                      "__masked_load_float", LLVMTypes::FloatType, 4),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
                                            "__pseudo_gather_factored_base_offsets32_i64",
                      "__masked_load_i64", LLVMTypes::Int64Type, 8),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
                                            "__pseudo_gather_factored_base_offsets32_double",
                      "__masked_load_double", LLVMTypes::DoubleType, 8),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
                                            "__pseudo_gather_factored_base_offsets64_i8",
                      "__masked_load_i8", LLVMTypes::Int8Type, 1),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
                                            "__pseudo_gather_factored_base_offsets64_i16",
                      "__masked_load_i16", LLVMTypes::Int16Type, 2),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
                                            "__pseudo_gather_factored_base_offsets64_i32",
                      "__masked_load_i32", LLVMTypes::Int32Type, 4),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
                                            "__pseudo_gather_factored_base_offsets64_float",
                      "__masked_load_float", LLVMTypes::FloatType, 4),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
                                            "__pseudo_gather_factored_base_offsets64_i64",
                      "__masked_load_i64", LLVMTypes::Int64Type, 8),
        GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
                                            "__pseudo_gather_factored_base_offsets64_double",
                      "__masked_load_double", LLVMTypes::DoubleType, 8),
    };
    struct ScatterImpInfo {
        ScatterImpInfo(const char *pName, const char *msName,
                       llvm::Type *vpt, int a)
-           : align(a) {
            : align(a), isFactored(!g->target.hasScatter) {
            pseudoFunc = m->module->getFunction(pName);
            maskedStoreFunc = m->module->getFunction(msName);
            vecPtrType = vpt;
@@ -2064,33 +2279,46 @@ lGSToLoadStore(llvm::CallInst *callInst) {
        llvm::Function *pseudoFunc;
        llvm::Function *maskedStoreFunc;
        llvm::Type *vecPtrType;
        const int align;
        const bool isFactored;
    };
    ScatterImpInfo sInfo[] = {
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_masked_store_i8",
-                      LLVMTypes::Int8VectorPointerType, 1),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_masked_store_i16",
-                      LLVMTypes::Int16VectorPointerType, 2),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_masked_store_i32",
-                      LLVMTypes::Int32VectorPointerType, 4),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_float", "__pseudo_masked_store_float",
-                      LLVMTypes::FloatVectorPointerType, 4),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_masked_store_i64",
-                      LLVMTypes::Int64VectorPointerType, 8),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets32_double", "__pseudo_masked_store_double",
-                      LLVMTypes::DoubleVectorPointerType, 8),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i8", "__pseudo_masked_store_i8",
-                      LLVMTypes::Int8VectorPointerType, 1),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i16", "__pseudo_masked_store_i16",
-                      LLVMTypes::Int16VectorPointerType, 2),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i32", "__pseudo_masked_store_i32",
-                      LLVMTypes::Int32VectorPointerType, 4),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_float", "__pseudo_masked_store_float",
-                      LLVMTypes::FloatVectorPointerType, 4),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_i64", "__pseudo_masked_store_i64",
-                      LLVMTypes::Int64VectorPointerType, 8),
-       ScatterImpInfo("__pseudo_scatter_factored_base_offsets64_double", "__pseudo_masked_store_double",
-                      LLVMTypes::DoubleVectorPointerType, 8)
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
                                              "__pseudo_scatter_factored_base_offsets32_i8",
                       "__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
                                              "__pseudo_scatter_factored_base_offsets32_i16",
                       "__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
                                              "__pseudo_scatter_factored_base_offsets32_i32",
                       "__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
                                              "__pseudo_scatter_factored_base_offsets32_float",
                       "__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
                                              "__pseudo_scatter_factored_base_offsets32_i64",
                       "__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
                                              "__pseudo_scatter_factored_base_offsets32_double",
                       "__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
                                              "__pseudo_scatter_factored_base_offsets64_i8",
                       "__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
                                              "__pseudo_scatter_factored_base_offsets64_i16",
                       "__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
                                              "__pseudo_scatter_factored_base_offsets64_i32",
                       "__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
                                              "__pseudo_scatter_factored_base_offsets64_float",
                       "__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
                                              "__pseudo_scatter_factored_base_offsets64_i64",
                       "__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
        ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
                                              "__pseudo_scatter_factored_base_offsets64_double",
                       "__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
    };
    llvm::Function *calledFunc = callInst->getCalledFunction();
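    // How calledFunc is matched against the tables above falls outside the hunks
    // shown here; a minimal sketch of the presumed lookup (illustrative only, not
    // code from this commit) would be:
    //
    //     GatherImpInfo *gatherInfo = NULL;
    //     ScatterImpInfo *scatterInfo = NULL;
    //     for (unsigned int i = 0; i < sizeof(gInfo) / sizeof(gInfo[0]); ++i)
    //         if (gInfo[i].pseudoFunc != NULL && gInfo[i].pseudoFunc == calledFunc)
    //             gatherInfo = &gInfo[i];
    //     for (unsigned int i = 0; i < sizeof(sInfo) / sizeof(sInfo[0]); ++i)
    //         if (sInfo[i].pseudoFunc != NULL && sInfo[i].pseudoFunc == calledFunc)
    //             scatterInfo = &sInfo[i];
    //     if (gatherInfo == NULL && scatterInfo == NULL)
    //         return false;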
@@ -2118,34 +2346,45 @@ lGSToLoadStore(llvm::CallInst *callInst) {
    lGetSourcePosFromMetadata(callInst, &pos);

    llvm::Value *base = callInst->getArgOperand(0);
    llvm::Value *fullOffsets = NULL;
    llvm::Value *storeValue = NULL;
    llvm::Value *mask = NULL;

    if ((gatherInfo != NULL && gatherInfo->isFactored) ||
        (scatterInfo != NULL && scatterInfo->isFactored)) {
        llvm::Value *varyingOffsets = callInst->getArgOperand(1);
        llvm::Value *offsetScale = callInst->getArgOperand(2);
        llvm::Value *constOffsets = callInst->getArgOperand(3);
-       llvm::Value *storeValue = (scatterInfo != NULL) ? callInst->getArgOperand(4) : NULL;
-       llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 4 : 5);
        if (scatterInfo)
            storeValue = callInst->getArgOperand(4);
        mask = callInst->getArgOperand((gatherInfo != NULL) ? 4 : 5);

        // Compute the full offset vector: offsetScale * varyingOffsets + constOffsets
-       llvm::ConstantInt *offsetScaleInt =
-           llvm::dyn_cast<llvm::ConstantInt>(offsetScale);
-       Assert(offsetScaleInt != NULL);
-       uint64_t scaleValue = offsetScaleInt->getZExtValue();
-       std::vector<llvm::Constant *> scales;
-       for (int i = 0; i < g->target.vectorWidth; ++i) {
-           if (varyingOffsets->getType() == LLVMTypes::Int64VectorType)
-               scales.push_back(LLVMInt64(scaleValue));
-           else
-               scales.push_back(LLVMInt32((int32_t)scaleValue));
-       }
-       llvm::Constant *offsetScaleVec = llvm::ConstantVector::get(scales);
        llvm::Constant *offsetScaleVec =
            lGetOffsetScaleVec(offsetScale, varyingOffsets->getType());

        llvm::Value *scaledVarying =
            llvm::BinaryOperator::Create(llvm::Instruction::Mul, offsetScaleVec,
                                         varyingOffsets, "scaled_varying", callInst);
-       llvm::Value *fullOffsets =
        fullOffsets =
            llvm::BinaryOperator::Create(llvm::Instruction::Add, scaledVarying,
                                         constOffsets, "varying+const_offsets",
                                         callInst);
    }
    else {
        if (scatterInfo)
            storeValue = callInst->getArgOperand(3);
        mask = callInst->getArgOperand((gatherInfo != NULL) ? 3 : 4);
        llvm::Value *offsetScale = callInst->getArgOperand(1);
        llvm::Value *offsets = callInst->getArgOperand(2);
        llvm::Value *offsetScaleVec =
            lGetOffsetScaleVec(offsetScale, offsets->getType());
        fullOffsets =
            llvm::BinaryOperator::Create(llvm::Instruction::Mul, offsetScaleVec,
                                         offsets, "scaled_offsets", callInst);
    }
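    // Worked example (illustrative; values chosen for this writeup): for a
    // factored 32-bit gather such as
    //     __pseudo_gather_factored_base_offsets32_i32(base, varyingOffsets, 4,
    //                                                 constOffsets, mask)
    // the code above computes fullOffsets = 4 * varyingOffsets + constOffsets, so
    // lane i of the gather reads from base + fullOffsets[i].  For the non-factored
    // form used when the target has native gather, there is no constant term and
    // fullOffsets is simply offsetScale * offsets.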
    Debug(SourcePos(), "GSToLoadStore: %s.",
          fullOffsets->getName().str().c_str());
@@ -3631,7 +3870,6 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
        : isGather(ig) {
        pseudoFunc = m->module->getFunction(pName);
        actualFunc = m->module->getFunction(aName);
-       Assert(pseudoFunc != NULL && actualFunc != NULL);
    }
    llvm::Function *pseudoFunc;
    llvm::Function *actualFunc;
@@ -3639,20 +3877,6 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
};
LowerGSInfo lgsInfo[] = {
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i8", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16", "__gather_factored_base_offsets32_i16", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i32", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_float", "__gather_factored_base_offsets32_float", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64", "__gather_factored_base_offsets32_i64", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_double", "__gather_factored_base_offsets32_double", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i8", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16", "__gather_factored_base_offsets64_i16", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i32", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_float", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64", "__gather_factored_base_offsets64_i64", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_double", "__gather_factored_base_offsets64_double", true),
LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true), LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true),
LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true), LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true),
LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true), LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true),
@@ -3667,19 +3891,57 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true), LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true),
LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true), LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8", "__scatter_factored_base_offsets32_i8", false), LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8",
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16", "__scatter_factored_base_offsets32_i16", false), "__gather_factored_base_offsets32_i8", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32", "__scatter_factored_base_offsets32_i32", false), LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16",
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float", "__scatter_factored_base_offsets32_float", false), "__gather_factored_base_offsets32_i16", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64", "__scatter_factored_base_offsets32_i64", false), LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32",
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double", "__scatter_factored_base_offsets32_double", false), "__gather_factored_base_offsets32_i32", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_float",
"__gather_factored_base_offsets32_float", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64",
"__gather_factored_base_offsets32_i64", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_double",
"__gather_factored_base_offsets32_double", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i8", false), LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8",
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16", "__scatter_factored_base_offsets64_i16", false), "__gather_factored_base_offsets64_i8", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i32", false), LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16",
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_float", false), "__gather_factored_base_offsets64_i16", true),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64", "__scatter_factored_base_offsets64_i64", false), LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32",
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double", "__scatter_factored_base_offsets64_double", false), "__gather_factored_base_offsets64_i32", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_float",
"__gather_factored_base_offsets64_float", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64",
"__gather_factored_base_offsets64_i64", true),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_double",
"__gather_factored_base_offsets64_double", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i8",
"__gather_base_offsets32_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i16",
"__gather_base_offsets32_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i32",
"__gather_base_offsets32_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets32_float",
"__gather_base_offsets32_float", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i64",
"__gather_base_offsets32_i64", true),
LowerGSInfo("__pseudo_gather_base_offsets32_double",
"__gather_base_offsets32_double", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i8",
"__gather_base_offsets64_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i16",
"__gather_base_offsets64_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i32",
"__gather_base_offsets64_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets64_float",
"__gather_base_offsets64_float", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i64",
"__gather_base_offsets64_i64", true),
LowerGSInfo("__pseudo_gather_base_offsets64_double",
"__gather_base_offsets64_double", true),
LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false), LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false),
LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false), LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false),
@@ -3694,6 +3956,59 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
LowerGSInfo("__pseudo_scatter64_float", "__scatter64_float", false), LowerGSInfo("__pseudo_scatter64_float", "__scatter64_float", false),
LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false), LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false),
LowerGSInfo("__pseudo_scatter64_double", "__scatter64_double", false), LowerGSInfo("__pseudo_scatter64_double", "__scatter64_double", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8",
"__scatter_factored_base_offsets32_i8", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16",
"__scatter_factored_base_offsets32_i16", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32",
"__scatter_factored_base_offsets32_i32", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float",
"__scatter_factored_base_offsets32_float", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64",
"__scatter_factored_base_offsets32_i64", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double",
"__scatter_factored_base_offsets32_double", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8",
"__scatter_factored_base_offsets64_i8", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16",
"__scatter_factored_base_offsets64_i16", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32",
"__scatter_factored_base_offsets64_i32", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float",
"__scatter_factored_base_offsets64_float", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64",
"__scatter_factored_base_offsets64_i64", false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double",
"__scatter_factored_base_offsets64_double", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i8",
"__scatter_base_offsets32_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i16",
"__scatter_base_offsets32_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i32",
"__scatter_base_offsets32_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_float",
"__scatter_base_offsets32_float", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i64",
"__scatter_base_offsets32_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_double",
"__scatter_base_offsets32_double", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i8",
"__scatter_base_offsets64_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i16",
"__scatter_base_offsets64_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i32",
"__scatter_base_offsets64_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_float",
"__scatter_base_offsets64_float", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i64",
"__scatter_base_offsets64_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_double",
"__scatter_base_offsets64_double", false),
};
llvm::Function *calledFunc = callInst->getCalledFunction();
@@ -3709,6 +4024,7 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
if (info == NULL)
    return false;
Assert(info->actualFunc != NULL);
// Get the source position from the metadata attached to the call
// instruction so that we can issue PerformanceWarning()s below.
@@ -3905,6 +4221,12 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
"__gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i16", "__gather_factored_base_offsets64_i8", "__gather_factored_base_offsets64_i16",
"__gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i64", "__gather_factored_base_offsets64_i32", "__gather_factored_base_offsets64_i64",
"__gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_double", "__gather_factored_base_offsets64_float", "__gather_factored_base_offsets64_double",
"__gather_base_offsets32_i8", "__gather_base_offsets32_i16",
"__gather_base_offsets32_i32", "__gather_base_offsets32_i64",
"__gather_base_offsets32_float", "__gather_base_offsets32_double",
"__gather_base_offsets64_i8", "__gather_base_offsets64_i16",
"__gather_base_offsets64_i32", "__gather_base_offsets64_i64",
"__gather_base_offsets64_float", "__gather_base_offsets64_double",
"__gather32_i8", "__gather32_i16", "__gather32_i8", "__gather32_i16",
"__gather32_i32", "__gather32_i64", "__gather32_i32", "__gather32_i64",
"__gather32_float", "__gather32_double", "__gather32_float", "__gather32_double",
@@ -3932,6 +4254,12 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
"__scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i16", "__scatter_factored_base_offsets64_i8", "__scatter_factored_base_offsets64_i16",
"__scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i64", "__scatter_factored_base_offsets64_i32", "__scatter_factored_base_offsets64_i64",
"__scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_double", "__scatter_factored_base_offsets64_float", "__scatter_factored_base_offsets64_double",
"__scatter_base_offsets32_i8", "__scatter_base_offsets32_i16",
"__scatter_base_offsets32_i32", "__scatter_base_offsets32_i64",
"__scatter_base_offsets32_float", "__scatter_base_offsets32_double",
"__scatter_base_offsets64_i8", "__scatter_base_offsets64_i16",
"__scatter_base_offsets64_i32", "__scatter_base_offsets64_i64",
"__scatter_base_offsets64_float", "__scatter_base_offsets64_double",
"__scatter_elt32_i8", "__scatter_elt32_i16", "__scatter_elt32_i8", "__scatter_elt32_i16",
"__scatter_elt32_i32", "__scatter_elt32_i64", "__scatter_elt32_i32", "__scatter_elt32_i64",
"__scatter_elt32_float", "__scatter_elt32_double", "__scatter_elt32_float", "__scatter_elt32_double",