Extend gather-scatter optimization with prefetch optimization

This commit is contained in:
Vsevolod Livinskiy
2014-09-25 15:59:31 +04:00
parent 8ff187a7b7
commit 0a6eb61ad0
9 changed files with 204 additions and 127 deletions

View File

@@ -503,6 +503,7 @@ lSetInternalFunctions(llvm::Module *module) {
"__prefetch_read_uniform_2", "__prefetch_read_uniform_2",
"__prefetch_read_uniform_3", "__prefetch_read_uniform_3",
"__prefetch_read_uniform_nt", "__prefetch_read_uniform_nt",
"__pseudo_prefetch_read_varying_1",
"__psubs_vi8", "__psubs_vi8",
"__psubs_vi16", "__psubs_vi16",
"__psubus_vi8", "__psubus_vi8",

View File

@@ -370,6 +370,8 @@ declare void @__prefetch_read_uniform_2(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
declare void @__prefetch_read_varying_1(<WIDTH x i64> %addr, <WIDTH x MASK> %mask) nounwind
declare void @__prefetch_read_varying_1_native(i8 * %base, i32 %scale, <WIDTH x i32> %offsets, <WIDTH x MASK> %mask) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins ;; int8/int16 builtins

View File

@@ -1584,6 +1584,17 @@ define void @__prefetch_read_uniform_nt(i8 *) alwaysinline {
call void @llvm.prefetch(i8 * %0, i32 0, i32 0, i32 1) call void @llvm.prefetch(i8 * %0, i32 0, i32 0, i32 1)
ret void ret void
} }
define void @__prefetch_read_varying_1(<WIDTH x i64> %addr, <WIDTH x MASK> %mask) alwaysinline {
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%iptr_LANE_ID = extractelement <WIDTH x i64> %addr, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8*
call void @llvm.prefetch(i8 * %ptr_LANE_ID, i32 0, i32 3, i32 1)
')
ret void
}
declare void @__prefetch_read_varying_1_native(i8 * %base, i32 %scale, <WIDTH x i32> %offsets, <WIDTH x MASK> %mask) nounwind
') ')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -2535,6 +2546,13 @@ declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>, @__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x double>, <WIDTH x MASK>) nounwind <WIDTH x double>, <WIDTH x MASK>) nounwind
declare void @__pseudo_prefetch_read_varying_1(<WIDTH x i64>, <WIDTH x MASK>) nounwind
declare void
@__pseudo_prefetch_read_varying_1_native(i8 *, i32, <WIDTH x i32>,
<WIDTH x MASK>) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
declare void @__use8(<WIDTH x i8>) declare void @__use8(<WIDTH x i8>)
@@ -3034,6 +3052,17 @@ ifelse(HAVE_SCATTER, `1',
<WIDTH x double> %vd, <WIDTH x MASK> %mask) <WIDTH x double> %vd, <WIDTH x MASK> %mask)
') ')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetchs
call void @__pseudo_prefetch_read_varying_1(<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__pseudo_prefetch_read_varying_1_native(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__prefetch_read_varying_1_native(i8 * %ptr, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__prefetch_read_varying_1(<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
ret void ret void
} }

View File

@@ -2550,6 +2550,16 @@ static FORCEINLINE void __prefetch_read_uniform_nt(unsigned char *p) {
// _mm_prefetch(p, _MM_HINT_NTA); // prefetch into L1$ with non-temporal hint // _mm_prefetch(p, _MM_HINT_NTA); // prefetch into L1$ with non-temporal hint
} }
static FORCEINLINE void __prefetch_read_varying_1_native(uint8_t *base, uint32_t scale,
__vec16_i32 offsets, __vec16_i1 mask) {
_mm512_mask_prefetch_i32gather_ps (offsets, mask, base, scale, _MM_HINT_T0);
offsets = _mm512_permutevar_epi32(_mm512_set_16to16_pi(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8), offsets);
__vec16_i1 copy_mask = _mm512_kmov(mask);
_mm512_kswapb(mask, copy_mask);
_mm512_mask_prefetch_i32gather_ps (offsets, mask, base, scale, _MM_HINT_T0);
}
static FORCEINLINE void __prefetch_read_varying_1(__vec16_i64 addr, __vec16_i1 mask) {}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// atomics // atomics
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -1926,6 +1926,14 @@ static FORCEINLINE void __prefetch_read_uniform_nt(const char *p) {
// _mm_prefetch(p, _MM_HINT_NTA); // prefetch into L1$ with non-temporal hint // _mm_prefetch(p, _MM_HINT_NTA); // prefetch into L1$ with non-temporal hint
} }
static FORCEINLINE void __prefetch_read_varying_1_native(uint8_t *base, uint32_t scale,
__vec16_i32 offsets, __vec16_i1 mask) {
_mm512_prefetch_i32gather_ps(offsets, base, scale, _MM_HINT_T0);
offsets = _mm512_permutevar_epi32(_mm512_set_16to16_pi(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15), offsets);
/* TODO: permutevar mask */
_mm512_prefetch_i32gather_ps(offsets, base, scale, _MM_HINT_T0);
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// atomics // atomics
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -199,7 +199,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_hasTranscendentals(false), m_hasTranscendentals(false),
m_hasTrigonometry(false), m_hasTrigonometry(false),
m_hasRsqrtd(false), m_hasRsqrtd(false),
m_hasRcpd(false) m_hasRcpd(false),
m_hasVecPrefetch(false)
{ {
if (isa == NULL) { if (isa == NULL) {
if (cpu != NULL) { if (cpu != NULL) {
@@ -381,6 +382,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = false; this->m_hasTrigonometry = false;
this->m_hasGather = this->m_hasScatter = true; this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
// It's set to true, because MIC has hardware vector prefetch instruction
this->m_hasVecPrefetch = true;
} }
else if (!strcasecmp(isa, "generic-32") || else if (!strcasecmp(isa, "generic-32") ||
!strcasecmp(isa, "generic-x32")) { !strcasecmp(isa, "generic-x32")) {

5
ispc.h
View File

@@ -280,6 +280,8 @@ public:
bool hasRcpd() const {return m_hasRcpd;} bool hasRcpd() const {return m_hasRcpd;}
bool hasVecPrefetch() const {return m_hasVecPrefetch;}
private: private:
/** llvm Target object representing this target. */ /** llvm Target object representing this target. */
@@ -382,6 +384,9 @@ private:
/** Indicates whether there is an ISA double precision rcp. */ /** Indicates whether there is an ISA double precision rcp. */
bool m_hasRcpd; bool m_hasRcpd;
/** Indicates whether the target has hardware instruction for vector prefetch. */
bool m_hasVecPrefetch;
}; };

263
opt.cpp
View File

@@ -1994,8 +1994,8 @@ static bool
lGSToGSBaseOffsets(llvm::CallInst *callInst) { lGSToGSBaseOffsets(llvm::CallInst *callInst) {
struct GSInfo { struct GSInfo {
GSInfo(const char *pgFuncName, const char *pgboFuncName, GSInfo(const char *pgFuncName, const char *pgboFuncName,
const char *pgbo32FuncName, bool ig) const char *pgbo32FuncName, bool ig, bool ip)
: isGather(ig) { : isGather(ig), isPrefetch(ip) {
func = m->module->getFunction(pgFuncName); func = m->module->getFunction(pgFuncName);
baseOffsetsFunc = m->module->getFunction(pgboFuncName); baseOffsetsFunc = m->module->getFunction(pgboFuncName);
baseOffsets32Func = m->module->getFunction(pgbo32FuncName); baseOffsets32Func = m->module->getFunction(pgbo32FuncName);
@@ -2003,6 +2003,7 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
llvm::Function *func; llvm::Function *func;
llvm::Function *baseOffsetsFunc, *baseOffsets32Func; llvm::Function *baseOffsetsFunc, *baseOffsets32Func;
const bool isGather; const bool isGather;
const bool isPrefetch;
}; };
GSInfo gsFuncs[] = { GSInfo gsFuncs[] = {
@@ -2011,148 +2012,155 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
"__pseudo_gather_factored_base_offsets32_i8", "__pseudo_gather_factored_base_offsets32_i8",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8", "__pseudo_gather_factored_base_offsets32_i8",
true), true, false),
GSInfo("__pseudo_gather32_i16", GSInfo("__pseudo_gather32_i16",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i16",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i16",
true), true, false),
GSInfo("__pseudo_gather32_i32", GSInfo("__pseudo_gather32_i32",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i32",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i32",
true), true, false),
GSInfo("__pseudo_gather32_float", GSInfo("__pseudo_gather32_float",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float", "__pseudo_gather_factored_base_offsets32_float",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float", "__pseudo_gather_factored_base_offsets32_float",
true), true, false),
GSInfo("__pseudo_gather32_i64", GSInfo("__pseudo_gather32_i64",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i64",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i64",
true), true, false),
GSInfo("__pseudo_gather32_double", GSInfo("__pseudo_gather32_double",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_double",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_double",
true), true, false),
GSInfo("__pseudo_scatter32_i8", GSInfo("__pseudo_scatter32_i8",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
false), false, false),
GSInfo("__pseudo_scatter32_i16", GSInfo("__pseudo_scatter32_i16",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
false), false, false),
GSInfo("__pseudo_scatter32_i32", GSInfo("__pseudo_scatter32_i32",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
false), false, false),
GSInfo("__pseudo_scatter32_float", GSInfo("__pseudo_scatter32_float",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float", "__pseudo_scatter_factored_base_offsets32_float",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float", "__pseudo_scatter_factored_base_offsets32_float",
false), false, false),
GSInfo("__pseudo_scatter32_i64", GSInfo("__pseudo_scatter32_i64",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
false), false, false),
GSInfo("__pseudo_scatter32_double", GSInfo("__pseudo_scatter32_double",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_double",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_double",
false), false, false),
GSInfo("__pseudo_gather64_i8", GSInfo("__pseudo_gather64_i8",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8", "__pseudo_gather_factored_base_offsets64_i8",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8", "__pseudo_gather_factored_base_offsets32_i8",
true), true, false),
GSInfo("__pseudo_gather64_i16", GSInfo("__pseudo_gather64_i16",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" :
"__pseudo_gather_factored_base_offsets64_i16", "__pseudo_gather_factored_base_offsets64_i16",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i16",
true), true, false),
GSInfo("__pseudo_gather64_i32", GSInfo("__pseudo_gather64_i32",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32", "__pseudo_gather_factored_base_offsets64_i32",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i32",
true), true, false),
GSInfo("__pseudo_gather64_float", GSInfo("__pseudo_gather64_float",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float", "__pseudo_gather_factored_base_offsets64_float",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float", "__pseudo_gather_factored_base_offsets32_float",
true), true, false),
GSInfo("__pseudo_gather64_i64", GSInfo("__pseudo_gather64_i64",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64", "__pseudo_gather_factored_base_offsets64_i64",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i64",
true), true, false),
GSInfo("__pseudo_gather64_double", GSInfo("__pseudo_gather64_double",
g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" : g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double", "__pseudo_gather_factored_base_offsets64_double",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_double",
true), true, false),
GSInfo("__pseudo_scatter64_i8", GSInfo("__pseudo_scatter64_i8",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" :
"__pseudo_scatter_factored_base_offsets64_i8", "__pseudo_scatter_factored_base_offsets64_i8",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
false), false, false),
GSInfo("__pseudo_scatter64_i16", GSInfo("__pseudo_scatter64_i16",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" :
"__pseudo_scatter_factored_base_offsets64_i16", "__pseudo_scatter_factored_base_offsets64_i16",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
false), false, false),
GSInfo("__pseudo_scatter64_i32", GSInfo("__pseudo_scatter64_i32",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32", "__pseudo_scatter_factored_base_offsets64_i32",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
false), false, false),
GSInfo("__pseudo_scatter64_float", GSInfo("__pseudo_scatter64_float",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float", "__pseudo_scatter_factored_base_offsets64_float",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float", "__pseudo_scatter_factored_base_offsets32_float",
false), false, false),
GSInfo("__pseudo_scatter64_i64", GSInfo("__pseudo_scatter64_i64",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64", "__pseudo_scatter_factored_base_offsets64_i64",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
false), false, false),
GSInfo("__pseudo_scatter64_double", GSInfo("__pseudo_scatter64_double",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double", "__pseudo_scatter_factored_base_offsets64_double",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_double",
false), false, false),
GSInfo("__pseudo_prefetch_read_varying_1",
g->target->hasVecPrefetch() ? "__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
g->target->hasVecPrefetch() ? "__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
false, true),
}; };
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]); int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
@@ -2178,7 +2186,8 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
llvm::Value *basePtr = lGetBasePtrAndOffsets(ptrs, &offsetVector, llvm::Value *basePtr = lGetBasePtrAndOffsets(ptrs, &offsetVector,
callInst); callInst);
if (basePtr == NULL || offsetVector == NULL) if (basePtr == NULL || offsetVector == NULL ||
(info->isGather == false && info->isPrefetch == true && g->target->hasVecPrefetch() == false))
// It's actually a fully general gather/scatter with a varying // It's actually a fully general gather/scatter with a varying
// set of base pointers, so leave it as is and continune onward // set of base pointers, so leave it as is and continune onward
// to the next instruction... // to the next instruction...
@@ -2193,7 +2202,9 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
llvm::Function *gatherScatterFunc = info->baseOffsetsFunc; llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
if ((info->isGather == true && g->target->hasGather()) || if ((info->isGather == true && g->target->hasGather()) ||
(info->isGather == false && g->target->hasScatter())) { (info->isGather == false && info->isPrefetch == false && g->target->hasScatter()) ||
(info->isGather == false && info->isPrefetch == true && g->target->hasVecPrefetch())) {
// See if the offsets are scaled by 2, 4, or 8. If so, // See if the offsets are scaled by 2, 4, or 8. If so,
// extract that scale factor and rewrite the offsets to remove // extract that scale factor and rewrite the offsets to remove
// it. // it.
@@ -2207,7 +2218,7 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
gatherScatterFunc = info->baseOffsets32Func; gatherScatterFunc = info->baseOffsets32Func;
} }
if (info->isGather) { if (info->isGather || info->isPrefetch) {
llvm::Value *mask = callInst->getArgOperand(1); llvm::Value *mask = callInst->getArgOperand(1);
// Generate a new function call to the next pseudo gather // Generate a new function call to the next pseudo gather
@@ -2264,7 +2275,7 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
gatherScatterFunc = info->baseOffsets32Func; gatherScatterFunc = info->baseOffsets32Func;
} }
if (info->isGather) { if (info->isGather || info->isPrefetch) {
llvm::Value *mask = callInst->getArgOperand(1); llvm::Value *mask = callInst->getArgOperand(1);
// Generate a new function call to the next pseudo gather // Generate a new function call to the next pseudo gather
@@ -2306,13 +2317,14 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
static bool static bool
lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) { lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
struct GSBOInfo { struct GSBOInfo {
GSBOInfo(const char *pgboFuncName, const char *pgbo32FuncName, bool ig) GSBOInfo(const char *pgboFuncName, const char *pgbo32FuncName, bool ig, bool ip)
: isGather(ig) { : isGather(ig), isPrefetch(ip) {
baseOffsetsFunc = m->module->getFunction(pgboFuncName); baseOffsetsFunc = m->module->getFunction(pgboFuncName);
baseOffsets32Func = m->module->getFunction(pgbo32FuncName); baseOffsets32Func = m->module->getFunction(pgbo32FuncName);
} }
llvm::Function *baseOffsetsFunc, *baseOffsets32Func; llvm::Function *baseOffsetsFunc, *baseOffsets32Func;
const bool isGather; const bool isGather;
const bool isPrefetch;
}; };
GSBOInfo gsFuncs[] = { GSBOInfo gsFuncs[] = {
@@ -2320,63 +2332,69 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
"__pseudo_gather_factored_base_offsets32_i8", "__pseudo_gather_factored_base_offsets32_i8",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8", "__pseudo_gather_factored_base_offsets32_i8",
true), true, false),
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" : GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i16",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16", "__pseudo_gather_factored_base_offsets32_i16",
true), true, false),
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" : GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i32",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32", "__pseudo_gather_factored_base_offsets32_i32",
true), true, false),
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" : GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float", "__pseudo_gather_factored_base_offsets32_float",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float", "__pseudo_gather_factored_base_offsets32_float",
true), true, false),
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" : GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i64",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64", "__pseudo_gather_factored_base_offsets32_i64",
true), true, false),
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" : GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_double",
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" : g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double", "__pseudo_gather_factored_base_offsets32_double",
true), true, false),
GSBOInfo( g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" : GSBOInfo( g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8", "__pseudo_scatter_factored_base_offsets32_i8",
false), false, false),
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" : GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16", "__pseudo_scatter_factored_base_offsets32_i16",
false), false, false),
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" : GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32", "__pseudo_scatter_factored_base_offsets32_i32",
false), false, false),
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" : GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float", "__pseudo_scatter_factored_base_offsets32_float",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float", "__pseudo_scatter_factored_base_offsets32_float",
false), false, false),
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" : GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64", "__pseudo_scatter_factored_base_offsets32_i64",
false), false, false),
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" : GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_double",
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" : g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double", "__pseudo_scatter_factored_base_offsets32_double",
false), false, false),
GSBOInfo(g->target->hasVecPrefetch() ? "__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
g->target->hasVecPrefetch() ? "__pseudo_prefetch_read_varying_1_native" :
"__prefetch_read_varying_1",
false, true)
}; };
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]); int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
@@ -4167,149 +4185,155 @@ lReplacePseudoMaskedStore(llvm::CallInst *callInst) {
static bool static bool
lReplacePseudoGS(llvm::CallInst *callInst) { lReplacePseudoGS(llvm::CallInst *callInst) {
struct LowerGSInfo { struct LowerGSInfo {
LowerGSInfo(const char *pName, const char *aName, bool ig) LowerGSInfo(const char *pName, const char *aName, bool ig, bool ip)
: isGather(ig) { : isGather(ig), isPrefetch(ip) {
pseudoFunc = m->module->getFunction(pName); pseudoFunc = m->module->getFunction(pName);
actualFunc = m->module->getFunction(aName); actualFunc = m->module->getFunction(aName);
} }
llvm::Function *pseudoFunc; llvm::Function *pseudoFunc;
llvm::Function *actualFunc; llvm::Function *actualFunc;
const bool isGather; const bool isGather;
const bool isPrefetch;
}; };
LowerGSInfo lgsInfo[] = { LowerGSInfo lgsInfo[] = {
LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true), LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true, false),
LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true), LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true, false),
LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true), LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true, false),
LowerGSInfo("__pseudo_gather32_float", "__gather32_float", true), LowerGSInfo("__pseudo_gather32_float", "__gather32_float", true, false),
LowerGSInfo("__pseudo_gather32_i64", "__gather32_i64", true), LowerGSInfo("__pseudo_gather32_i64", "__gather32_i64", true, false),
LowerGSInfo("__pseudo_gather32_double", "__gather32_double", true), LowerGSInfo("__pseudo_gather32_double", "__gather32_double", true, false),
LowerGSInfo("__pseudo_gather64_i8", "__gather64_i8", true), LowerGSInfo("__pseudo_gather64_i8", "__gather64_i8", true, false),
LowerGSInfo("__pseudo_gather64_i16", "__gather64_i16", true), LowerGSInfo("__pseudo_gather64_i16", "__gather64_i16", true, false),
LowerGSInfo("__pseudo_gather64_i32", "__gather64_i32", true), LowerGSInfo("__pseudo_gather64_i32", "__gather64_i32", true, false),
LowerGSInfo("__pseudo_gather64_float", "__gather64_float", true), LowerGSInfo("__pseudo_gather64_float", "__gather64_float", true, false),
LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true), LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true, false),
LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true), LowerGSInfo("__pseudo_gather64_double", "__gather64_double", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8", LowerGSInfo("__pseudo_gather_factored_base_offsets32_i8",
"__gather_factored_base_offsets32_i8", true), "__gather_factored_base_offsets32_i8", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16", LowerGSInfo("__pseudo_gather_factored_base_offsets32_i16",
"__gather_factored_base_offsets32_i16", true), "__gather_factored_base_offsets32_i16", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32", LowerGSInfo("__pseudo_gather_factored_base_offsets32_i32",
"__gather_factored_base_offsets32_i32", true), "__gather_factored_base_offsets32_i32", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_float", LowerGSInfo("__pseudo_gather_factored_base_offsets32_float",
"__gather_factored_base_offsets32_float", true), "__gather_factored_base_offsets32_float", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64", LowerGSInfo("__pseudo_gather_factored_base_offsets32_i64",
"__gather_factored_base_offsets32_i64", true), "__gather_factored_base_offsets32_i64", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets32_double", LowerGSInfo("__pseudo_gather_factored_base_offsets32_double",
"__gather_factored_base_offsets32_double", true), "__gather_factored_base_offsets32_double", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8", LowerGSInfo("__pseudo_gather_factored_base_offsets64_i8",
"__gather_factored_base_offsets64_i8", true), "__gather_factored_base_offsets64_i8", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16", LowerGSInfo("__pseudo_gather_factored_base_offsets64_i16",
"__gather_factored_base_offsets64_i16", true), "__gather_factored_base_offsets64_i16", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32", LowerGSInfo("__pseudo_gather_factored_base_offsets64_i32",
"__gather_factored_base_offsets64_i32", true), "__gather_factored_base_offsets64_i32", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_float", LowerGSInfo("__pseudo_gather_factored_base_offsets64_float",
"__gather_factored_base_offsets64_float", true), "__gather_factored_base_offsets64_float", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64", LowerGSInfo("__pseudo_gather_factored_base_offsets64_i64",
"__gather_factored_base_offsets64_i64", true), "__gather_factored_base_offsets64_i64", true, false),
LowerGSInfo("__pseudo_gather_factored_base_offsets64_double", LowerGSInfo("__pseudo_gather_factored_base_offsets64_double",
"__gather_factored_base_offsets64_double", true), "__gather_factored_base_offsets64_double", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_i8", LowerGSInfo("__pseudo_gather_base_offsets32_i8",
"__gather_base_offsets32_i8", true), "__gather_base_offsets32_i8", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_i16", LowerGSInfo("__pseudo_gather_base_offsets32_i16",
"__gather_base_offsets32_i16", true), "__gather_base_offsets32_i16", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_i32", LowerGSInfo("__pseudo_gather_base_offsets32_i32",
"__gather_base_offsets32_i32", true), "__gather_base_offsets32_i32", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_float", LowerGSInfo("__pseudo_gather_base_offsets32_float",
"__gather_base_offsets32_float", true), "__gather_base_offsets32_float", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_i64", LowerGSInfo("__pseudo_gather_base_offsets32_i64",
"__gather_base_offsets32_i64", true), "__gather_base_offsets32_i64", true, false),
LowerGSInfo("__pseudo_gather_base_offsets32_double", LowerGSInfo("__pseudo_gather_base_offsets32_double",
"__gather_base_offsets32_double", true), "__gather_base_offsets32_double", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_i8", LowerGSInfo("__pseudo_gather_base_offsets64_i8",
"__gather_base_offsets64_i8", true), "__gather_base_offsets64_i8", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_i16", LowerGSInfo("__pseudo_gather_base_offsets64_i16",
"__gather_base_offsets64_i16", true), "__gather_base_offsets64_i16", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_i32", LowerGSInfo("__pseudo_gather_base_offsets64_i32",
"__gather_base_offsets64_i32", true), "__gather_base_offsets64_i32", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_float", LowerGSInfo("__pseudo_gather_base_offsets64_float",
"__gather_base_offsets64_float", true), "__gather_base_offsets64_float", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_i64", LowerGSInfo("__pseudo_gather_base_offsets64_i64",
"__gather_base_offsets64_i64", true), "__gather_base_offsets64_i64", true, false),
LowerGSInfo("__pseudo_gather_base_offsets64_double", LowerGSInfo("__pseudo_gather_base_offsets64_double",
"__gather_base_offsets64_double", true), "__gather_base_offsets64_double", true, false),
LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false), LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false, false),
LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false), LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false, false),
LowerGSInfo("__pseudo_scatter32_i32", "__scatter32_i32", false), LowerGSInfo("__pseudo_scatter32_i32", "__scatter32_i32", false, false),
LowerGSInfo("__pseudo_scatter32_float", "__scatter32_float", false), LowerGSInfo("__pseudo_scatter32_float", "__scatter32_float", false, false),
LowerGSInfo("__pseudo_scatter32_i64", "__scatter32_i64", false), LowerGSInfo("__pseudo_scatter32_i64", "__scatter32_i64", false, false),
LowerGSInfo("__pseudo_scatter32_double", "__scatter32_double", false), LowerGSInfo("__pseudo_scatter32_double", "__scatter32_double", false, false),
LowerGSInfo("__pseudo_scatter64_i8", "__scatter64_i8", false), LowerGSInfo("__pseudo_scatter64_i8", "__scatter64_i8", false, false),
LowerGSInfo("__pseudo_scatter64_i16", "__scatter64_i16", false), LowerGSInfo("__pseudo_scatter64_i16", "__scatter64_i16", false, false),
LowerGSInfo("__pseudo_scatter64_i32", "__scatter64_i32", false), LowerGSInfo("__pseudo_scatter64_i32", "__scatter64_i32", false, false),
LowerGSInfo("__pseudo_scatter64_float", "__scatter64_float", false), LowerGSInfo("__pseudo_scatter64_float", "__scatter64_float", false, false),
LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false), LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false, false),
LowerGSInfo("__pseudo_scatter64_double", "__scatter64_double", false), LowerGSInfo("__pseudo_scatter64_double", "__scatter64_double", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i8",
"__scatter_factored_base_offsets32_i8", false), "__scatter_factored_base_offsets32_i8", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i16",
"__scatter_factored_base_offsets32_i16", false), "__scatter_factored_base_offsets32_i16", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i32",
"__scatter_factored_base_offsets32_i32", false), "__scatter_factored_base_offsets32_i32", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_float",
"__scatter_factored_base_offsets32_float", false), "__scatter_factored_base_offsets32_float", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_i64",
"__scatter_factored_base_offsets32_i64", false), "__scatter_factored_base_offsets32_i64", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double", LowerGSInfo("__pseudo_scatter_factored_base_offsets32_double",
"__scatter_factored_base_offsets32_double", false), "__scatter_factored_base_offsets32_double", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i8",
"__scatter_factored_base_offsets64_i8", false), "__scatter_factored_base_offsets64_i8", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i16",
"__scatter_factored_base_offsets64_i16", false), "__scatter_factored_base_offsets64_i16", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i32",
"__scatter_factored_base_offsets64_i32", false), "__scatter_factored_base_offsets64_i32", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_float",
"__scatter_factored_base_offsets64_float", false), "__scatter_factored_base_offsets64_float", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_i64",
"__scatter_factored_base_offsets64_i64", false), "__scatter_factored_base_offsets64_i64", false, false),
LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double", LowerGSInfo("__pseudo_scatter_factored_base_offsets64_double",
"__scatter_factored_base_offsets64_double", false), "__scatter_factored_base_offsets64_double", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i8", LowerGSInfo("__pseudo_scatter_base_offsets32_i8",
"__scatter_base_offsets32_i8", false), "__scatter_base_offsets32_i8", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i16", LowerGSInfo("__pseudo_scatter_base_offsets32_i16",
"__scatter_base_offsets32_i16", false), "__scatter_base_offsets32_i16", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i32", LowerGSInfo("__pseudo_scatter_base_offsets32_i32",
"__scatter_base_offsets32_i32", false), "__scatter_base_offsets32_i32", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_float", LowerGSInfo("__pseudo_scatter_base_offsets32_float",
"__scatter_base_offsets32_float", false), "__scatter_base_offsets32_float", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i64", LowerGSInfo("__pseudo_scatter_base_offsets32_i64",
"__scatter_base_offsets32_i64", false), "__scatter_base_offsets32_i64", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets32_double", LowerGSInfo("__pseudo_scatter_base_offsets32_double",
"__scatter_base_offsets32_double", false), "__scatter_base_offsets32_double", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i8", LowerGSInfo("__pseudo_scatter_base_offsets64_i8",
"__scatter_base_offsets64_i8", false), "__scatter_base_offsets64_i8", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i16", LowerGSInfo("__pseudo_scatter_base_offsets64_i16",
"__scatter_base_offsets64_i16", false), "__scatter_base_offsets64_i16", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i32", LowerGSInfo("__pseudo_scatter_base_offsets64_i32",
"__scatter_base_offsets64_i32", false), "__scatter_base_offsets64_i32", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_float", LowerGSInfo("__pseudo_scatter_base_offsets64_float",
"__scatter_base_offsets64_float", false), "__scatter_base_offsets64_float", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i64", LowerGSInfo("__pseudo_scatter_base_offsets64_i64",
"__scatter_base_offsets64_i64", false), "__scatter_base_offsets64_i64", false, false),
LowerGSInfo("__pseudo_scatter_base_offsets64_double", LowerGSInfo("__pseudo_scatter_base_offsets64_double",
"__scatter_base_offsets64_double", false), "__scatter_base_offsets64_double", false, false),
LowerGSInfo("__pseudo_prefetch_read_varying_1",
"__prefetch_read_varying_1", false, true),
LowerGSInfo("__pseudo_prefetch_read_varying_1_native",
"__prefetch_read_varying_1_native", false, true),
}; };
llvm::Function *calledFunc = callInst->getCalledFunction(); llvm::Function *calledFunc = callInst->getCalledFunction();
@@ -4336,7 +4360,7 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
if (gotPosition && g->target->getVectorWidth() > 1) { if (gotPosition && g->target->getVectorWidth() > 1) {
if (info->isGather) if (info->isGather)
PerformanceWarning(pos, "Gather required to load value."); PerformanceWarning(pos, "Gather required to load value.");
else else if (!info->isPrefetch)
PerformanceWarning(pos, "Scatter required to store value."); PerformanceWarning(pos, "Scatter required to store value.");
} }
return true; return true;
@@ -4617,6 +4641,7 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
"__scatter64_i8", "__scatter64_i16", "__scatter64_i8", "__scatter64_i16",
"__scatter64_i32", "__scatter64_i64", "__scatter64_i32", "__scatter64_i64",
"__scatter64_float", "__scatter64_double", "__scatter64_float", "__scatter64_double",
"__prefetch_read_varying_1",
"__keep_funcs_live", "__keep_funcs_live",
}; };

View File

@@ -820,13 +820,7 @@ static inline void prefetch_nt(const void * uniform ptr) {
} }
static inline void prefetch_l1(const void * varying ptr) { static inline void prefetch_l1(const void * varying ptr) {
const void * uniform ptrArray[programCount]; __pseudo_prefetch_read_varying_1((int64)ptr, (IntMaskType)__mask);
ptrArray[programIndex] = ptr;
foreach_active (i) {
const void * uniform p = ptrArray[i];
prefetch_l1(p);
}
} }
static inline void prefetch_l2(const void * varying ptr) { static inline void prefetch_l2(const void * varying ptr) {