Add various prefetch functions to the standard library.

This commit is contained in:
Matt Pharr
2011-08-03 12:07:30 -07:00
parent 467f1e71d7
commit 0ac4f7b620
6 changed files with 168 additions and 16 deletions

View File

@@ -12,7 +12,8 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
BUILD_DATE=$(shell date +%Y%m%d)
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
@@ -105,7 +106,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll
@echo Creating C++ source from builtin definitions file $<
@m4 builtins.m4 $< | ./bitcode2cpp.py $< > $@
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
objs/builtins-%.o: objs/builtins-%.cpp
@echo Compiling $<

View File

@@ -170,6 +170,27 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
}
/** Creates an ispc function symbol called \c name for the LLVM function
    \c func and adds it to \c symbolTable.

    @param name         Name under which the symbol is registered.
    @param returnType   ispc return type of the function.
    @param argTypes     ispc types of the function's parameters.
    @param ftype        LLVM function type; only its parameter count is
                        used here, to size the list of default arguments.
    @param func         LLVM function stored in the new symbol.
    @param symbolTable  Symbol table that receives the new symbol.
 */
static void
lCreateSymbol(const std::string &name, const Type *returnType,
              const std::vector<const Type *> &argTypes,
              const llvm::FunctionType *ftype, llvm::Function *func,
              SymbolTable *symbolTable) {
    // Placeholder source position, labelled "__stdlib".
    SourcePos stdlibPos;
    stdlibPos.name = "__stdlib";

    // One default-argument slot per LLVM parameter; value-initialization
    // leaves every slot as a null pointer, i.e. "no default".
    std::vector<ConstExpr *> nullDefaults(ftype->getNumParams());

    FunctionType *ispcFuncType =
        new FunctionType(returnType, argTypes, stdlibPos);
    ispcFuncType->SetArgumentDefaults(nullDefaults);

    Symbol *symbol = new Symbol(name, stdlibPos, ispcFuncType);
    symbol->function = func;
    symbolTable->AddFunction(symbol);
}
/** Given an LLVM function declaration, synthesize the equivalent ispc
symbol for the function (if possible). Returns true on success, false
on failure.
@@ -221,7 +242,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
bool anyIntArgs = false;
bool anyIntArgs = false, anyReferenceArgs = false;
std::vector<const Type *> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
@@ -230,22 +251,26 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
return false;
anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
argTypes.push_back(type);
}
// Always create the symbol the first time through, in particular
// so that we get symbols for things with no integer types!
if (i == 0 || anyIntArgs == true) {
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
// set NULL default arguments
std::vector<ConstExpr *> defaults;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
defaults.push_back(NULL);
funcType->SetArgumentDefaults(defaults);
if (i == 0 || anyIntArgs == true)
lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
// If there are any reference types, also make a variant of the
// symbol that has them as const references. This obviously
// doesn't make sense for many builtins, but we'll give the stdlib
// the option to call one if it needs one.
if (anyReferenceArgs == true) {
for (unsigned int j = 0; j < argTypes.size(); ++j) {
if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
argTypes[j] = argTypes[j]->GetAsConstType();
lCreateSymbol(name + "_refsconst", returnType, argTypes,
ftype, func, symbolTable);
}
}
}

View File

@@ -557,6 +557,41 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetch definitions
; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
; and data caches--the declaration is now:
; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
; i32 %cachetype) (cachetype 1 == data cache)
; however, the version below seems to still work...
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
;; Macro that emits the read prefetch helpers used with one pointee type.
;; First macro argument: suffix appended to each generated function name.
;; Second macro argument: LLVM type that the pointer parameter points to.
;; Each generated function is internal and alwaysinline: it bitcasts its
;; pointer to i8 * and calls llvm.prefetch with readwrite = 0 and a
;; locality hint of 3 (level 1 variant), 2 (level 2 variant),
;; 1 (level 3 variant) or 0 (nt variant).
define(`prefetch_read', `
define internal void @__prefetch_read_1_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
ret void
}
define internal void @__prefetch_read_2_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
ret void
}
define internal void @__prefetch_read_3_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
ret void
}
define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
%ptr8 = bitcast $2 * %0 to i8 *
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
ret void
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
define(`stdlib_core', `
@@ -779,6 +814,25 @@ define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone al
ret <$1 x i32> %0
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetching
prefetch_read(uniform_bool, i1)
prefetch_read(uniform_int8, i8)
prefetch_read(uniform_int16, i16)
prefetch_read(uniform_int32, i32)
prefetch_read(uniform_int64, i64)
prefetch_read(uniform_float, float)
prefetch_read(uniform_double, double)
prefetch_read(varying_bool, <$1 x i32>)
prefetch_read(varying_int8, <$1 x i8>)
prefetch_read(varying_int16, <$1 x i16>)
prefetch_read(varying_int32, <$1 x i32>)
prefetch_read(varying_int64, <$1 x i64>)
prefetch_read(varying_float, <$1 x float>)
prefetch_read(varying_double, <$1 x double>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stdlib transcendentals
;;

View File

@@ -79,6 +79,7 @@ Contents:
+ `Packed Load and Store Operations`_
+ `Conversions To and From Half-Precision Floats`_
+ `Atomic Operations and Memory Fences`_
+ `Prefetches`_
+ `Low-Level Bits`_
* `Interoperability with the Application`_
@@ -1990,6 +1991,39 @@ code.
void memory_barrier();
Prefetches
----------
The standard library has a variety of functions to prefetch data into the
processor's cache. While modern CPUs have automatic prefetchers that do a
reasonable job of bringing data into the cache before it's needed,
high-performance applications may find it helpful to explicitly prefetch
data ahead of its use.
For example, this code shows how to prefetch data to the processor's L1
cache while iterating over the items in an array.
::
uniform int32 array[...];
for (uniform int i = 0; i < count; ++i) {
// do computation with array[i]
prefetch_l1(array[i+32]);
}
The standard library has routines to prefetch to the L1, L2, and L3
caches. It also has a variant, ``prefetch_nt()``, that indicates that the
value being prefetched isn't expected to be used more than once (and so
should be given high priority for eviction from the cache).
::
void prefetch_{l1,l2,l3,nt}(const reference TYPE)
These functions are available for all of the basic types in the
language--``int8``, ``int16``, ``int32``, ``float``, and so forth.
Low-Level Bits
--------------

View File

@@ -315,6 +315,39 @@ static inline uniform int lanemask() {
return __movmsk(__mask);
}
///////////////////////////////////////////////////////////////////////////
// Prefetching
// Generates the four explicit prefetch wrappers (prefetch_l1, prefetch_l2,
// prefetch_l3 and prefetch_nt) for a single element type.
//   NAME - type suffix spliced into the builtin name, giving
//          __prefetch_read_{1,2,3,nt}_<NAME>_refsconst
//   TYPE - ispc type of the value that the const reference refers to
// Each wrapper just forwards its reference argument to the matching
// builtin.
#define PREFETCHES(NAME, TYPE) \
static inline void prefetch_l1(const reference TYPE ptr) { \
__prefetch_read_1_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_l2(const reference TYPE ptr) { \
__prefetch_read_2_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_l3(const reference TYPE ptr) { \
__prefetch_read_3_##NAME##_refsconst(ptr); \
} \
static inline void prefetch_nt(const reference TYPE ptr) { \
__prefetch_read_nt_##NAME##_refsconst(ptr); \
}
// Instantiations with an explicit uniform qualifier in TYPE.
PREFETCHES(uniform_int8, uniform int8)
PREFETCHES(uniform_int16, uniform int16)
PREFETCHES(uniform_int32, uniform int32)
PREFETCHES(uniform_int64, uniform int64)
PREFETCHES(uniform_float, uniform float)
PREFETCHES(uniform_double, uniform double)
// Instantiations whose NAME uses the varying_ prefix; TYPE carries no
// uniform qualifier.
PREFETCHES(varying_int8, int8)
PREFETCHES(varying_int16, int16)
PREFETCHES(varying_int32, int32)
PREFETCHES(varying_int64, int64)
PREFETCHES(varying_float, float)
PREFETCHES(varying_double, double)
// NOTE(review): no bool wrappers are instantiated here -- confirm whether
// that is intentional.
// The helper macro is only needed for the instantiations above.
#undef PREFETCHES
///////////////////////////////////////////////////////////////////////////
// Horizontal ops / reductions
@@ -522,6 +555,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
DEFINE_ATOMIC_OP(double,double,swap,swap)
#undef DEFINE_ATOMIC_OP
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
static inline TA atomic_compare_exchange_global( \
uniform reference TA ref, TA oldval, TA newval) { \
@@ -538,6 +573,8 @@ ATOMIC_DECL_CMPXCHG(int64, int64)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
ATOMIC_DECL_CMPXCHG(double, double)
#undef ATOMIC_DECL_CMPXCHG
///////////////////////////////////////////////////////////////////////////
// Floating-Point Math

View File

@@ -1541,7 +1541,7 @@ StructType::GetElementNumber(const std::string &n) const {
// ReferenceType
ReferenceType::ReferenceType(const Type *t, bool ic)
: isConst(ic), targetType(t) {
: isConst(ic), targetType(t->GetAsNonConstType()) {
}
@@ -2136,8 +2136,9 @@ Type::Equal(const Type *a, const Type *b) {
const ReferenceType *rta = dynamic_cast<const ReferenceType *>(a);
const ReferenceType *rtb = dynamic_cast<const ReferenceType *>(b);
if (rta != NULL && rtb != NULL)
return Type::Equal(rta->GetReferenceTarget(),
rtb->GetReferenceTarget());
return ((rta->IsConstType() == rtb->IsConstType()) &&
Type::Equal(rta->GetReferenceTarget(),
rtb->GetReferenceTarget()));
const FunctionType *fta = dynamic_cast<const FunctionType *>(a);
const FunctionType *ftb = dynamic_cast<const FunctionType *>(b);