Add various prefetch functions to the standard library.
This commit is contained in:
5
Makefile
5
Makefile
@@ -12,7 +12,8 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
|
||||
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
|
||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
@@ -105,7 +106,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
|
||||
objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll
|
||||
@echo Creating C++ source from builtin definitions file $<
|
||||
@m4 builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-%.o: objs/builtins-%.cpp
|
||||
@echo Compiling $<
|
||||
|
||||
47
builtins.cpp
47
builtins.cpp
@@ -170,6 +170,27 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
||||
}
|
||||
|
||||
|
||||
/** Create an ispc function Symbol with the given name and signature, bind
    it to the given LLVM function, and add it to the symbol table.

    @param name        Name under which the symbol is registered.
    @param returnType  ispc return type of the function.
    @param argTypes    ispc types of the function's arguments.
    @param ftype       LLVM function type; only its parameter count is used
                       here, to size the list of default argument values.
    @param func        LLVM function that the new symbol refers to.
    @param symbolTable Symbol table that the new symbol is added to.
 */
static void
|
||||
lCreateSymbol(const std::string &name, const Type *returnType,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const llvm::FunctionType *ftype, llvm::Function *func,
|
||||
SymbolTable *symbolTable) {
|
||||
// These symbols have no real source location; the position is just
// labeled "__stdlib".
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
||||
// set NULL default arguments
// NOTE(review): one entry is added per LLVM parameter (ftype), not per
// element of argTypes; the two counts are presumably equal -- confirm.
|
||||
std::vector<ConstExpr *> defaults;
|
||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
|
||||
defaults.push_back(NULL);
|
||||
funcType->SetArgumentDefaults(defaults);
|
||||
|
||||
// NOTE(review): funcType and sym are allocated with new and not freed
// here; presumably they are meant to live as long as the symbol table --
// confirm.
Symbol *sym = new Symbol(name, noPos, funcType);
|
||||
sym->function = func;
|
||||
symbolTable->AddFunction(sym);
|
||||
}
|
||||
|
||||
|
||||
/** Given an LLVM function declaration, synthesize the equivalent ispc
|
||||
symbol for the function (if possible). Returns true on success, false
|
||||
on failure.
|
||||
@@ -221,7 +242,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
|
||||
// Iterate over the arguments and try to find their equivalent ispc
|
||||
// types. Track if any of the arguments has an integer type.
|
||||
bool anyIntArgs = false;
|
||||
bool anyIntArgs = false, anyReferenceArgs = false;
|
||||
std::vector<const Type *> argTypes;
|
||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
||||
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
||||
@@ -230,22 +251,26 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
return false;
|
||||
anyIntArgs |=
|
||||
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
|
||||
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
|
||||
argTypes.push_back(type);
|
||||
}
|
||||
|
||||
// Always create the symbol the first time through, in particular
|
||||
// so that we get symbols for things with no integer types!
|
||||
if (i == 0 || anyIntArgs == true) {
|
||||
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
||||
// set NULL default arguments
|
||||
std::vector<ConstExpr *> defaults;
|
||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
|
||||
defaults.push_back(NULL);
|
||||
funcType->SetArgumentDefaults(defaults);
|
||||
if (i == 0 || anyIntArgs == true)
|
||||
lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
|
||||
|
||||
Symbol *sym = new Symbol(name, noPos, funcType);
|
||||
sym->function = func;
|
||||
symbolTable->AddFunction(sym);
|
||||
// If there are any reference types, also make a variant of the
|
||||
// symbol that has them as const references. This obviously
|
||||
// doesn't make sense for many builtins, but we'll give the stdlib
|
||||
// the option to call one if it needs one.
|
||||
if (anyReferenceArgs == true) {
|
||||
for (unsigned int j = 0; j < argTypes.size(); ++j) {
|
||||
if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
|
||||
argTypes[j] = argTypes[j]->GetAsConstType();
|
||||
lCreateSymbol(name + "_refsconst", returnType, argTypes,
|
||||
ftype, func, symbolTable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
54
builtins.m4
54
builtins.m4
@@ -557,6 +557,41 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $
|
||||
}
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; prefetch definitions
|
||||
|
||||
; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
|
||||
; and data caches--the declaration is now:
|
||||
; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
|
||||
; i32 %cachetype) (cachetype 1 == data cache)
|
||||
; however, the version below seems to still work...
|
||||
|
||||
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
|
||||
|
||||
; The macro defined below takes two arguments: a name suffix and an LLVM
; element type. It emits four alwaysinline wrapper functions around
; @llvm.prefetch, named with the _1, _2, _3 and _nt tags followed by the
; suffix. Each wrapper bitcasts its pointer argument to i8 * and calls the
; intrinsic with readwrite = 0 and a locality value of 3, 2, 1 and 0
; respectively.
define(`prefetch_read', `
|
||||
define internal void @__prefetch_read_1_$1($2 *) alwaysinline {
|
||||
%ptr8 = bitcast $2 * %0 to i8 *
|
||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
|
||||
ret void
|
||||
}
|
||||
define internal void @__prefetch_read_2_$1($2 *) alwaysinline {
|
||||
%ptr8 = bitcast $2 * %0 to i8 *
|
||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
|
||||
ret void
|
||||
}
|
||||
define internal void @__prefetch_read_3_$1($2 *) alwaysinline {
|
||||
%ptr8 = bitcast $2 * %0 to i8 *
|
||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
|
||||
%ptr8 = bitcast $2 * %0 to i8 *
|
||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
define(`stdlib_core', `
|
||||
|
||||
@@ -779,6 +814,25 @@ define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone al
|
||||
ret <$1 x i32> %0
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; prefetching
|
||||
|
||||
prefetch_read(uniform_bool, i1)
|
||||
prefetch_read(uniform_int8, i8)
|
||||
prefetch_read(uniform_int16, i16)
|
||||
prefetch_read(uniform_int32, i32)
|
||||
prefetch_read(uniform_int64, i64)
|
||||
prefetch_read(uniform_float, float)
|
||||
prefetch_read(uniform_double, double)
|
||||
|
||||
prefetch_read(varying_bool, <$1 x i32>)
|
||||
prefetch_read(varying_int8, <$1 x i8>)
|
||||
prefetch_read(varying_int16, <$1 x i16>)
|
||||
prefetch_read(varying_int32, <$1 x i32>)
|
||||
prefetch_read(varying_int64, <$1 x i64>)
|
||||
prefetch_read(varying_float, <$1 x float>)
|
||||
prefetch_read(varying_double, <$1 x double>)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; stdlib transcendentals
|
||||
;;
|
||||
|
||||
@@ -79,6 +79,7 @@ Contents:
|
||||
+ `Packed Load and Store Operations`_
|
||||
+ `Conversions To and From Half-Precision Floats`_
|
||||
+ `Atomic Operations and Memory Fences`_
|
||||
+ `Prefetches`_
|
||||
+ `Low-Level Bits`_
|
||||
|
||||
* `Interoperability with the Application`_
|
||||
@@ -1990,6 +1991,39 @@ code.
|
||||
void memory_barrier();
|
||||
|
||||
|
||||
Prefetches
|
||||
----------
|
||||
|
||||
The standard library has a variety of functions to prefetch data into the
|
||||
processor's cache. While modern CPUs have automatic prefetchers that do a
|
||||
reasonable job of prefetching data to the cache before it's needed, high
|
||||
performance applications may find it helpful to prefetch data before it's
|
||||
needed.
|
||||
|
||||
For example, this code shows how to prefetch data to the processor's L1
|
||||
cache while iterating over the items in an array.
|
||||
|
||||
::
|
||||
|
||||
uniform int32 array[...];
|
||||
for (uniform int i = 0; i < count; ++i) {
|
||||
// do computation with array[i]
|
||||
prefetch_l1(array[i+32]);
|
||||
}
|
||||
|
||||
The standard library has routines to prefetch to the L1, L2, and L3
|
||||
caches. It also has a variant, ``prefetch_nt()``, that indicates that the
|
||||
value being prefetched isn't expected to be used more than once (so it should
|
||||
be high priority to be evicted from the cache).
|
||||
|
||||
::
|
||||
|
||||
void prefetch_{l1,l2,l3,nt}(const reference TYPE)
|
||||
|
||||
These functions are available for all of the basic types in the
|
||||
language--``int8``, ``int16``, ``int32``, ``float``, and so forth.
|
||||
|
||||
|
||||
Low-Level Bits
|
||||
--------------
|
||||
|
||||
|
||||
37
stdlib.ispc
37
stdlib.ispc
@@ -315,6 +315,39 @@ static inline uniform int lanemask() {
|
||||
return __movmsk(__mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Prefetching
|
||||
|
||||
// PREFETCHES(NAME, TYPE) defines the four overloads prefetch_l1(),
// prefetch_l2(), prefetch_l3() and prefetch_nt() for a const reference to
// TYPE.  Each one simply forwards its argument to the builtin named
// __prefetch_read_{1,2,3,nt}_<NAME>_refsconst.
// NOTE(review): the "_refsconst" suffix presumably selects the variant of
// the builtin whose reference parameters are const -- confirm against the
// code that registers the builtins.
//
// The macro is instantiated below for the uniform and varying variants of
// int8, int16, int32, int64, float and double (bool is not instantiated
// here), and is then undefined so that it doesn't leak into the rest of
// the file.
#define PREFETCHES(NAME, TYPE) \
|
||||
static inline void prefetch_l1(const reference TYPE ptr) { \
|
||||
__prefetch_read_1_##NAME##_refsconst(ptr); \
|
||||
} \
|
||||
static inline void prefetch_l2(const reference TYPE ptr) { \
|
||||
__prefetch_read_2_##NAME##_refsconst(ptr); \
|
||||
} \
|
||||
static inline void prefetch_l3(const reference TYPE ptr) { \
|
||||
__prefetch_read_3_##NAME##_refsconst(ptr); \
|
||||
} \
|
||||
static inline void prefetch_nt(const reference TYPE ptr) { \
|
||||
__prefetch_read_nt_##NAME##_refsconst(ptr); \
|
||||
}
|
||||
|
||||
PREFETCHES(uniform_int8, uniform int8)
|
||||
PREFETCHES(uniform_int16, uniform int16)
|
||||
PREFETCHES(uniform_int32, uniform int32)
|
||||
PREFETCHES(uniform_int64, uniform int64)
|
||||
PREFETCHES(uniform_float, uniform float)
|
||||
PREFETCHES(uniform_double, uniform double)
|
||||
|
||||
PREFETCHES(varying_int8, int8)
|
||||
PREFETCHES(varying_int16, int16)
|
||||
PREFETCHES(varying_int32, int32)
|
||||
PREFETCHES(varying_int64, int64)
|
||||
PREFETCHES(varying_float, float)
|
||||
PREFETCHES(varying_double, double)
|
||||
|
||||
#undef PREFETCHES
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Horizontal ops / reductions
|
||||
|
||||
@@ -522,6 +555,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
|
||||
|
||||
DEFINE_ATOMIC_OP(double,double,swap,swap)
|
||||
|
||||
#undef DEFINE_ATOMIC_OP
|
||||
|
||||
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform reference TA ref, TA oldval, TA newval) { \
|
||||
@@ -538,6 +573,8 @@ ATOMIC_DECL_CMPXCHG(int64, int64)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
|
||||
ATOMIC_DECL_CMPXCHG(double, double)
|
||||
|
||||
#undef ATOMIC_DECL_CMPXCHG
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Floating-Point Math
|
||||
|
||||
|
||||
7
type.cpp
7
type.cpp
@@ -1541,7 +1541,7 @@ StructType::GetElementNumber(const std::string &n) const {
|
||||
// ReferenceType
|
||||
|
||||
ReferenceType::ReferenceType(const Type *t, bool ic)
|
||||
: isConst(ic), targetType(t) {
|
||||
: isConst(ic), targetType(t->GetAsNonConstType()) {
|
||||
}
|
||||
|
||||
|
||||
@@ -2136,8 +2136,9 @@ Type::Equal(const Type *a, const Type *b) {
|
||||
const ReferenceType *rta = dynamic_cast<const ReferenceType *>(a);
|
||||
const ReferenceType *rtb = dynamic_cast<const ReferenceType *>(b);
|
||||
if (rta != NULL && rtb != NULL)
|
||||
return Type::Equal(rta->GetReferenceTarget(),
|
||||
rtb->GetReferenceTarget());
|
||||
return ((rta->IsConstType() == rtb->IsConstType()) &&
|
||||
Type::Equal(rta->GetReferenceTarget(),
|
||||
rtb->GetReferenceTarget()));
|
||||
|
||||
const FunctionType *fta = dynamic_cast<const FunctionType *>(a);
|
||||
const FunctionType *ftb = dynamic_cast<const FunctionType *>(b);
|
||||
|
||||
Reference in New Issue
Block a user