Add various prefetch functions to the standard library.
This commit is contained in:
5
Makefile
5
Makefile
@@ -12,7 +12,8 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \
|
|||||||
|
|
||||||
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
|
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
|
||||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||||
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
|
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||||
|
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||||
|
|
||||||
BUILD_DATE=$(shell date +%Y%m%d)
|
BUILD_DATE=$(shell date +%Y%m%d)
|
||||||
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||||
@@ -105,7 +106,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
|||||||
|
|
||||||
objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll
|
objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll
|
||||||
@echo Creating C++ source from builtin definitions file $<
|
@echo Creating C++ source from builtin definitions file $<
|
||||||
@m4 builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
||||||
|
|
||||||
objs/builtins-%.o: objs/builtins-%.cpp
|
objs/builtins-%.o: objs/builtins-%.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
|
|||||||
47
builtins.cpp
47
builtins.cpp
@@ -170,6 +170,27 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
lCreateSymbol(const std::string &name, const Type *returnType,
|
||||||
|
const std::vector<const Type *> &argTypes,
|
||||||
|
const llvm::FunctionType *ftype, llvm::Function *func,
|
||||||
|
SymbolTable *symbolTable) {
|
||||||
|
SourcePos noPos;
|
||||||
|
noPos.name = "__stdlib";
|
||||||
|
|
||||||
|
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
||||||
|
// set NULL default arguments
|
||||||
|
std::vector<ConstExpr *> defaults;
|
||||||
|
for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
|
||||||
|
defaults.push_back(NULL);
|
||||||
|
funcType->SetArgumentDefaults(defaults);
|
||||||
|
|
||||||
|
Symbol *sym = new Symbol(name, noPos, funcType);
|
||||||
|
sym->function = func;
|
||||||
|
symbolTable->AddFunction(sym);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Given an LLVM function declaration, synthesize the equivalent ispc
|
/** Given an LLVM function declaration, synthesize the equivalent ispc
|
||||||
symbol for the function (if possible). Returns true on success, false
|
symbol for the function (if possible). Returns true on success, false
|
||||||
on failure.
|
on failure.
|
||||||
@@ -221,7 +242,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
|
|
||||||
// Iterate over the arguments and try to find their equivalent ispc
|
// Iterate over the arguments and try to find their equivalent ispc
|
||||||
// types. Track if any of the arguments has an integer type.
|
// types. Track if any of the arguments has an integer type.
|
||||||
bool anyIntArgs = false;
|
bool anyIntArgs = false, anyReferenceArgs = false;
|
||||||
std::vector<const Type *> argTypes;
|
std::vector<const Type *> argTypes;
|
||||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
||||||
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
||||||
@@ -230,22 +251,26 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
return false;
|
return false;
|
||||||
anyIntArgs |=
|
anyIntArgs |=
|
||||||
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
|
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
|
||||||
|
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
|
||||||
argTypes.push_back(type);
|
argTypes.push_back(type);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always create the symbol the first time through, in particular
|
// Always create the symbol the first time through, in particular
|
||||||
// so that we get symbols for things with no integer types!
|
// so that we get symbols for things with no integer types!
|
||||||
if (i == 0 || anyIntArgs == true) {
|
if (i == 0 || anyIntArgs == true)
|
||||||
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
|
||||||
// set NULL default arguments
|
|
||||||
std::vector<ConstExpr *> defaults;
|
|
||||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
|
|
||||||
defaults.push_back(NULL);
|
|
||||||
funcType->SetArgumentDefaults(defaults);
|
|
||||||
|
|
||||||
Symbol *sym = new Symbol(name, noPos, funcType);
|
// If there are any reference types, also make a variant of the
|
||||||
sym->function = func;
|
// symbol that has them as const references. This obviously
|
||||||
symbolTable->AddFunction(sym);
|
// doesn't make sense for many builtins, but we'll give the stdlib
|
||||||
|
// the option to call one if it needs one.
|
||||||
|
if (anyReferenceArgs == true) {
|
||||||
|
for (unsigned int j = 0; j < argTypes.size(); ++j) {
|
||||||
|
if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
|
||||||
|
argTypes[j] = argTypes[j]->GetAsConstType();
|
||||||
|
lCreateSymbol(name + "_refsconst", returnType, argTypes,
|
||||||
|
ftype, func, symbolTable);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
54
builtins.m4
54
builtins.m4
@@ -557,6 +557,41 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $
|
|||||||
}
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; prefetch definitions
|
||||||
|
|
||||||
|
; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
|
||||||
|
; and data caches--the declaration is now:
|
||||||
|
; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
|
||||||
|
; i32 %cachetype) (cachetype 1 == data cache)
|
||||||
|
; however, the version below seems to still work...
|
||||||
|
|
||||||
|
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
|
||||||
|
|
||||||
|
define(`prefetch_read', `
|
||||||
|
define internal void @__prefetch_read_1_$1($2 *) alwaysinline {
|
||||||
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define internal void @__prefetch_read_2_$1($2 *) alwaysinline {
|
||||||
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define internal void @__prefetch_read_3_$1($2 *) alwaysinline {
|
||||||
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
|
||||||
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
define(`stdlib_core', `
|
define(`stdlib_core', `
|
||||||
|
|
||||||
@@ -779,6 +814,25 @@ define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone al
|
|||||||
ret <$1 x i32> %0
|
ret <$1 x i32> %0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; prefetching
|
||||||
|
|
||||||
|
prefetch_read(uniform_bool, i1)
|
||||||
|
prefetch_read(uniform_int8, i8)
|
||||||
|
prefetch_read(uniform_int16, i16)
|
||||||
|
prefetch_read(uniform_int32, i32)
|
||||||
|
prefetch_read(uniform_int64, i64)
|
||||||
|
prefetch_read(uniform_float, float)
|
||||||
|
prefetch_read(uniform_double, double)
|
||||||
|
|
||||||
|
prefetch_read(varying_bool, <$1 x i32>)
|
||||||
|
prefetch_read(varying_int8, <$1 x i8>)
|
||||||
|
prefetch_read(varying_int16, <$1 x i16>)
|
||||||
|
prefetch_read(varying_int32, <$1 x i32>)
|
||||||
|
prefetch_read(varying_int64, <$1 x i64>)
|
||||||
|
prefetch_read(varying_float, <$1 x float>)
|
||||||
|
prefetch_read(varying_double, <$1 x double>)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; stdlib transcendentals
|
;; stdlib transcendentals
|
||||||
;;
|
;;
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ Contents:
|
|||||||
+ `Packed Load and Store Operations`_
|
+ `Packed Load and Store Operations`_
|
||||||
+ `Conversions To and From Half-Precision Floats`_
|
+ `Conversions To and From Half-Precision Floats`_
|
||||||
+ `Atomic Operations and Memory Fences`_
|
+ `Atomic Operations and Memory Fences`_
|
||||||
|
+ `Prefetches`_
|
||||||
+ `Low-Level Bits`_
|
+ `Low-Level Bits`_
|
||||||
|
|
||||||
* `Interoperability with the Application`_
|
* `Interoperability with the Application`_
|
||||||
@@ -1990,6 +1991,39 @@ code.
|
|||||||
void memory_barrier();
|
void memory_barrier();
|
||||||
|
|
||||||
|
|
||||||
|
Prefetches
|
||||||
|
----------
|
||||||
|
|
||||||
|
The standard library has a variety of functions to prefetch data into the
|
||||||
|
processor's cache. While modern CPUs have automatic prefetchers that do a
|
||||||
|
reasonable job of prefetching data to the cache before it's needed, high
|
||||||
|
performance applications may find it helpful to prefetch data before it's
|
||||||
|
needed.
|
||||||
|
|
||||||
|
For example, this code shows how to prefetch data to the processor's L1
|
||||||
|
cache while iterating over the items in an array.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
uniform int32 array[...];
|
||||||
|
for (uniform int i = 0; i < count; ++i) {
|
||||||
|
// do computation with array[i]
|
||||||
|
prefetch_l1(array[i+32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
The standard library has routines to prefetch to the L1, L2, and L3
|
||||||
|
caches. It also has a variant, ``prefetch_nt()``, that indicates that the
|
||||||
|
value being prefetched isn't expected to be used more than once (so should
|
||||||
|
be given high priority for eviction from the cache).
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void prefetch_{l1,l2,l3,nt}(const reference TYPE)
|
||||||
|
|
||||||
|
These functions are available for all of the basic types in the
|
||||||
|
language--``int8``, ``int16``, ``int32``, ``float``, and so forth.
|
||||||
|
|
||||||
|
|
||||||
Low-Level Bits
|
Low-Level Bits
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
|
|||||||
37
stdlib.ispc
37
stdlib.ispc
@@ -315,6 +315,39 @@ static inline uniform int lanemask() {
|
|||||||
return __movmsk(__mask);
|
return __movmsk(__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// Prefetching
|
||||||
|
|
||||||
|
#define PREFETCHES(NAME, TYPE) \
|
||||||
|
static inline void prefetch_l1(const reference TYPE ptr) { \
|
||||||
|
__prefetch_read_1_##NAME##_refsconst(ptr); \
|
||||||
|
} \
|
||||||
|
static inline void prefetch_l2(const reference TYPE ptr) { \
|
||||||
|
__prefetch_read_2_##NAME##_refsconst(ptr); \
|
||||||
|
} \
|
||||||
|
static inline void prefetch_l3(const reference TYPE ptr) { \
|
||||||
|
__prefetch_read_3_##NAME##_refsconst(ptr); \
|
||||||
|
} \
|
||||||
|
static inline void prefetch_nt(const reference TYPE ptr) { \
|
||||||
|
__prefetch_read_nt_##NAME##_refsconst(ptr); \
|
||||||
|
}
|
||||||
|
|
||||||
|
PREFETCHES(uniform_int8, uniform int8)
|
||||||
|
PREFETCHES(uniform_int16, uniform int16)
|
||||||
|
PREFETCHES(uniform_int32, uniform int32)
|
||||||
|
PREFETCHES(uniform_int64, uniform int64)
|
||||||
|
PREFETCHES(uniform_float, uniform float)
|
||||||
|
PREFETCHES(uniform_double, uniform double)
|
||||||
|
|
||||||
|
PREFETCHES(varying_int8, int8)
|
||||||
|
PREFETCHES(varying_int16, int16)
|
||||||
|
PREFETCHES(varying_int32, int32)
|
||||||
|
PREFETCHES(varying_int64, int64)
|
||||||
|
PREFETCHES(varying_float, float)
|
||||||
|
PREFETCHES(varying_double, double)
|
||||||
|
|
||||||
|
#undef PREFETCHES
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Horizontal ops / reductions
|
// Horizontal ops / reductions
|
||||||
|
|
||||||
@@ -522,6 +555,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
|
|||||||
|
|
||||||
DEFINE_ATOMIC_OP(double,double,swap,swap)
|
DEFINE_ATOMIC_OP(double,double,swap,swap)
|
||||||
|
|
||||||
|
#undef DEFINE_ATOMIC_OP
|
||||||
|
|
||||||
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
|
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
|
||||||
static inline TA atomic_compare_exchange_global( \
|
static inline TA atomic_compare_exchange_global( \
|
||||||
uniform reference TA ref, TA oldval, TA newval) { \
|
uniform reference TA ref, TA oldval, TA newval) { \
|
||||||
@@ -538,6 +573,8 @@ ATOMIC_DECL_CMPXCHG(int64, int64)
|
|||||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
|
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
|
||||||
ATOMIC_DECL_CMPXCHG(double, double)
|
ATOMIC_DECL_CMPXCHG(double, double)
|
||||||
|
|
||||||
|
#undef ATOMIC_DECL_CMPXCHG
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Floating-Point Math
|
// Floating-Point Math
|
||||||
|
|
||||||
|
|||||||
7
type.cpp
7
type.cpp
@@ -1541,7 +1541,7 @@ StructType::GetElementNumber(const std::string &n) const {
|
|||||||
// ReferenceType
|
// ReferenceType
|
||||||
|
|
||||||
ReferenceType::ReferenceType(const Type *t, bool ic)
|
ReferenceType::ReferenceType(const Type *t, bool ic)
|
||||||
: isConst(ic), targetType(t) {
|
: isConst(ic), targetType(t->GetAsNonConstType()) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2136,8 +2136,9 @@ Type::Equal(const Type *a, const Type *b) {
|
|||||||
const ReferenceType *rta = dynamic_cast<const ReferenceType *>(a);
|
const ReferenceType *rta = dynamic_cast<const ReferenceType *>(a);
|
||||||
const ReferenceType *rtb = dynamic_cast<const ReferenceType *>(b);
|
const ReferenceType *rtb = dynamic_cast<const ReferenceType *>(b);
|
||||||
if (rta != NULL && rtb != NULL)
|
if (rta != NULL && rtb != NULL)
|
||||||
return Type::Equal(rta->GetReferenceTarget(),
|
return ((rta->IsConstType() == rtb->IsConstType()) &&
|
||||||
rtb->GetReferenceTarget());
|
Type::Equal(rta->GetReferenceTarget(),
|
||||||
|
rtb->GetReferenceTarget()));
|
||||||
|
|
||||||
const FunctionType *fta = dynamic_cast<const FunctionType *>(a);
|
const FunctionType *fta = dynamic_cast<const FunctionType *>(a);
|
||||||
const FunctionType *ftb = dynamic_cast<const FunctionType *>(b);
|
const FunctionType *ftb = dynamic_cast<const FunctionType *>(b);
|
||||||
|
|||||||
Reference in New Issue
Block a user