From 0ac4f7b6207136eefe1db53a9ef215c04de731c5 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 3 Aug 2011 12:07:30 -0700 Subject: [PATCH] Add various prefetch functions to the standard library. --- Makefile | 5 +++-- builtins.cpp | 47 +++++++++++++++++++++++++++++++++----------- builtins.m4 | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ docs/ispc.txt | 34 ++++++++++++++++++++++++++++++++ stdlib.ispc | 37 +++++++++++++++++++++++++++++++++++ type.cpp | 7 ++++--- 6 files changed, 168 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 46e60dad..0966c8c2 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,8 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \ LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl LLVM_CXXFLAGS=$(shell llvm-config --cppflags) -LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/) +LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/) +LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION) BUILD_DATE=$(shell date +%Y%m%d) BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1) @@ -105,7 +106,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll @echo Creating C++ source from builtin definitions file $< - @m4 builtins.m4 $< | ./bitcode2cpp.py $< > $@ + @m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@ objs/builtins-%.o: objs/builtins-%.cpp @echo Compiling $< diff --git a/builtins.cpp b/builtins.cpp index 2a02ceab..5b4f4d2e 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -170,6 +170,27 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { } +static void +lCreateSymbol(const std::string &name, const Type *returnType, + const std::vector &argTypes, + const llvm::FunctionType *ftype, llvm::Function *func, + SymbolTable *symbolTable) { + SourcePos noPos; + noPos.name = "__stdlib"; + + FunctionType *funcType = new FunctionType(returnType, argTypes, noPos); + // set NULL default arguments: one NULL entry per parameter of ftype + 
std::vector defaults; + for (unsigned int j = 0; j < ftype->getNumParams(); ++j) + defaults.push_back(NULL); + funcType->SetArgumentDefaults(defaults); + + Symbol *sym = new Symbol(name, noPos, funcType); + sym->function = func; // attach the LLVM function to the new symbol + symbolTable->AddFunction(sym); // make the symbol visible through the symbol table +} + + /** Given an LLVM function declaration, synthesize the equivalent ispc symbol for the function (if possible). Returns true on success, false on failure. @@ -221,7 +242,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) { // Iterate over the arguments and try to find their equivalent ispc // types. Track if any of the arguments has an integer type. - bool anyIntArgs = false; + bool anyIntArgs = false, anyReferenceArgs = false; std::vector argTypes; for (unsigned int j = 0; j < ftype->getNumParams(); ++j) { const llvm::Type *llvmArgType = ftype->getParamType(j); @@ -230,22 +251,26 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) { return false; anyIntArgs |= (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false); + anyReferenceArgs |= (dynamic_cast(type) != NULL); argTypes.push_back(type); } // Always create the symbol the first time through, in particular // so that we get symbols for things with no integer types! - if (i == 0 || anyIntArgs == true) { - FunctionType *funcType = new FunctionType(returnType, argTypes, noPos); - // set NULL default arguments - std::vector defaults; - for (unsigned int j = 0; j < ftype->getNumParams(); ++j) - defaults.push_back(NULL); - funcType->SetArgumentDefaults(defaults); + if (i == 0 || anyIntArgs == true) + lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable); - Symbol *sym = new Symbol(name, noPos, funcType); - sym->function = func; - symbolTable->AddFunction(sym); + // If there are any reference types, also make a variant of the + // symbol that has them as const references. 
This obviously + // doesn't make sense for many builtins, but we'll give the stdlib + // the option to call one if it needs one. + if (anyReferenceArgs == true) { + for (unsigned int j = 0; j < argTypes.size(); ++j) { + if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL) + argTypes[j] = argTypes[j]->GetAsConstType(); + } // every reference argument is const now: register the variant exactly once + lCreateSymbol(name + "_refsconst", returnType, argTypes, + ftype, func, symbolTable); } } diff --git a/builtins.m4 b/builtins.m4 index 661d9ba7..47158292 100644 --- a/builtins.m4 +++ b/builtins.m4 @@ -557,6 +557,41 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $ } ') +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; prefetch definitions + +; prefetch has a new parameter in LLVM3.0, to distinguish between instruction +; and data caches--the declaration is now: +; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality, +; i32 %cachetype) (cachetype 1 == data cache) +; however, the version below seems to still work... 
+ +declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality) + +define(`prefetch_read', ` +define internal void @__prefetch_read_1_$1($2 *) alwaysinline { + %ptr8 = bitcast $2 * %0 to i8 * + call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3) ; readwrite 0, locality 3 + ret void +} +define internal void @__prefetch_read_2_$1($2 *) alwaysinline { + %ptr8 = bitcast $2 * %0 to i8 * + call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2) ; readwrite 0, locality 2 + ret void +} +define internal void @__prefetch_read_3_$1($2 *) alwaysinline { + %ptr8 = bitcast $2 * %0 to i8 * + call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1) ; readwrite 0, locality 1 + ret void +} +define internal void @__prefetch_read_nt_$1($2 *) alwaysinline { + %ptr8 = bitcast $2 * %0 to i8 * + call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0) ; readwrite 0, locality 0 + ret void +} +') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; define(`stdlib_core', ` @@ -779,6 +814,25 @@ define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone al ret <$1 x i32> %0 } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; prefetching: definitions for each uniform and varying element type + +prefetch_read(uniform_bool, i1) +prefetch_read(uniform_int8, i8) +prefetch_read(uniform_int16, i16) +prefetch_read(uniform_int32, i32) +prefetch_read(uniform_int64, i64) +prefetch_read(uniform_float, float) +prefetch_read(uniform_double, double) + +prefetch_read(varying_bool, <$1 x i32>) +prefetch_read(varying_int8, <$1 x i8>) +prefetch_read(varying_int16, <$1 x i16>) +prefetch_read(varying_int32, <$1 x i32>) +prefetch_read(varying_int64, <$1 x i64>) +prefetch_read(varying_float, <$1 x float>) +prefetch_read(varying_double, <$1 x double>) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; stdlib transcendentals ;; diff --git a/docs/ispc.txt b/docs/ispc.txt index b72a4849..3ba925dd 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -79,6 +79,7 @@ Contents: + `Packed Load and Store Operations`_ + `Conversions To and From Half-Precision Floats`_ + `Atomic 
Operations and Memory Fences`_ + + `Prefetches`_ + `Low-Level Bits`_ * `Interoperability with the Application`_ @@ -1990,6 +1991,39 @@ code. void memory_barrier(); +Prefetches +---------- + +The standard library has a variety of functions to prefetch data into the +processor's cache. While modern CPUs have automatic prefetchers that do a +reasonable job of prefetching data to the cache before it's needed, high +performance applications may find it helpful to issue explicit prefetches +of their own. + +For example, this code shows how to prefetch data to the processor's L1 +cache while iterating over the items in an array. + +:: + + uniform int32 array[...]; + for (uniform int i = 0; i < count; ++i) { + // do computation with array[i] + prefetch_l1(array[i+32]); + } + +The standard library has routines to prefetch to the L1, L2, and L3 +caches. It also has a variant, ``prefetch_nt()``, that indicates that the +value being prefetched isn't expected to be used more than once (so it +should be given high priority for eviction from the cache). + +:: + + void prefetch_{l1,l2,l3,nt}(const reference TYPE) + +These functions are available for all of the basic types in the +language--``int8``, ``int16``, ``int32``, ``float``, and so forth. 
+ + Low-Level Bits -------------- diff --git a/stdlib.ispc b/stdlib.ispc index bbd9515b..9907904f 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -315,6 +315,39 @@ static inline uniform int lanemask() { return __movmsk(__mask); } +/////////////////////////////////////////////////////////////////////////// +// Prefetching: prefetch_l1/l2/l3/nt wrappers that forward to the __prefetch_read_ builtins (_refsconst variants) + +#define PREFETCHES(NAME, TYPE) \ +static inline void prefetch_l1(const reference TYPE ptr) { \ + __prefetch_read_1_##NAME##_refsconst(ptr); \ +} \ +static inline void prefetch_l2(const reference TYPE ptr) { \ + __prefetch_read_2_##NAME##_refsconst(ptr); \ +} \ +static inline void prefetch_l3(const reference TYPE ptr) { \ + __prefetch_read_3_##NAME##_refsconst(ptr); \ +} \ + static inline void prefetch_nt(const reference TYPE ptr) { \ + __prefetch_read_nt_##NAME##_refsconst(ptr); \ +} + +PREFETCHES(uniform_int8, uniform int8) +PREFETCHES(uniform_int16, uniform int16) +PREFETCHES(uniform_int32, uniform int32) +PREFETCHES(uniform_int64, uniform int64) +PREFETCHES(uniform_float, uniform float) +PREFETCHES(uniform_double, uniform double) + +PREFETCHES(varying_int8, int8) +PREFETCHES(varying_int16, int16) +PREFETCHES(varying_int32, int32) +PREFETCHES(varying_int64, int64) +PREFETCHES(varying_float, float) +PREFETCHES(varying_double, double) + +#undef PREFETCHES + /////////////////////////////////////////////////////////////////////////// // Horizontal ops / reductions @@ -522,6 +555,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap) DEFINE_ATOMIC_OP(double,double,swap,swap) +#undef DEFINE_ATOMIC_OP + #define ATOMIC_DECL_CMPXCHG(TA, TB) \ static inline TA atomic_compare_exchange_global( \ uniform reference TA ref, TA oldval, TA newval) { \ @@ -538,6 +573,8 @@ ATOMIC_DECL_CMPXCHG(int64, int64) ATOMIC_DECL_CMPXCHG(unsigned int64, int64) ATOMIC_DECL_CMPXCHG(double, double) +#undef ATOMIC_DECL_CMPXCHG + /////////////////////////////////////////////////////////////////////////// // Floating-Point Math diff --git a/type.cpp b/type.cpp index 
5b419c88..053a37d3 100644 --- a/type.cpp +++ b/type.cpp @@ -1541,7 +1541,7 @@ StructType::GetElementNumber(const std::string &n) const { // ReferenceType ReferenceType::ReferenceType(const Type *t, bool ic) - : isConst(ic), targetType(t) { + : isConst(ic), targetType(t->GetAsNonConstType()) { // target is stored via GetAsNonConstType(); the reference's own constness is isConst } @@ -2136,8 +2136,9 @@ Type::Equal(const Type *a, const Type *b) { const ReferenceType *rta = dynamic_cast(a); const ReferenceType *rtb = dynamic_cast(b); if (rta != NULL && rtb != NULL) - return Type::Equal(rta->GetReferenceTarget(), - rtb->GetReferenceTarget()); + return ((rta->IsConstType() == rtb->IsConstType()) && // references must agree on constness as well as on target + Type::Equal(rta->GetReferenceTarget(), + rtb->GetReferenceTarget())); const FunctionType *fta = dynamic_cast(a); const FunctionType *ftb = dynamic_cast(b);