Add various prefetch functions to the standard library.

2011-08-03 12:07:30 -07:00
parent 467f1e71d7
commit 0ac4f7b620
6 changed files with 168 additions and 16 deletions
--- a/5
+++ b/5
@@ -12,7 +12,8 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \
 LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
 LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
-LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
+LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
 LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
 BUILD_DATE=$(shell date +%Y%m%d)
 BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
@@ -105,7 +106,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
 objs/builtins-%.cpp: builtins-%.ll builtins.m4 builtins-sse.ll
 	@echo Creating C++ source from builtin definitions file $<
-	@m4 builtins.m4 $< | ./bitcode2cpp.py $< > $@
+	@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
 objs/builtins-%.o: objs/builtins-%.cpp
 	@echo Compiling $<
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -170,6 +170,27 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
 }
 /** Creates an ispc Symbol called `name` that refers to the LLVM function
     `func` and adds it to `symbolTable`.  The symbol's function type is
     built from `returnType` and `argTypes`; `ftype` is consulted only for
     its parameter count, which determines how many NULL default-argument
     slots are attached to the function type.
  */
 static void
 lCreateSymbol(const std::string &name, const Type *returnType,
               const std::vector<const Type *> &argTypes,
               const llvm::FunctionType *ftype, llvm::Function *func,
               SymbolTable *symbolTable) {
    // The source position used for these symbols carries the name "__stdlib".
    SourcePos stdlibPos;
    stdlibPos.name = "__stdlib";

    // One NULL default-argument entry per parameter of the LLVM function.
    const unsigned int paramCount = ftype->getNumParams();
    std::vector<ConstExpr *> nullDefaults(paramCount,
                                          static_cast<ConstExpr *>(NULL));

    FunctionType *symbolType =
        new FunctionType(returnType, argTypes, stdlibPos);
    symbolType->SetArgumentDefaults(nullDefaults);

    Symbol *symbol = new Symbol(name, stdlibPos, symbolType);
    symbol->function = func;
    symbolTable->AddFunction(symbol);
 }
 /** Given an LLVM function declaration, synthesize the equivalent ispc
    symbol for the function (if possible).  Returns true on success, false
    on failure.
@@ -221,7 +242,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
        // Iterate over the arguments and try to find their equivalent ispc
        // types.  Track if any of the arguments has an integer type.
-        bool anyIntArgs = false;
+        bool anyIntArgs = false, anyReferenceArgs = false;
        std::vector<const Type *> argTypes;
        for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
            const llvm::Type *llvmArgType = ftype->getParamType(j);
@@ -230,22 +251,26 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
                return false;
            anyIntArgs |= 
                (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
            anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
            argTypes.push_back(type);
        }
        // Always create the symbol the first time through, in particular
        // so that we get symbols for things with no integer types!
-        if (i == 0 || anyIntArgs == true) {
+        if (i == 0 || anyIntArgs == true)
-            FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
+            lCreateSymbol(name, returnType, argTypes, ftype, func, symbolTable);
            // set NULL default arguments
            std::vector<ConstExpr *> defaults;
            for (unsigned int j = 0; j < ftype->getNumParams(); ++j)
                defaults.push_back(NULL);
            funcType->SetArgumentDefaults(defaults);
-            Symbol *sym = new Symbol(name, noPos, funcType);
+        // If there are any reference types, also make a variant of the
-            sym->function = func;
+        // symbol that has them as const references.  This obviously
-            symbolTable->AddFunction(sym);
+        // doesn't make sense for many builtins, but we'll give the stdlib
        // the option to call one if it needs one.
        if (anyReferenceArgs == true) {
            for (unsigned int j = 0; j < argTypes.size(); ++j) {
                if (dynamic_cast<const ReferenceType *>(argTypes[j]) != NULL)
                    argTypes[j] = argTypes[j]->GetAsConstType();
                lCreateSymbol(name + "_refsconst", returnType, argTypes, 
                              ftype, func, symbolTable);
            }
        }
    }
--- a/builtins.m4
+++ b/builtins.m4
@@ -557,6 +557,41 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $
 }
 ')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; prefetch definitions
 ; prefetch has a new parameter in LLVM3.0, to distinguish between instruction
 ; and data caches--the declaration is now:
 ; declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality,
 ;                             i32 %cachetype)  (cachetype 1 == data cache)
 ; however, the version below seems to still work...
 declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
 ;; Macro taking two arguments: a name suffix and an LLVM element type.
 ;; It emits four always-inlined functions, __prefetch_read_{1,2,3,nt}_<suffix>,
 ;; each of which casts its pointer argument to i8 * and calls llvm.prefetch.
 ;; The second operand passed to llvm.prefetch is always 0 and the third
 ;; (locality) operand is 3, 2, 1 and 0 for the 1, 2, 3 and nt variants.
 ;; NOTE(review): presumably higher locality values ask for the data to be
 ;; kept closer to the core; confirm against the LLVM language reference.
 define(`prefetch_read', `
 define internal void @__prefetch_read_1_$1($2 *) alwaysinline {
  %ptr8 = bitcast $2 * %0 to i8 *
  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
  ret void
 }
 define internal void @__prefetch_read_2_$1($2 *) alwaysinline {
  %ptr8 = bitcast $2 * %0 to i8 *
  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
  ret void
 }
 define internal void @__prefetch_read_3_$1($2 *) alwaysinline {
  %ptr8 = bitcast $2 * %0 to i8 *
  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
  ret void
 }
 define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
  %ptr8 = bitcast $2 * %0 to i8 *
  call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
  ret void
 }
 ')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 define(`stdlib_core', `
@@ -779,6 +814,25 @@ define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone al
  ret <$1 x i32> %0
 }
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; prefetching
 prefetch_read(uniform_bool, i1)
 prefetch_read(uniform_int8, i8)
 prefetch_read(uniform_int16, i16)
 prefetch_read(uniform_int32, i32)
 prefetch_read(uniform_int64, i64)
 prefetch_read(uniform_float, float)
 prefetch_read(uniform_double, double)
 prefetch_read(varying_bool, <$1 x i32>)
 prefetch_read(varying_int8, <$1 x i8>)
 prefetch_read(varying_int16, <$1 x i16>)
 prefetch_read(varying_int32, <$1 x i32>)
 prefetch_read(varying_int64, <$1 x i64>)
 prefetch_read(varying_float, <$1 x float>)
 prefetch_read(varying_double, <$1 x double>)
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; stdlib transcendentals
 ;;
--- a/docs/ispc.txt
+++ b/docs/ispc.txt
@@ -79,6 +79,7 @@ Contents:
  + `Packed Load and Store Operations`_
  + `Conversions To and From Half-Precision Floats`_
  + `Atomic Operations and Memory Fences`_
  + `Prefetches`_
  + `Low-Level Bits`_
 * `Interoperability with the Application`_
@@ -1990,6 +1991,39 @@ code.
    void memory_barrier();
 Prefetches
 ----------
 The standard library has a variety of functions to prefetch data into the
 processor's cache.  While modern CPUs have automatic prefetchers that do a
 reasonable job of bringing data into the cache before it's needed, high
 performance applications may find it helpful to issue explicit prefetches
 for data that they know will be needed soon.
 For example, this code shows how to prefetch data to the processor's L1
 cache while iterating over the items in an array.  
 ::
   uniform int32 array[...];
   for (uniform int i = 0; i < count; ++i) {
       // do computation with array[i]
       prefetch_l1(array[i+32]);
   }
 The standard library has routines to prefetch to the L1, L2, and L3
 caches.  It also has a variant, ``prefetch_nt()``, that indicates that the
 value being prefetched isn't expected to be used more than once (so it
 should be given high priority for eviction from the cache).
 ::
    void prefetch_{l1,l2,l3,nt}(const reference TYPE)
 These functions are available for all of the basic types in the
 language--``int8``, ``int16``, ``int32``, ``float``, and so forth.
 Low-Level Bits
 --------------
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -315,6 +315,39 @@ static inline uniform int lanemask() {
    return __movmsk(__mask);
 }
 ///////////////////////////////////////////////////////////////////////////
 // Prefetching
 // Defines the prefetch_l1(), prefetch_l2(), prefetch_l3() and prefetch_nt()
 // overloads for a single type.  NAME is pasted into the name of the
 // __prefetch_read_*_<NAME>_refsconst builtin that each overload forwards
 // to, and TYPE is the ispc type of the value passed by const reference.
 #define PREFETCHES(NAME, TYPE)                                  \
 static inline void prefetch_l1(const reference TYPE ptr) {      \
    __prefetch_read_1_##NAME##_refsconst(ptr);                  \
 }                                                               \
 static inline void prefetch_l2(const reference TYPE ptr) {      \
    __prefetch_read_2_##NAME##_refsconst(ptr);                  \
 }                                                               \
 static inline void prefetch_l3(const reference TYPE ptr) {      \
    __prefetch_read_3_##NAME##_refsconst(ptr);                  \
 }                                                               \
 static inline void prefetch_nt(const reference TYPE ptr) {     \
     __prefetch_read_nt_##NAME##_refsconst(ptr);                \
 }
 PREFETCHES(uniform_int8, uniform int8)
 PREFETCHES(uniform_int16, uniform int16)
 PREFETCHES(uniform_int32, uniform int32)
 PREFETCHES(uniform_int64, uniform int64)
 PREFETCHES(uniform_float, uniform float)
 PREFETCHES(uniform_double, uniform double)
 PREFETCHES(varying_int8, int8)
 PREFETCHES(varying_int16, int16)
 PREFETCHES(varying_int32, int32)
 PREFETCHES(varying_int64, int64)
 PREFETCHES(varying_float, float)
 PREFETCHES(varying_double, double)
 #undef PREFETCHES
 ///////////////////////////////////////////////////////////////////////////
 // Horizontal ops / reductions
@@ -522,6 +555,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
 DEFINE_ATOMIC_OP(double,double,swap,swap)
 #undef DEFINE_ATOMIC_OP
 #define ATOMIC_DECL_CMPXCHG(TA, TB)                                        \
 static inline TA atomic_compare_exchange_global(                           \
         uniform reference TA ref, TA oldval, TA newval) {                 \
@@ -538,6 +573,8 @@ ATOMIC_DECL_CMPXCHG(int64, int64)
 ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
 ATOMIC_DECL_CMPXCHG(double, double)
 #undef ATOMIC_DECL_CMPXCHG
 ///////////////////////////////////////////////////////////////////////////
 // Floating-Point Math
--- a/type.cpp
+++ b/type.cpp
@@ -1541,7 +1541,7 @@ StructType::GetElementNumber(const std::string &n) const {
 // ReferenceType
 ReferenceType::ReferenceType(const Type *t, bool ic) 
-    : isConst(ic), targetType(t) {
+    : isConst(ic), targetType(t->GetAsNonConstType()) {
 }
@@ -2136,8 +2136,9 @@ Type::Equal(const Type *a, const Type *b) {
    const ReferenceType *rta = dynamic_cast<const ReferenceType *>(a);
    const ReferenceType *rtb = dynamic_cast<const ReferenceType *>(b);
    if (rta != NULL && rtb != NULL)
-        return Type::Equal(rta->GetReferenceTarget(),
+        return ((rta->IsConstType() == rtb->IsConstType()) &&
-                           rtb->GetReferenceTarget());
+                Type::Equal(rta->GetReferenceTarget(),
                            rtb->GetReferenceTarget()));
    const FunctionType *fta = dynamic_cast<const FunctionType *>(a);
    const FunctionType *ftb = dynamic_cast<const FunctionType *>(b);