diff --git a/cbackend.cpp b/cbackend.cpp index 3db2d504..2ac6cc0b 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -63,7 +63,13 @@ #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Verifier.h" +#if defined(LLVM_3_5) + #include "llvm/IR/Verifier.h" + #include +#else + #include "llvm/Analysis/Verifier.h" + #include +#endif #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/IntrinsicLowering.h" @@ -102,7 +108,6 @@ #include #include #include -#include #include // Some ms header decided to define setjmp as _setjmp, undo this for this file. #ifdef _MSC_VER @@ -241,9 +246,7 @@ namespace { class CBEMCAsmInfo : public llvm::MCAsmInfo { public: CBEMCAsmInfo() { -#if defined(LLVM_3_5) - GlobalPrefix = '\0'; -#else +#if !defined(LLVM_3_5) GlobalPrefix = ""; #endif PrivateGlobalPrefix = ""; diff --git a/docs/ispc.rst b/docs/ispc.rst index 9464dcde..2c41301c 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -4743,13 +4743,13 @@ have a declaration like: }; Because ``varying`` types have size that depends on the size of the gang of -program instances, ``ispc`` prohibits any varying types from being used in -parameters to functions with the ``export`` qualifier. (``ispc`` also -prohibits passing structures that themselves have varying types as members, -etc.) Thus, all datatypes that are shared with the application must have -the ``uniform`` or ``soa`` rate qualifier applied to them. (See `Use -"Structure of Arrays" Layout When Possible`_ in the Performance Guide for -more discussion of how to load vectors of SOA data from the application.) +program instances, ``ispc`` has restrictions on using varying types in +parameters to functions with the ``export`` qualifier. ``ispc`` prohibits +parameters to exported functions from having varying type unless the parameter is +of pointer type. 
(That is, ``varying float`` isn't allowed, but ``varying float * uniform`` +(uniform pointer to varying float) is permitted.) Care must be taken +by the programmer to ensure that the data being accessed through any +pointers to varying data has the correct organization. Similarly, ``struct`` types shared with the application can also have embedded pointers. @@ -4770,6 +4770,30 @@ On the ``ispc`` side, the corresponding ``struct`` declaration is: float * uniform foo, * uniform bar; }; +If a pointer to a varying ``struct`` type appears in an exported function, +the generated header file will have a definition like (for 8-wide SIMD): + +:: + + // C/C++ code + struct Node { + int count[8]; + float pos[3][8]; + }; + + +In the case of multiple target compilation, ``ispc`` will generate multiple +header files and a "general" header file with definitions for multiple sizes. +Any pointers to varyings in exported functions will be rewritten as ``void *``. +At runtime, the ``ispc`` dispatch mechanism will cast these pointers to the appropriate +types. Programmers can +provide C/C++ code with a mechanism to determine the gang width used +at runtime by ``ispc`` by creating an exported function that simply +returns the value of ``programCount``. An example of such a function +is provided in the file ``examples/util/util.isph`` included in the ``ispc`` +distribution. + + There is one subtlety related to data layout to be aware of: ``ispc`` stores ``uniform`` short-vector types in memory with their first element at the machine's natural vector alignment (i.e. 16 bytes for a target that is diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc index f9b0be4c..c765b29b 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc @@ -31,6 +31,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#define _3D_TASKING + static inline int mandel(float c_re, float c_im, int count) { float z_re = c_re, z_im = c_im; @@ -57,26 +59,34 @@ task void mandelbrot_scanline(uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int height, +#ifdef _3D_TASKING uniform int xspan, uniform int yspan, +#else + uniform int span, +#endif uniform int maxIterations, uniform int output[]) { +#ifdef _3D_TASKING const uniform int xstart = taskIndex0 * xspan; const uniform int xend = min(xstart + xspan, width); - const uniform int ystart = taskIndex1 * yspan; const uniform int yend = min(ystart + yspan, height); - foreach (yi = ystart ... yend, xi = xstart ... xend) { +#else + uniform int ystart = taskIndex * span; + uniform int yend = min((taskIndex+1) * span, (unsigned int)height); + + foreach (yi = ystart ... yend, xi = 0 ... width) { +#endif float x = x0 + xi * dx; float y = y0 + yi * dy; int index = yi * width + xi; output[index] = mandel(x, y, maxIterations); } - } -#if 1 + export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, @@ -84,16 +94,17 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; +#ifdef _3D_TASKING const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */ const uniform int yspan = 16; - -#if 1 launch [width/xspan, height/yspan] -#else - launch [height/yspan][width/xspan] -#endif - mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, maxIterations, output); -} +#else + uniform int span = 4; + + launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span, + maxIterations, output); #endif +} diff --git a/examples/perfbench/perfbench.cpp b/examples/perfbench/perfbench.cpp index 04e72bd9..1defffe7 100644 
--- a/examples/perfbench/perfbench.cpp +++ b/examples/perfbench/perfbench.cpp @@ -69,6 +69,7 @@ static PerfTest tests[] = { { xyzSumAOS, "serial", ispc::xyzSumAOSStdlib, "ispc", "AOS vector element sum (stdlib swizzle)" }, { xyzSumAOS, "serial", ispc::xyzSumAOSNoCoalesce, "ispc", "AOS vector element sum (no coalescing)" }, { xyzSumSOA, "serial", ispc::xyzSumSOA, "ispc", "SOA vector element sum" }, + { xyzSumSOA, "serial", (FuncType *) ispc::xyzSumVarying, "ispc", "Varying vector element sum" }, { ispc::gathers, "gather", ispc::loads, "vector load", "Memory reads" }, { ispc::scatters, "scatter", ispc::stores, "vector store", "Memory writes" }, }; diff --git a/examples/perfbench/perfbench.ispc b/examples/perfbench/perfbench.ispc index 38fe6cee..4a189a71 100644 --- a/examples/perfbench/perfbench.ispc +++ b/examples/perfbench/perfbench.ispc @@ -104,6 +104,52 @@ export void xyzSumSOA(uniform float array[], uniform int count, result[2] = reduce_add(zsum); } +export void xyzSumVarying(varying float array[], uniform int count, + uniform float zeros[], uniform float result[]) { + float xsum = 0, ysum = 0, zsum = 0; + varying float * uniform ap = array; + assert(programCount <= 8); + + if (programCount == 4) { + for (uniform int i = 0; i < count/3; i += 8) { + float x0 = ap[0]; + float y0 = ap[2]; + float z0 = ap[4]; + + xsum += x0; + ysum += y0; + zsum += z0; + + float x1 = ap[1]; + float y1 = ap[3]; + float z1 = ap[5]; + + xsum += x1; + ysum += y1; + zsum += z1; + + ap += 6; + } + } + else { + for (uniform int i = 0; i < count/3; i += 8) { + // programCount == 8 + float x = ap[0]; + float y = ap[1]; + float z = ap[2]; + + xsum += x; + ysum += y; + zsum += z; + + ap += 3; + } + } + result[0] = reduce_add(xsum); + result[1] = reduce_add(ysum); + result[2] = reduce_add(zsum); +} + export void gathers(uniform float array[], uniform int count, uniform float zeros[], uniform float result[]) { float sum = 0; diff --git a/examples/util/util.isph b/examples/util/util.isph new file 
mode 100644 index 00000000..1e598dc5 --- /dev/null +++ b/examples/util/util.isph @@ -0,0 +1,5 @@ + +// utility function to read the value of programCount from C/C++ +export uniform int32 get_programCount() { + return programCount; +} diff --git a/func.cpp b/func.cpp index 3923e780..f12f244b 100644 --- a/func.cpp +++ b/func.cpp @@ -71,10 +71,15 @@ #include #include #include -#include +#if defined(LLVM_3_5) + #include + #include +#else + #include + #include +#endif #include #include -#include Function::Function(Symbol *s, Stmt *c) { sym = s; @@ -522,7 +527,14 @@ Function::GenerateIR() { #else appFunction->setDoesNotThrow(); #endif - + // We should iterate from 1 because zero parameter is return. + // We should iterate till getNumParams instead of getNumParams+1 because new + // function is export function and doesn't contain the last parameter "mask". + for (int i = 1; i < function->getFunctionType()->getNumParams(); i++) { + if (function->doesNotAlias(i)) { + appFunction->setDoesNotAlias(i); + } + } g->target->markFuncWithTargetAttr(appFunction); if (appFunction->getName() != functionName) { diff --git a/module.cpp b/module.cpp index a407c2fe..f8936625 100644 --- a/module.cpp +++ b/module.cpp @@ -95,7 +95,13 @@ #include #include #endif -#include +#if defined(LLVM_3_5) + #include + #include +#else + #include + #include +#endif #include #include #include @@ -103,7 +109,6 @@ #include #include #include -#include #include #include @@ -576,9 +581,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE /** Given an arbitrary type, see if it or any of the leaf types contained in it has a type that's illegal to have exported to C/C++ - code--specifically, that it has a varying value in memory, or a pointer - to SOA data (which has a different representation than a regular - pointer. + code. 
(Note that it's fine for the original struct or a contained struct to be varying, so long as all of its members have bound 'uniform' @@ -610,15 +613,18 @@ lRecursiveCheckValidParamType(const Type *t, bool vectorOk) { const PointerType *pt = CastType(t); if (pt != NULL) { - if (pt->IsSlice() || pt->IsVaryingType()) - return false; + // Only allow exported uniform pointers + // Uniform pointers to varying data, however, are ok. + if (pt->IsVaryingType()) + return false; + else return lRecursiveCheckValidParamType(pt->GetBaseType(), true); } - if (t->IsVaryingType()) - return false; - else - return true; + if (t->IsVaryingType() && !vectorOk) + return false; + else + return true; } @@ -634,8 +640,8 @@ lCheckExportedParameterTypes(const Type *type, const std::string &name, if (CastType(type)) Error(pos, "Varying pointer type parameter \"%s\" is illegal " "in an exported function.", name.c_str()); - else if (CastType(type->GetBaseType())) - Error(pos, "Struct parameter \"%s\" with varying or vector typed " + if (CastType(type->GetBaseType())) + Error(pos, "Struct parameter \"%s\" with vector typed " "member(s) is illegal in an exported function.", name.c_str()); else if (CastType(type)) Error(pos, "Vector-typed parameter \"%s\" is illegal in an exported " @@ -847,9 +853,11 @@ Module::AddFunctionDeclaration(const std::string &name, const SourcePos &argPos = functionType->GetParameterSourcePos(i); // If the function is exported, make sure that the parameter - // doesn't have any varying stuff going on in it. - if (functionType->isExported) - lCheckExportedParameterTypes(argType, argName, argPos); + // doesn't have any funky stuff going on in it. + // JCB nomosoa - Varying is now a-ok. + if (functionType->isExported) { + lCheckExportedParameterTypes(argType, argName, argPos); + } // ISPC assumes that no pointers alias. 
(It should be possible to // specify when this is not the case, but this should be the @@ -952,7 +960,7 @@ Module::AddExportedTypes(const std::vectorfinalize(); @@ -1019,8 +1027,12 @@ Module::writeOutput(OutputType outputType, const char *outFileName, "has suffix \"%s\"?", fileType, outFileName, suffix); } - if (outputType == Header) - return writeHeader(outFileName); + if (outputType == Header) { + if (DHI) + return writeDispatchHeader(DHI); + else + return writeHeader(outFileName); + } else if (outputType == Deps) return writeDeps(outFileName); else if (outputType == HostStub) @@ -1149,6 +1161,19 @@ lGetElementStructType(const Type *t) { return NULL; } +static bool +lContainsPtrToVarying(const StructType *st) { + int numElts = st->GetElementCount(); + + for (int j = 0; j < numElts; ++j) { + const Type *t = st->GetElementType(j); + + if (t->IsVaryingType()) return true; + } + + return false; +} + /** Emits a declaration for the given struct to the given file. This function first makes sure that declarations for any structs that are @@ -1156,7 +1181,14 @@ lGetElementStructType(const Type *t) { */ static void lEmitStructDecl(const StructType *st, std::vector *emittedStructs, - FILE *file) { + FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { + + // if we're emitting this for a generic dispatch header file and it's + // struct that only contains uniforms, don't bother if we're emitting uniforms + if (printGenericHeader && !emitUnifs && !lContainsPtrToVarying(st)) { + return; + } + // Has this struct type already been declared? (This happens if it's a // member of another struct for which we emitted a declaration // previously.) 
@@ -1169,19 +1201,33 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt const StructType *elementStructType = lGetElementStructType(st->GetElementType(i)); if (elementStructType != NULL) - lEmitStructDecl(elementStructType, emittedStructs, file); + lEmitStructDecl(elementStructType, emittedStructs, file, printGenericHeader, emitUnifs); } // And now it's safe to declare this one emittedStructs->push_back(st); - fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); - fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); + + if (printGenericHeader && lContainsPtrToVarying(st)) { + fprintf(file, "#ifndef __ISPC_STRUCT_%s%d__\n", + st->GetStructName().c_str(), + g->target->getVectorWidth()); + fprintf(file, "#define __ISPC_STRUCT_%s%d__\n", + st->GetStructName().c_str(), + g->target->getVectorWidth()); + } + else { + fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); + fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); + } fprintf(file, "struct %s", st->GetStructName().c_str()); if (st->GetSOAWidth() > 0) // This has to match the naming scheme in // StructType::GetCDeclaration(). fprintf(file, "_SOA%d", st->GetSOAWidth()); + if (printGenericHeader && lContainsPtrToVarying(st)) { + fprintf(file, "%d", g->target->getVectorWidth()); + } fprintf(file, " {\n"); for (int i = 0; i < st->GetElementCount(); ++i) { @@ -1198,10 +1244,10 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt header file, emit their declarations. 
*/ static void -lEmitStructDecls(std::vector &structTypes, FILE *file) { +lEmitStructDecls(std::vector &structTypes, FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { std::vector emittedStructs; for (unsigned int i = 0; i < structTypes.size(); ++i) - lEmitStructDecl(structTypes[i], &emittedStructs, file); + lEmitStructDecl(structTypes[i], &emittedStructs, file, printGenericHeader, emitUnifs); } @@ -1366,14 +1412,20 @@ lGetExportedParamTypes(const std::vector &funcs, static void lPrintFunctionDeclarations(FILE *file, const std::vector &funcs, - bool useExternC=1) { + bool useExternC=1, bool rewriteForDispatch=false) { if (useExternC) fprintf(file, "#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)\nextern \"C\" {\n#endif // __cplusplus\n"); // fprintf(file, "#ifdef __cplusplus\nextern \"C\" {\n#endif // __cplusplus\n"); for (unsigned int i = 0; i < funcs.size(); ++i) { const FunctionType *ftype = CastType(funcs[i]->type); Assert(ftype); - std::string decl = ftype->GetCDeclaration(funcs[i]->name); + std::string decl; + if (rewriteForDispatch) { + decl = ftype->GetCDeclarationForDispatch(funcs[i]->name); + } + else { + decl = ftype->GetCDeclaration(funcs[i]->name); + } fprintf(file, " extern %s;\n", decl.c_str()); } if (useExternC) @@ -1819,6 +1871,137 @@ Module::writeHeader(const char *fn) { return true; } +struct DispatchHeaderInfo { + bool EmitUnifs; + bool EmitFuncs; + bool EmitFrontMatter; + bool EmitBackMatter; + bool Emit4; + bool Emit8; + bool Emit16; + FILE *file; + const char *fn; +}; + +bool +Module::writeDispatchHeader(DispatchHeaderInfo *DHI) { + FILE *f = DHI->file; + + if (DHI->EmitFrontMatter) { + fprintf(f, "//\n// %s\n// (Header automatically generated by the ispc compiler.)\n", DHI->fn); + fprintf(f, "// DO NOT EDIT THIS FILE.\n//\n\n"); + } + // Create a nice guard string from the filename, turning any + // non-number/letter characters into underbars + std::string guard = "ISPC_"; + const char *p = DHI->fn; + while (*p) { + 
if (isdigit(*p)) + guard += *p; + else if (isalpha(*p)) + guard += toupper(*p); + else + guard += "_"; + ++p; + } + if (DHI->EmitFrontMatter) { + fprintf(f, "#ifndef %s\n#define %s\n\n", guard.c_str(), guard.c_str()); + + fprintf(f, "#include \n\n"); + + + if (g->emitInstrumentation) { + fprintf(f, "#define ISPC_INSTRUMENTATION 1\n"); + fprintf(f, "extern \"C\" {\n"); + fprintf(f, " void ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask);\n"); + fprintf(f, "}\n"); + } + + // end namespace + fprintf(f, "\n"); + fprintf(f, "\n#ifdef __cplusplus\nnamespace ispc { /* namespace */\n#endif // __cplusplus\n\n"); + DHI->EmitFrontMatter = false; + } + + + // Collect single linear arrays of the exported and extern "C" + // functions + std::vector exportedFuncs, externCFuncs; + m->symbolTable->GetMatchingFunctions(lIsExported, &exportedFuncs); + m->symbolTable->GetMatchingFunctions(lIsExternC, &externCFuncs); + + int programCount = g->target->getVectorWidth(); + + if ((DHI->Emit4 && (programCount == 4)) || + (DHI->Emit8 && (programCount == 8)) || + (DHI->Emit16 && (programCount == 16))) { + // Get all of the struct, vector, and enumerant types used as function + // parameters. These vectors may have repeats. 
+ std::vector exportedStructTypes; + std::vector exportedEnumTypes; + std::vector exportedVectorTypes; + lGetExportedParamTypes(exportedFuncs, &exportedStructTypes, + &exportedEnumTypes, &exportedVectorTypes); + lGetExportedParamTypes(externCFuncs, &exportedStructTypes, + &exportedEnumTypes, &exportedVectorTypes); + + // Go through the explicitly exported types + for (int i = 0; i < (int)exportedTypes.size(); ++i) { + if (const StructType *st = CastType(exportedTypes[i].first)) + exportedStructTypes.push_back(st->GetAsUniformType()); + else if (const EnumType *et = CastType(exportedTypes[i].first)) + exportedEnumTypes.push_back(et->GetAsUniformType()); + else if (const VectorType *vt = CastType(exportedTypes[i].first)) + exportedVectorTypes.push_back(vt->GetAsUniformType()); + else + FATAL("Unexpected type in export list"); + } + + + // And print them + if (DHI->EmitUnifs) { + lEmitVectorTypedefs(exportedVectorTypes, f); + lEmitEnumDecls(exportedEnumTypes, f); + } + lEmitStructDecls(exportedStructTypes, f, true, DHI->EmitUnifs); + + // Update flags + DHI->EmitUnifs = false; + if (programCount == 4) { + DHI->Emit4 = false; + } + else if (programCount == 8) { + DHI->Emit8 = false; + } + else if (programCount == 16) { + DHI->Emit16 = false; + } + } + if (DHI->EmitFuncs) { + // emit function declarations for exported stuff... 
+ if (exportedFuncs.size() > 0) { + fprintf(f, "\n"); + fprintf(f, "///////////////////////////////////////////////////////////////////////////\n"); + fprintf(f, "// Functions exported from ispc code\n"); + fprintf(f, "///////////////////////////////////////////////////////////////////////////\n"); + lPrintFunctionDeclarations(f, exportedFuncs, 1, true); + fprintf(f, "\n"); + } + DHI->EmitFuncs = false; + } + + if (DHI->EmitBackMatter) { + // end namespace + fprintf(f, "\n"); + fprintf(f, "\n#ifdef __cplusplus\n} /* namespace */\n#endif // __cplusplus\n"); + + // end guard + fprintf(f, "\n#endif // %s\n", guard.c_str()); + DHI->EmitBackMatter = false; + } + + return true; +} void Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostream) const @@ -2033,13 +2216,16 @@ lSymbolIsExported(const Symbol *s) { // llvm::Function that were compiled for different compilation target ISAs. struct FunctionTargetVariants { FunctionTargetVariants() { - for (int i = 0; i < Target::NUM_ISAS; ++i) + for (int i = 0; i < Target::NUM_ISAS; ++i) { func[i] = NULL; + FTs[i] = NULL; + } } // The func array is indexed with the Target::ISA enumerant. Some // values may be NULL, indicating that the original function wasn't // compiled to the corresponding target ISA. 
llvm::Function *func[Target::NUM_ISAS]; + const FunctionType *FTs[Target::NUM_ISAS]; }; @@ -2054,6 +2240,7 @@ lGetExportedFunctions(SymbolTable *symbolTable, for (unsigned int i = 0; i < syms.size(); ++i) { FunctionTargetVariants &ftv = functions[syms[i]->name]; ftv.func[g->target->getISA()] = syms[i]->exportedFunction; + ftv.FTs[g->target->getISA()] = CastType(syms[i]->type); } } @@ -2160,6 +2347,51 @@ lAddExtractedGlobals(llvm::Module *module, } } +static llvm::FunctionType * +lGetVaryingDispatchType(FunctionTargetVariants &funcs) { + llvm::Type *ptrToInt8Ty = llvm::Type::getInt8PtrTy(*g->ctx); + llvm::FunctionType *resultFuncTy = NULL; + + for (int i = 0; i < Target::NUM_ISAS; ++i) { + if (funcs.func[i] == NULL) { + continue; + } + else { + bool foundVarying = false; + const FunctionType *ft = funcs.FTs[i]; + resultFuncTy = funcs.func[i]->getFunctionType(); + + int numArgs = ft->GetNumParameters(); + llvm::SmallVector ftype; + for (int j = 0; j < numArgs; ++j) { + ftype.push_back(resultFuncTy->getParamType(j)); + } + + for (int j = 0; j < numArgs; ++j) { + const Type *arg = ft->GetParameterType(j); + + if (arg->IsPointerType()) { + const Type *baseType = CastType(arg)->GetBaseType(); + // For each varying type pointed to, swap the LLVM pointer type + // with i8 * (as close as we can get to void *) + if (baseType->IsVaryingType()) { + ftype[j] = ptrToInt8Ty; + foundVarying = true; + } + } + } + if (foundVarying) { + resultFuncTy = llvm::FunctionType::get(resultFuncTy->getReturnType(), ftype, false); + } + } + } + + // We should've found at least one variant here + // or else something fishy is going on. + Assert(resultFuncTy); + + return resultFuncTy; +} /** Create the dispatch function for an exported ispc function. 
This function checks to see which vector ISAs the system the @@ -2187,11 +2419,12 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, // we'll start by generating an 'extern' declaration of each one that // we have in the current module so that we can then call out to that. llvm::Function *targetFuncs[Target::NUM_ISAS]; - llvm::FunctionType *ftype = NULL; + llvm::FunctionType *ftypes[Target::NUM_ISAS]; for (int i = 0; i < Target::NUM_ISAS; ++i) { if (funcs.func[i] == NULL) { targetFuncs[i] = NULL; + ftypes[i] = NULL; continue; } @@ -2202,14 +2435,23 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, // because we only allow uniform stuff to pass through the // export'ed function layer, they should all have the same memory // layout, so this is benign.. - if (ftype == NULL) - ftype = funcs.func[i]->getFunctionType(); + // JCB nomosoa - not anymore... + // add a helper to see if this type has any varying thingies? + // might be hard to detect.... + // If so, return a new type with the pointers to those replaced + // by i8 *'s. + // if (ftype == NULL) + ftypes[i] = funcs.func[i]->getFunctionType(); targetFuncs[i] = - llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage, + llvm::Function::Create(ftypes[i], llvm::GlobalValue::ExternalLinkage, funcs.func[i]->getName(), module); } + // New helper function checks to see if we need to rewrite the + // type for the dispatch function in case of pointers to varyings + llvm::FunctionType *ftype = lGetVaryingDispatchType(funcs); + bool voidReturn = ftype->getReturnType()->isVoidTy(); // Now we can emit the definition of the dispatch function.. @@ -2254,8 +2496,21 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, // the target-specific function. 
std::vector args; llvm::Function::arg_iterator argIter = dispatchFunc->arg_begin(); - for (; argIter != dispatchFunc->arg_end(); ++argIter) + llvm::Function::arg_iterator targsIter = targetFuncs[i]->arg_begin(); + for (; argIter != dispatchFunc->arg_end(); ++argIter, ++targsIter) { + // Check to see if we rewrote any types in the dispatch function. + // If so, create bitcasts for the appropriate pointer types. + if (argIter->getType() == targsIter->getType()) { args.push_back(argIter); + } + else { + llvm::CastInst *argCast = + llvm::CastInst::CreatePointerCast(argIter, targsIter->getType(), + "dpatch_arg_bitcast", callBBlock); + args.push_back(argCast); + } + + } if (voidReturn) { llvm::CallInst::Create(targetFuncs[i], args, "", callBBlock); llvm::ReturnInst::Create(*g->ctx, callBBlock); @@ -2292,7 +2547,6 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc, } } - // Given a map that holds the mapping from each of the 'export'ed functions // in the ispc program to the target-specific variants of the function, // create a llvm::Module that has a dispatch function for each exported @@ -2452,6 +2706,28 @@ Module::CompileAndOutput(const char *srcFile, std::map exportedFunctions; std::vector globals[Target::NUM_ISAS]; int errorCount = 0; + + // Handle creating a "generic" header file for multiple targets + // that use exported varyings + DispatchHeaderInfo DHI; + if ((targets.size() > 1) && (headerFileName != NULL)) { + DHI.file = fopen(headerFileName, "w"); + if (!DHI.file) { + perror("fopen"); + return false; + } + DHI.fn = headerFileName; + DHI.EmitUnifs = true; + DHI.EmitFuncs = true; + DHI.EmitFrontMatter = true; + DHI.Emit4 = true; + DHI.Emit8 = true; + DHI.Emit16 = true; + // This is toggled later. 
+ DHI.EmitBackMatter = false; + } + + for (unsigned int i = 0; i < targets.size(); ++i) { g->target = new Target(arch, cpu, targets[i].c_str(), generatePIC); if (!g->target->isValid()) @@ -2488,9 +2764,26 @@ Module::CompileAndOutput(const char *srcFile, // Only write the generate header file, if desired, the first // time through the loop here. - if (i == 0 && headerFileName != NULL) - if (!m->writeOutput(Module::Header, headerFileName)) - return 1; + if (headerFileName != NULL) { + if (i == targets.size()-1) { + // only print backmatter on the last target. + DHI.EmitBackMatter = true; + } + + const char *isaName = g->target->GetISAString(); + std::string targetHeaderFileName = + lGetTargetFileName(headerFileName, isaName); + // write out a header w/o target name for the first target only + if (!m->writeOutput(Module::Header, headerFileName, "", &DHI)) { + return 1; + } + if (!m->writeOutput(Module::Header, targetHeaderFileName.c_str())) { + return 1; + } + if (i == targets.size()-1) { + fclose(DHI.file); + } + } delete g->target; g->target = NULL; diff --git a/module.h b/module.h index a817d5ba..3609260c 100644 --- a/module.h +++ b/module.h @@ -50,6 +50,8 @@ namespace llvm class raw_string_ostream; } +struct DispatchHeaderInfo; + class Module { public: /** The name of the source file being compiled should be passed as the @@ -171,8 +173,10 @@ private: filename may be NULL, indicating that output should go to standard output. 
*/ bool writeOutput(OutputType ot, const char *filename, - const char *includeFileName = NULL); + const char *includeFileName = NULL, + DispatchHeaderInfo *DHI = 0); bool writeHeader(const char *filename); + bool writeDispatchHeader(DispatchHeaderInfo *DHI); bool writeDeps(const char *filename); bool writeDevStub(const char *filename); bool writeHostStub(const char *filename); diff --git a/opt.cpp b/opt.cpp index 9f2599a5..b0910b90 100644 --- a/opt.cpp +++ b/opt.cpp @@ -68,7 +68,13 @@ #endif #include #include -#include +#if defined(LLVM_3_5) + #include + #include +#else + #include + #include +#endif #include #include #include @@ -86,7 +92,6 @@ #include #endif #include -#include #include #include #include diff --git a/type.cpp b/type.cpp index 95a7a4ef..00795737 100644 --- a/type.cpp +++ b/type.cpp @@ -429,8 +429,7 @@ AtomicType::Mangle() const { std::string AtomicType::GetCDeclaration(const std::string &name) const { std::string ret; - if (variability != Variability::Uniform && - variability != Variability::SOA) { + if (variability == Variability::Unbound) { Assert(m->errorCount > 0); return ret; } @@ -457,9 +456,15 @@ AtomicType::GetCDeclaration(const std::string &name) const { ret += name; } - if (variability == Variability::SOA) { + if (variability == Variability::Varying || + variability == Variability::SOA) { char buf[32]; - sprintf(buf, "[%d]", variability.soaWidth); + // get program count + // g->mangleFunctionsNamesWithTarget - hack check for void * + int vWidth = (variability == Variability::Varying) ? 
+ g->target->getVectorWidth() : + variability.soaWidth; + sprintf(buf, "[%d]", vWidth); ret += buf; } @@ -751,8 +756,7 @@ EnumType::Mangle() const { std::string EnumType::GetCDeclaration(const std::string &varName) const { - if (variability != Variability::Uniform && - variability != Variability::SOA) { + if (variability == Variability::Unbound) { Assert(m->errorCount > 0); return ""; } @@ -768,9 +772,13 @@ EnumType::GetCDeclaration(const std::string &varName) const { ret += varName; } - if (variability == Variability::SOA) { + if (variability == Variability::SOA || + variability == Variability::Varying) { + int vWidth = (variability == Variability::Varying) ? + g->target->getVectorWidth() : + variability.soaWidth; char buf[32]; - sprintf(buf, "[%d]", variability.soaWidth); + sprintf(buf, "[%d]", vWidth); ret += buf; } @@ -1077,8 +1085,7 @@ PointerType::Mangle() const { std::string PointerType::GetCDeclaration(const std::string &name) const { if (isSlice || - (variability != Variability::Uniform && - variability != Variability::SOA)) { + (variability == Variability::Unbound)) { Assert(m->errorCount > 0); return ""; } @@ -1094,9 +1101,13 @@ PointerType::GetCDeclaration(const std::string &name) const { ret += std::string(" "); ret += name; - if (variability == Variability::SOA) { + if (variability == Variability::SOA || + variability == Variability::Varying) { + int vWidth = (variability == Variability::Varying) ? + g->target->getVectorWidth() : + variability.soaWidth; char buf[32]; - sprintf(buf, "[%d]", variability.soaWidth); + sprintf(buf, "[%d]", vWidth); ret += buf; } @@ -1422,6 +1433,7 @@ ArrayType::GetCDeclaration(const std::string &name) const { } int soaWidth = base->GetSOAWidth(); + int vWidth = (base->IsVaryingType()) ? 
g->target->getVectorWidth() : 0; base = base->GetAsUniformType(); std::string s = base->GetCDeclaration(name); @@ -1443,6 +1455,12 @@ ArrayType::GetCDeclaration(const std::string &name) const { s += buf; } + if (vWidth > 0) { + char buf[16]; + sprintf(buf, "[%d]", vWidth); + s += buf; + } + return s; } @@ -2851,13 +2869,55 @@ FunctionType::GetCDeclaration(const std::string &fname) const { CastType(pt->GetBaseType()) != NULL) { type = new ArrayType(pt->GetBaseType(), 0); } - + if (paramNames[i] != "") - ret += type->GetCDeclaration(paramNames[i]); + ret += type->GetCDeclaration(paramNames[i]); else - ret += type->GetString(); + ret += type->GetString(); if (i != paramTypes.size() - 1) - ret += ", "; + ret += ", "; + } + ret += ")"; + return ret; +} + + +std::string +FunctionType::GetCDeclarationForDispatch(const std::string &fname) const { + std::string ret; + ret += returnType->GetCDeclaration(""); + ret += " "; + ret += fname; + ret += "("; + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + const Type *type = paramTypes[i]; + + // Convert pointers to arrays to unsized arrays, which are more clear + // to print out for multidimensional arrays (i.e. "float foo[][4] " + // versus "float (foo *)[4]"). 
+ const PointerType *pt = CastType(type); + if (pt != NULL && + CastType(pt->GetBaseType()) != NULL) { + type = new ArrayType(pt->GetBaseType(), 0); + } + + // Change pointers to varying thingies to void * + if (pt != NULL && pt->GetBaseType()->IsVaryingType()) { + PointerType *t = PointerType::Void; + + if (paramNames[i] != "") + ret += t->GetCDeclaration(paramNames[i]); + else + ret += t->GetString(); + } + else { + if (paramNames[i] != "") + ret += type->GetCDeclaration(paramNames[i]); + else + ret += type->GetString(); + } + if (i != paramTypes.size() - 1) + ret += ", "; } ret += ")"; return ret; diff --git a/type.h b/type.h index 880f8574..0337be6e 100644 --- a/type.h +++ b/type.h @@ -872,6 +872,7 @@ public: std::string GetString() const; std::string Mangle() const; std::string GetCDeclaration(const std::string &fname) const; + std::string GetCDeclarationForDispatch(const std::string &fname) const; llvm::Type *LLVMType(llvm::LLVMContext *ctx) const; llvm::DIType GetDIType(llvm::DIDescriptor scope) const;