diff --git a/builtins.cpp b/builtins.cpp index 4da5bb44..bdc132ca 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -516,11 +516,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod FATAL("logic error"); } - // Add a declaration of void *ISPCMalloc(int64_t). The user is - // responsible for linking in a definition of this if it's needed by - // the compiled program. + // Add a declaration of void *ISPCMalloc(int64_t size, int alignment). + // The user is responsible for linking in a definition of this if it's + // needed by the compiled program. { std::vector argTypes; argTypes.push_back(llvm::Type::getInt64Ty(*ctx)); + argTypes.push_back(llvm::Type::getInt32Ty(*ctx)); llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType, argTypes, false); llvm::Function *func = diff --git a/ctx.cpp b/ctx.cpp index 9548a777..1087ccc1 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -757,7 +757,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { llvm::Value * -FunctionEmitContext::EmitMalloc(const llvm::Type *ty) { +FunctionEmitContext::EmitMalloc(const llvm::Type *ty, int align) { // Emit code to compute the size of the given type using a GEP with a // NULL base pointer, indexing one element of the given type, and // casting the resulting 'pointer' to an int giving its size. 
@@ -767,12 +767,13 @@ FunctionEmitContext::EmitMalloc(const llvm::Type *ty) { llvm::Value *poffset = llvm::GetElementPtrInst::Create(nullPtr, &index[0], &index[1], "offset_ptr", bblock); AddDebugPos(poffset); - llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int"); + llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int"); // And given the size, call the malloc function llvm::Function *fmalloc = m->module->getFunction("ISPCMalloc"); assert(fmalloc != NULL); - llvm::Value *mem = CallInst(fmalloc, sizeOf, "raw_argmem"); + llvm::Value *mem = CallInst(fmalloc, sizeOf, LLVMInt32(align), + "raw_argmem"); // Cast the void * back to the result pointer type return BitCastInst(mem, ptrType, "mem_bitcast"); } @@ -1921,12 +1922,19 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee, static_cast(pt->getElementType()); assert(argStructType->getNumElements() == argVals.size() + 1); - // Use alloca for space for the task args. KEY DETAIL: pass false - // to the call of FunctionEmitContext::AllocaInst so that the alloca - // doesn't happen just once at the top of the function, but happens - // each time the enclosing basic block executes. int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); +#ifdef ISPC_IS_WINDOWS + // Use malloc() to allocate storage on Windows, since the stack is + // generally not big enough there to do enough allocations for lots of + // tasks and then things crash horribly... + llvm::Value *argmem = EmitMalloc(argStructType, align); +#else + // Use alloca for space for the task args on OSX and Linux. KEY + // DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so + // that the alloca doesn't happen just once at the top of the function, + // but happens each time the enclosing basic block executes. 
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false); +#endif // ISPC_IS_WINDOWS llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType); // Copy the values of the parameters into the appropriate place in diff --git a/ctx.h b/ctx.h index 236bfc4d..f16fa398 100644 --- a/ctx.h +++ b/ctx.h @@ -213,7 +213,7 @@ public: /** Emit code to call the user-supplied ISPCMalloc function to allocate space for an object of thee given type. Returns the pointer value returned by the ISPCMalloc call. */ - llvm::Value *EmitMalloc(const llvm::Type *ty); + llvm::Value *EmitMalloc(const llvm::Type *ty, int align = 0); /** Emit code to call the user-supplied ISPCFree function, passing it the given pointer to storage previously allocated by an diff --git a/docs/ispc.txt b/docs/ispc.txt index 09e3c590..6376b025 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -1375,13 +1375,25 @@ parallel execution. If you use the task launch feature in ``ispc``, you must provide C/C++ implementations of two functions and link them into your final executable -file: +file. Although these functions may be implemented in either language, they +must have "C" linkage (i.e. their prototypes must be declared inside an +``extern "C"`` block if they are defined in C++.) :: void ISPCLaunch(void *funcptr, void *data); void ISPCSync(); +On Windows, two additional functions must be provided to dynamically +allocate and free memory to store the arguments passed to tasks. (On OSX +and Linux, the stack provides memory for task arguments; on Windows, the +stack is generally not large enough to do this for large numbers of tasks.) + +:: + + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); + These are called by the task launch code generated by the ``ispc`` compiler; the first is called to launch to launch a task and the second is called to wait for, respectively. 
(Factoring them out in this way diff --git a/examples/mandelbrot_tasks/tasks_concrt.cpp b/examples/mandelbrot_tasks/tasks_concrt.cpp index 753b4fb3..b70d5cbe 100644 --- a/examples/mandelbrot_tasks/tasks_concrt.cpp +++ b/examples/mandelbrot_tasks/tasks_concrt.cpp @@ -37,6 +37,7 @@ #include #include using namespace Concurrency; +#include #include #include #include @@ -45,6 +46,8 @@ using namespace Concurrency; extern "C" { void ISPCLaunch(void *f, void *data); void ISPCSync(); + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); } typedef void (*TaskFuncType)(void *, int, int); @@ -126,3 +129,13 @@ void ISPCSync() { taskOffset = 0; } + + +void *ISPCMalloc(int64_t size, int32_t alignment) { + return _aligned_malloc(size, alignment); +} + + +void ISPCFree(void *ptr) { + _aligned_free(ptr); +} diff --git a/ispc_test.cpp b/ispc_test.cpp index 3665aa42..f6bece46 100644 --- a/ispc_test.cpp +++ b/ispc_test.cpp @@ -33,6 +33,10 @@ #define _CRT_SECURE_NO_WARNINGS +#ifdef ISPC_IS_WINDOWS +#define NOMINMAX +#include +#endif #include #include @@ -77,6 +81,8 @@ extern "C" { extern "C" { void ISPCLaunch(void *, void *); void ISPCSync(); + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); } void ISPCLaunch(void *func, void *data) { @@ -89,6 +95,18 @@ void ISPCLaunch(void *func, void *data) { void ISPCSync() { } + +#ifdef ISPC_IS_WINDOWS +void *ISPCMalloc(int64_t size, int32_t alignment) { + return _aligned_malloc(size, alignment); +} + + +void ISPCFree(void *ptr) { + _aligned_free(ptr); +} +#endif + static void usage(int ret) { fprintf(stderr, "usage: ispc_test\n"); fprintf(stderr, "\t[-h/--help]\tprint help\n"); @@ -144,6 +162,12 @@ static bool lRunTest(const char *fn) { ee->addGlobalMapping(func, (void *)ISPCLaunch); if ((func = module->getFunction("ISPCSync")) != NULL) ee->addGlobalMapping(func, (void *)ISPCSync); +#ifdef ISPC_IS_WINDOWS + if ((func = module->getFunction("ISPCMalloc")) != NULL) + ee->addGlobalMapping(func, 
(void *)ISPCMalloc); + if ((func = module->getFunction("ISPCFree")) != NULL) + ee->addGlobalMapping(func, (void *)ISPCFree); +#endif // ISPC_IS_WINDOWS if ((func = module->getFunction("putchar")) != NULL) ee->addGlobalMapping(func, (void *)putchar); if ((func = module->getFunction("printf")) != NULL) diff --git a/ispc_test.vcxproj b/ispc_test.vcxproj index bd7a6407..353e3020 100755 --- a/ispc_test.vcxproj +++ b/ispc_test.vcxproj @@ -52,7 +52,7 @@ Level3 Disabled - _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ISPC_IS_WINDOWS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(LLVM_INSTALL_DIR)/include @@ -70,7 +70,7 @@ MaxSpeed true true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ISPC_IS_WINDOWS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) $(LLVM_INSTALL_DIR)/include diff --git a/module.cpp b/module.cpp index 09db04fd..10d74861 100644 --- a/module.cpp +++ b/module.cpp @@ -640,6 +640,14 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function, assert(threadCountSym); threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); ctx->StoreInst(threadCount, threadCountSym->storagePtr); + +#ifdef ISPC_IS_WINDOWS + // On Windows, we dynamically allocate space for the task arguments + // (see FunctionEmitContext::LaunchInst().) Here is where we emit + // the code to free that memory, now that we've copied the + // parameter values out of the structure. + ctx->EmitFree(structParamPtr); +#endif // ISPC_IS_WINDOWS } else { // Regular, non-task function