From 6e4c165c7e5ec1e8f5f8980444d173547460080b Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Wed, 6 Jul 2011 05:53:25 -0700 Subject: [PATCH] Use malloc to allocate storage for task parameters on Windows. Fixes bug #55. A number of tests were crashing on Windows due to the task launch code using alloca to allocate space for the tasks' parameters. On Windows, the stack isn't generally big enough for this to be a good idea. Also added an alignment parameter to ISPCMalloc() to pass the alignment requirement along. --- builtins.cpp | 7 ++++--- ctx.cpp | 22 +++++++++++++------- ctx.h | 2 +- docs/ispc.txt | 14 ++++++++++++- examples/mandelbrot_tasks/tasks_concrt.cpp | 13 ++++++++++++ ispc_test.cpp | 24 ++++++++++++++++++++++ ispc_test.vcxproj | 4 ++-- module.cpp | 8 ++++++++ 8 files changed, 80 insertions(+), 14 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 4da5bb44..bdc132ca 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -516,11 +516,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod FATAL("logic error"); } - // Add a declaration of void *ISPCMalloc(int64_t). The user is - // responsible for linking in a definition of this if it's needed by - // the compiled program. + // Add a declaration of void *ISPCMalloc(int64_t size, int alignment). + // The user is responsible for linking in a definition of this if it's + // needed by the compiled program. 
{ std::vector argTypes; argTypes.push_back(llvm::Type::getInt64Ty(*ctx)); + argTypes.push_back(llvm::Type::getInt32Ty(*ctx)); llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType, argTypes, false); llvm::Function *func = diff --git a/ctx.cpp b/ctx.cpp index 9548a777..1087ccc1 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -757,7 +757,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { llvm::Value * -FunctionEmitContext::EmitMalloc(const llvm::Type *ty) { +FunctionEmitContext::EmitMalloc(const llvm::Type *ty, int align) { // Emit code to compute the size of the given type using a GEP with a // NULL base pointer, indexing one element of the given type, and // casting the resulting 'pointer' to an int giving its size. @@ -767,12 +767,13 @@ FunctionEmitContext::EmitMalloc(const llvm::Type *ty) { llvm::Value *poffset = llvm::GetElementPtrInst::Create(nullPtr, &index[0], &index[1], "offset_ptr", bblock); AddDebugPos(poffset); - llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int"); + llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int"); // And given the size, call the malloc function llvm::Function *fmalloc = m->module->getFunction("ISPCMalloc"); assert(fmalloc != NULL); - llvm::Value *mem = CallInst(fmalloc, sizeOf, "raw_argmem"); + llvm::Value *mem = CallInst(fmalloc, sizeOf, LLVMInt32(align), + "raw_argmem"); // Cast the void * back to the result pointer type return BitCastInst(mem, ptrType, "mem_bitcast"); } @@ -1921,12 +1922,19 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee, static_cast(pt->getElementType()); assert(argStructType->getNumElements() == argVals.size() + 1); - // Use alloca for space for the task args. KEY DETAIL: pass false - // to the call of FunctionEmitContext::AllocaInst so that the alloca - // doesn't happen just once at the top of the function, but happens - // each time the enclosing basic block executes. 
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); +#ifdef ISPC_IS_WINDOWS + // Use malloc() to allocate storage on Windows, since the stack is + // generally not big enough there to do enough allocations for lots of + // tasks and then things crash horribly... + llvm::Value *argmem = EmitMalloc(argStructType, align); +#else + // Use alloca for space for the task args on OSX And Linux. KEY + // DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so + // that the alloca doesn't happen just once at the top of the function, + // but happens each time the enclosing basic block executes. llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false); +#endif // ISPC_IS_WINDOWS llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType); // Copy the values of the parameters into the appropriate place in diff --git a/ctx.h b/ctx.h index 236bfc4d..f16fa398 100644 --- a/ctx.h +++ b/ctx.h @@ -213,7 +213,7 @@ public: /** Emit code to call the user-supplied ISPCMalloc function to allocate space for an object of thee given type. Returns the pointer value returned by the ISPCMalloc call. */ - llvm::Value *EmitMalloc(const llvm::Type *ty); + llvm::Value *EmitMalloc(const llvm::Type *ty, int align = 0); /** Emit code to call the user-supplied ISPCFree function, passing it the given pointer to storage previously allocated by an diff --git a/docs/ispc.txt b/docs/ispc.txt index 09e3c590..6376b025 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -1375,13 +1375,25 @@ parallel execution. If you use the task launch feature in ``ispc``, you must provide C/C++ implementations of two functions and link them into your final executable -file: +file. Although these functions may be implemented in either language, they +must have "C" linkage (i.e. their prototypes must be declared inside an +``extern "C"`` block if they are defined in C++.) 
:: void ISPCLaunch(void *funcptr, void *data); void ISPCSync(); +On Windows, two additional functions must be provided to dynamically +allocate and free memory to store the arguments passed to tasks. (On OSX +and Linux, the stack provides memory for task arguments; on Windows, the +stack is generally not large enough to do this for large numbers of tasks.) + +:: + + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); + These are called by the task launch code generated by the ``ispc`` compiler; the first is called to launch to launch a task and the second is called to wait for, respectively. (Factoring them out in this way diff --git a/examples/mandelbrot_tasks/tasks_concrt.cpp b/examples/mandelbrot_tasks/tasks_concrt.cpp index 753b4fb3..b70d5cbe 100644 --- a/examples/mandelbrot_tasks/tasks_concrt.cpp +++ b/examples/mandelbrot_tasks/tasks_concrt.cpp @@ -37,6 +37,7 @@ #include #include using namespace Concurrency; +#include #include #include #include @@ -45,6 +46,8 @@ using namespace Concurrency; extern "C" { void ISPCLaunch(void *f, void *data); void ISPCSync(); + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); } typedef void (*TaskFuncType)(void *, int, int); @@ -126,3 +129,13 @@ void ISPCSync() { taskOffset = 0; } + + +void *ISPCMalloc(int64_t size, int32_t alignment) { + return _aligned_malloc(size, alignment); +} + + +void ISPCFree(void *ptr) { + _aligned_free(ptr); +} diff --git a/ispc_test.cpp b/ispc_test.cpp index 3665aa42..f6bece46 100644 --- a/ispc_test.cpp +++ b/ispc_test.cpp @@ -33,6 +33,10 @@ #define _CRT_SECURE_NO_WARNINGS +#ifdef ISPC_IS_WINDOWS +#define NOMINMAX +#include +#endif #include #include @@ -77,6 +81,8 @@ extern "C" { extern "C" { void ISPCLaunch(void *, void *); void ISPCSync(); + void *ISPCMalloc(int64_t size, int32_t alignment); + void ISPCFree(void *ptr); } void ISPCLaunch(void *func, void *data) { @@ -89,6 +95,18 @@ void ISPCLaunch(void *func, void *data) { void ISPCSync() { } + 
+#ifdef ISPC_IS_WINDOWS +void *ISPCMalloc(int64_t size, int32_t alignment) { + return _aligned_malloc(size, alignment); +} + + +void ISPCFree(void *ptr) { + _aligned_free(ptr); +} +#endif + static void usage(int ret) { fprintf(stderr, "usage: ispc_test\n"); fprintf(stderr, "\t[-h/--help]\tprint help\n"); @@ -144,6 +162,12 @@ static bool lRunTest(const char *fn) { ee->addGlobalMapping(func, (void *)ISPCLaunch); if ((func = module->getFunction("ISPCSync")) != NULL) ee->addGlobalMapping(func, (void *)ISPCSync); +#ifdef ISPC_IS_WINDOWS + if ((func = module->getFunction("ISPCMalloc")) != NULL) + ee->addGlobalMapping(func, (void *)ISPCMalloc); + if ((func = module->getFunction("ISPCFree")) != NULL) + ee->addGlobalMapping(func, (void *)ISPCFree); +#endif // ISPC_IS_WINDOWS if ((func = module->getFunction("putchar")) != NULL) ee->addGlobalMapping(func, (void *)putchar); if ((func = module->getFunction("printf")) != NULL) diff --git a/ispc_test.vcxproj b/ispc_test.vcxproj index bd7a6407..353e3020 100755 --- a/ispc_test.vcxproj +++ b/ispc_test.vcxproj @@ -52,7 +52,7 @@ Level3 Disabled - _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ISPC_IS_WINDOWS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(LLVM_INSTALL_DIR)/include @@ -70,7 +70,7 @@ MaxSpeed true true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ISPC_IS_WINDOWS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) $(LLVM_INSTALL_DIR)/include diff --git a/module.cpp b/module.cpp index 09db04fd..10d74861 100644 --- a/module.cpp +++ b/module.cpp @@ -640,6 +640,14 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function, assert(threadCountSym); threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); ctx->StoreInst(threadCount, threadCountSym->storagePtr); + +#ifdef ISPC_IS_WINDOWS + // On Windows, we dynamically-allocate space for the task arguments + // (see FunctionEmitContext::LaunchInst().) 
Here is where we emit + // the code to free that memory, now that we've copied the + // parameter values out of the structure. + ctx->EmitFree(structParamPtr); +#endif // ISPC_IS_WINDOWS } else { // Regular, non-task function