Use malloc to allocate storage for task parameters on Windows.

Fixes bug #55.  A number of tests were crashing on Windows due to the task
launch code using alloca to allocate space for the tasks' parameters.  On
Windows, the stack isn't generally big enough for this to be a good idea.
Also added an alignment parameter to ISPCMalloc() to pass the alignment
requirement along.
This commit is contained in:
Matt Pharr
2011-07-06 05:53:25 -07:00
parent 4d733af3c7
commit 6e4c165c7e
8 changed files with 80 additions and 14 deletions

View File

@@ -516,11 +516,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
FATAL("logic error"); FATAL("logic error");
} }
// Add a declaration of void *ISPCMalloc(int64_t). The user is // Add a declaration of void *ISPCMalloc(int64_t size, int alignment).
// responsible for linking in a definition of this if it's needed by // The user is responsible for linking in a definition of this if it's
// the compiled program. // needed by the compiled program.
{ std::vector<const llvm::Type *> argTypes; { std::vector<const llvm::Type *> argTypes;
argTypes.push_back(llvm::Type::getInt64Ty(*ctx)); argTypes.push_back(llvm::Type::getInt64Ty(*ctx));
argTypes.push_back(llvm::Type::getInt32Ty(*ctx));
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType, llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType,
argTypes, false); argTypes, false);
llvm::Function *func = llvm::Function *func =

22
ctx.cpp
View File

@@ -757,7 +757,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
llvm::Value * llvm::Value *
FunctionEmitContext::EmitMalloc(const llvm::Type *ty) { FunctionEmitContext::EmitMalloc(const llvm::Type *ty, int align) {
// Emit code to compute the size of the given type using a GEP with a // Emit code to compute the size of the given type using a GEP with a
// NULL base pointer, indexing one element of the given type, and // NULL base pointer, indexing one element of the given type, and
// casting the resulting 'pointer' to an int giving its size. // casting the resulting 'pointer' to an int giving its size.
@@ -767,12 +767,13 @@ FunctionEmitContext::EmitMalloc(const llvm::Type *ty) {
llvm::Value *poffset = llvm::GetElementPtrInst::Create(nullPtr, &index[0], &index[1], llvm::Value *poffset = llvm::GetElementPtrInst::Create(nullPtr, &index[0], &index[1],
"offset_ptr", bblock); "offset_ptr", bblock);
AddDebugPos(poffset); AddDebugPos(poffset);
llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int"); llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int");
// And given the size, call the malloc function // And given the size, call the malloc function
llvm::Function *fmalloc = m->module->getFunction("ISPCMalloc"); llvm::Function *fmalloc = m->module->getFunction("ISPCMalloc");
assert(fmalloc != NULL); assert(fmalloc != NULL);
llvm::Value *mem = CallInst(fmalloc, sizeOf, "raw_argmem"); llvm::Value *mem = CallInst(fmalloc, sizeOf, LLVMInt32(align),
"raw_argmem");
// Cast the void * back to the result pointer type // Cast the void * back to the result pointer type
return BitCastInst(mem, ptrType, "mem_bitcast"); return BitCastInst(mem, ptrType, "mem_bitcast");
} }
@@ -1921,12 +1922,19 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
static_cast<const llvm::StructType *>(pt->getElementType()); static_cast<const llvm::StructType *>(pt->getElementType());
assert(argStructType->getNumElements() == argVals.size() + 1); assert(argStructType->getNumElements() == argVals.size() + 1);
// Use alloca for space for the task args. KEY DETAIL: pass false
// to the call of FunctionEmitContext::AllocaInst so that the alloca
// doesn't happen just once at the top of the function, but happens
// each time the enclosing basic block executes.
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth); int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
#ifdef ISPC_IS_WINDOWS
// Use malloc() to allocate storage on Windows, since the stack is
// generally not big enough there to do enough allocations for lots of
// tasks and then things crash horribly...
llvm::Value *argmem = EmitMalloc(argStructType, align);
#else
// Use alloca for space for the task args on OSX and Linux. KEY
// DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so
// that the alloca doesn't happen just once at the top of the function,
// but happens each time the enclosing basic block executes.
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false); llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false);
#endif // ISPC_IS_WINDOWS
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType); llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
// Copy the values of the parameters into the appropriate place in // Copy the values of the parameters into the appropriate place in

2
ctx.h
View File

@@ -213,7 +213,7 @@ public:
/** Emit code to call the user-supplied ISPCMalloc function to /** Emit code to call the user-supplied ISPCMalloc function to
allocate space for an object of the given type. Returns the allocate space for an object of the given type. Returns the
pointer value returned by the ISPCMalloc call. */ pointer value returned by the ISPCMalloc call. */
llvm::Value *EmitMalloc(const llvm::Type *ty); llvm::Value *EmitMalloc(const llvm::Type *ty, int align = 0);
/** Emit code to call the user-supplied ISPCFree function, passing it /** Emit code to call the user-supplied ISPCFree function, passing it
the given pointer to storage previously allocated by an the given pointer to storage previously allocated by an

View File

@@ -1375,13 +1375,25 @@ parallel execution.
If you use the task launch feature in ``ispc``, you must provide C/C++ If you use the task launch feature in ``ispc``, you must provide C/C++
implementations of two functions and link them into your final executable implementations of two functions and link them into your final executable
file: file. Although these functions may be implemented in either language, they
must have "C" linkage (i.e. their prototypes must be declared inside an
``extern "C"`` block if they are defined in C++.)
:: ::
void ISPCLaunch(void *funcptr, void *data); void ISPCLaunch(void *funcptr, void *data);
void ISPCSync(); void ISPCSync();
On Windows, two additional functions must be provided to dynamically
allocate and free memory to store the arguments passed to tasks. (On OSX
and Linux, the stack provides memory for task arguments; on Windows, the
stack is generally not large enough to do this for large numbers of tasks.)
::
void *ISPCMalloc(int64_t size, int32_t alignment);
void ISPCFree(void *ptr);
These are called by the task launch code generated by the ``ispc`` These are called by the task launch code generated by the ``ispc``
compiler; the first is called to launch a task and the second is compiler; the first is called to launch a task and the second is
called to wait for launched tasks to finish. (Factoring them out in this way called to wait for launched tasks to finish. (Factoring them out in this way

View File

@@ -37,6 +37,7 @@
#include <windows.h> #include <windows.h>
#include <concrt.h> #include <concrt.h>
using namespace Concurrency; using namespace Concurrency;
#include <stdint.h>
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -45,6 +46,8 @@ using namespace Concurrency;
extern "C" { extern "C" {
void ISPCLaunch(void *f, void *data); void ISPCLaunch(void *f, void *data);
void ISPCSync(); void ISPCSync();
void *ISPCMalloc(int64_t size, int32_t alignment);
void ISPCFree(void *ptr);
} }
typedef void (*TaskFuncType)(void *, int, int); typedef void (*TaskFuncType)(void *, int, int);
@@ -126,3 +129,13 @@ void ISPCSync() {
taskOffset = 0; taskOffset = 0;
} }
// Allocation hook for ispc-generated task launch code on Windows.  Asks the
// CRT's _aligned_malloc() for `size` bytes with the requested `alignment`
// and hands the resulting pointer back unchanged.  Storage obtained here is
// released through ISPCFree(), which forwards to _aligned_free().
void *ISPCMalloc(int64_t size, int32_t alignment) {
    void *storage = _aligned_malloc(size, alignment);
    return storage;
}
// Deallocation hook paired with ISPCMalloc(): forwards `ptr` to the CRT's
// _aligned_free(), the matching release routine for memory obtained from
// _aligned_malloc().
void ISPCFree(void *ptr) {
_aligned_free(ptr);
}

View File

@@ -33,6 +33,10 @@
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#ifdef ISPC_IS_WINDOWS
#define NOMINMAX
#include <windows.h>
#endif
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
@@ -77,6 +81,8 @@ extern "C" {
extern "C" { extern "C" {
void ISPCLaunch(void *, void *); void ISPCLaunch(void *, void *);
void ISPCSync(); void ISPCSync();
void *ISPCMalloc(int64_t size, int32_t alignment);
void ISPCFree(void *ptr);
} }
void ISPCLaunch(void *func, void *data) { void ISPCLaunch(void *func, void *data) {
@@ -89,6 +95,18 @@ void ISPCLaunch(void *func, void *data) {
void ISPCSync() { void ISPCSync() {
} }
#ifdef ISPC_IS_WINDOWS
// Windows-only allocation hook made available to the compiled test module.
// Requests `size` bytes with the given `alignment` from the CRT's
// _aligned_malloc() and returns the pointer it produces.  Release the
// storage with ISPCFree(), which forwards to _aligned_free().
void *ISPCMalloc(int64_t size, int32_t alignment) {
    void *storage = _aligned_malloc(size, alignment);
    return storage;
}
// Windows-only deallocation hook paired with ISPCMalloc(): forwards `ptr`
// to the CRT's _aligned_free(), the matching release routine for memory
// obtained from _aligned_malloc().
void ISPCFree(void *ptr) {
_aligned_free(ptr);
}
#endif
static void usage(int ret) { static void usage(int ret) {
fprintf(stderr, "usage: ispc_test\n"); fprintf(stderr, "usage: ispc_test\n");
fprintf(stderr, "\t[-h/--help]\tprint help\n"); fprintf(stderr, "\t[-h/--help]\tprint help\n");
@@ -144,6 +162,12 @@ static bool lRunTest(const char *fn) {
ee->addGlobalMapping(func, (void *)ISPCLaunch); ee->addGlobalMapping(func, (void *)ISPCLaunch);
if ((func = module->getFunction("ISPCSync")) != NULL) if ((func = module->getFunction("ISPCSync")) != NULL)
ee->addGlobalMapping(func, (void *)ISPCSync); ee->addGlobalMapping(func, (void *)ISPCSync);
#ifdef ISPC_IS_WINDOWS
if ((func = module->getFunction("ISPCMalloc")) != NULL)
ee->addGlobalMapping(func, (void *)ISPCMalloc);
if ((func = module->getFunction("ISPCFree")) != NULL)
ee->addGlobalMapping(func, (void *)ISPCFree);
#endif // ISPC_IS_WINDOWS
if ((func = module->getFunction("putchar")) != NULL) if ((func = module->getFunction("putchar")) != NULL)
ee->addGlobalMapping(func, (void *)putchar); ee->addGlobalMapping(func, (void *)putchar);
if ((func = module->getFunction("printf")) != NULL) if ((func = module->getFunction("printf")) != NULL)

View File

@@ -52,7 +52,7 @@
</PrecompiledHeader> </PrecompiledHeader>
<WarningLevel>Level3</WarningLevel> <WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>ISPC_IS_WINDOWS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
@@ -70,7 +70,7 @@
<Optimization>MaxSpeed</Optimization> <Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>ISPC_IS_WINDOWS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>

View File

@@ -640,6 +640,14 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
assert(threadCountSym); assert(threadCountSym);
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
ctx->StoreInst(threadCount, threadCountSym->storagePtr); ctx->StoreInst(threadCount, threadCountSym->storagePtr);
#ifdef ISPC_IS_WINDOWS
// On Windows, we dynamically-allocate space for the task arguments
// (see FunctionEmitContext::LaunchInst().) Here is where we emit
// the code to free that memory, now that we've copied the
// parameter values out of the structure.
ctx->EmitFree(structParamPtr);
#endif // ISPC_IS_WINDOWS
} }
else { else {
// Regular, non-task function // Regular, non-task function