Use malloc to allocate storage for task parameters on Windows.
Fixes bug #55. A number of tests were crashing on Windows due to the task launch code using alloca to allocate space for the tasks' parameters. On Windows, the stack isn't generally big enough for this to be a good idea. Also added an alignment parameter to ISPCMalloc() to pass the alignment requirement along.
This commit is contained in:
@@ -516,11 +516,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
FATAL("logic error");
|
||||
}
|
||||
|
||||
// Add a declaration of void *ISPCMalloc(int64_t). The user is
|
||||
// responsible for linking in a definition of this if it's needed by
|
||||
// the compiled program.
|
||||
// Add a declaration of void *ISPCMalloc(int64_t size, int alignment).
|
||||
// The user is responsible for linking in a definition of this if it's
|
||||
// needed by the compiled program.
|
||||
{ std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(llvm::Type::getInt64Ty(*ctx));
|
||||
argTypes.push_back(llvm::Type::getInt32Ty(*ctx));
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
|
||||
22
ctx.cpp
22
ctx.cpp
@@ -757,7 +757,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
|
||||
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::EmitMalloc(const llvm::Type *ty) {
|
||||
FunctionEmitContext::EmitMalloc(const llvm::Type *ty, int align) {
|
||||
// Emit code to compute the size of the given type using a GEP with a
|
||||
// NULL base pointer, indexing one element of the given type, and
|
||||
// casting the resulting 'pointer' to an int giving its size.
|
||||
@@ -767,12 +767,13 @@ FunctionEmitContext::EmitMalloc(const llvm::Type *ty) {
|
||||
llvm::Value *poffset = llvm::GetElementPtrInst::Create(nullPtr, &index[0], &index[1],
|
||||
"offset_ptr", bblock);
|
||||
AddDebugPos(poffset);
|
||||
llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int");
|
||||
llvm::Value *sizeOf = PtrToIntInst(poffset, LLVMTypes::Int64Type, "offset_int");
|
||||
|
||||
// And given the size, call the malloc function
|
||||
llvm::Function *fmalloc = m->module->getFunction("ISPCMalloc");
|
||||
assert(fmalloc != NULL);
|
||||
llvm::Value *mem = CallInst(fmalloc, sizeOf, "raw_argmem");
|
||||
llvm::Value *mem = CallInst(fmalloc, sizeOf, LLVMInt32(align),
|
||||
"raw_argmem");
|
||||
// Cast the void * back to the result pointer type
|
||||
return BitCastInst(mem, ptrType, "mem_bitcast");
|
||||
}
|
||||
@@ -1921,12 +1922,19 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
|
||||
static_cast<const llvm::StructType *>(pt->getElementType());
|
||||
assert(argStructType->getNumElements() == argVals.size() + 1);
|
||||
|
||||
// Use alloca for space for the task args. KEY DETAIL: pass false
|
||||
// to the call of FunctionEmitContext::AllocaInst so that the alloca
|
||||
// doesn't happen just once at the top of the function, but happens
|
||||
// each time the enclosing basic block executes.
|
||||
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
// Use malloc() to allocate storage on Windows, since the stack is
|
||||
// generally not big enough there to do enough allocations for lots of
|
||||
// tasks and then things crash horribly...
|
||||
llvm::Value *argmem = EmitMalloc(argStructType, align);
|
||||
#else
|
||||
// Use alloca for space for the task args on OSX and Linux. KEY
|
||||
// DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so
|
||||
// that the alloca doesn't happen just once at the top of the function,
|
||||
// but happens each time the enclosing basic block executes.
|
||||
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false);
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
|
||||
|
||||
// Copy the values of the parameters into the appropriate place in
|
||||
|
||||
2
ctx.h
2
ctx.h
@@ -213,7 +213,7 @@ public:
|
||||
/** Emit code to call the user-supplied ISPCMalloc function to
|
||||
allocate space for an object of the given type. Returns the
|
||||
pointer value returned by the ISPCMalloc call. */
|
||||
llvm::Value *EmitMalloc(const llvm::Type *ty);
|
||||
llvm::Value *EmitMalloc(const llvm::Type *ty, int align = 0);
|
||||
|
||||
/** Emit code to call the user-supplied ISPCFree function, passing it
|
||||
the given pointer to storage previously allocated by an
|
||||
|
||||
@@ -1375,13 +1375,25 @@ parallel execution.
|
||||
|
||||
If you use the task launch feature in ``ispc``, you must provide C/C++
|
||||
implementations of two functions and link them into your final executable
|
||||
file:
|
||||
file. Although these functions may be implemented in either language, they
|
||||
must have "C" linkage (i.e. their prototypes must be declared inside an
|
||||
``extern "C"`` block if they are defined in C++.)
|
||||
|
||||
::
|
||||
|
||||
void ISPCLaunch(void *funcptr, void *data);
|
||||
void ISPCSync();
|
||||
|
||||
On Windows, two additional functions must be provided to dynamically
|
||||
allocate and free memory to store the arguments passed to tasks. (On OSX
|
||||
and Linux, the stack provides memory for task arguments; on Windows, the
|
||||
stack is generally not large enough to do this for large numbers of tasks.)
|
||||
|
||||
::
|
||||
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||
void ISPCFree(void *ptr);
|
||||
|
||||
These are called by the task launch code generated by the ``ispc``
|
||||
compiler; the first is called to launch a task and the second is
|
||||
called to wait for launched tasks to finish, respectively. (Factoring them out in this way
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <windows.h>
|
||||
#include <concrt.h>
|
||||
using namespace Concurrency;
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -45,6 +46,8 @@ using namespace Concurrency;
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||
void ISPCFree(void *ptr);
|
||||
}
|
||||
|
||||
typedef void (*TaskFuncType)(void *, int, int);
|
||||
@@ -126,3 +129,13 @@ void ISPCSync() {
|
||||
|
||||
taskOffset = 0;
|
||||
}
|
||||
|
||||
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||
return _aligned_malloc(size, alignment);
|
||||
}
|
||||
|
||||
|
||||
void ISPCFree(void *ptr) {
|
||||
_aligned_free(ptr);
|
||||
}
|
||||
|
||||
@@ -33,6 +33,10 @@
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -77,6 +81,8 @@ extern "C" {
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *, void *);
|
||||
void ISPCSync();
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||
void ISPCFree(void *ptr);
|
||||
}
|
||||
|
||||
void ISPCLaunch(void *func, void *data) {
|
||||
@@ -89,6 +95,18 @@ void ISPCLaunch(void *func, void *data) {
|
||||
void ISPCSync() {
|
||||
}
|
||||
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||
return _aligned_malloc(size, alignment);
|
||||
}
|
||||
|
||||
|
||||
void ISPCFree(void *ptr) {
|
||||
_aligned_free(ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void usage(int ret) {
|
||||
fprintf(stderr, "usage: ispc_test\n");
|
||||
fprintf(stderr, "\t[-h/--help]\tprint help\n");
|
||||
@@ -144,6 +162,12 @@ static bool lRunTest(const char *fn) {
|
||||
ee->addGlobalMapping(func, (void *)ISPCLaunch);
|
||||
if ((func = module->getFunction("ISPCSync")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)ISPCSync);
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
if ((func = module->getFunction("ISPCMalloc")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)ISPCMalloc);
|
||||
if ((func = module->getFunction("ISPCFree")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)ISPCFree);
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
if ((func = module->getFunction("putchar")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)putchar);
|
||||
if ((func = module->getFunction("printf")) != NULL)
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>ISPC_IS_WINDOWS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@@ -70,7 +70,7 @@
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>ISPC_IS_WINDOWS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
||||
@@ -640,6 +640,14 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
assert(threadCountSym);
|
||||
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
|
||||
ctx->StoreInst(threadCount, threadCountSym->storagePtr);
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
// On Windows, we dynamically-allocate space for the task arguments
|
||||
// (see FunctionEmitContext::LaunchInst().) Here is where we emit
|
||||
// the code to free that memory, now that we've copied the
|
||||
// parameter values out of the structure.
|
||||
ctx->EmitFree(structParamPtr);
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
}
|
||||
else {
|
||||
// Regular, non-task function
|
||||
|
||||
Reference in New Issue
Block a user