Use malloc() to get space for task arguments when compiling to AVX.

This is to work around the LLVM bug/limitation discussed in LLVM bug
10841 (http://llvm.org/bugs/show_bug.cgi?id=10841).
This commit is contained in:
Matt Pharr
2011-09-17 13:36:38 -07:00
parent 3607f3e045
commit 2405dae8e6
7 changed files with 166 additions and 10 deletions

21
ctx.cpp
View File

@@ -1961,17 +1961,26 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
assert(argStructType->getNumElements() == argVals.size() + 1);
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
llvm::Value *argmem;
#ifdef ISPC_IS_WINDOWS
// Use malloc() to allocate storage on Windows, since the stack is
// generally not big enough there to do enough allocations for lots of
// tasks and then things crash horribly...
llvm::Value *argmem = EmitMalloc(argStructType, align);
argmem = EmitMalloc(argStructType, align);
#else
// Use alloca for space for the task args on OSX And Linux. KEY
// DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so
// that the alloca doesn't happen just once at the top of the function,
// but happens each time the enclosing basic block executes.
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false);
// Otherwise, use alloca for space for the task args, ** unless we're
// compiling to AVX, in which case we use malloc after all **. (See
// http://llvm.org/bugs/show_bug.cgi?id=10841 for details. There are
// limitations in LLVM with respect to dynamic allocas of this sort
// when the stack also has to be 32-byte aligned...).
if (g->target.isa == Target::AVX)
argmem = EmitMalloc(argStructType, align);
else
// KEY DETAIL: pass false to the call of
// FunctionEmitContext::AllocaInst so that the alloca doesn't
// happen just once at the top of the function, but happens each
// time the enclosing basic block executes.
argmem = AllocaInst(argStructType, "argmem", align, false);
#endif // ISPC_IS_WINDOWS
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);

View File

@@ -33,10 +33,20 @@
#include "taskinfo.h"
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_IS_LINUX
#elif defined(__APPLE__)
#define ISPC_IS_APPLE
#endif
/* A simple task system for ispc programs based on Apple's Grand Central
Dispatch. */
#include <dispatch/dispatch.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
static int initialized = 0;
static volatile int32_t lock = 0;
@@ -47,6 +57,8 @@ static dispatch_group_t gcdGroup;
// Entry points of this task system, declared with C linkage.
extern "C" {
// NOTE(review): presumably `f` is the task's entry point and `data` its
// argument block -- the definition is not visible here; confirm.
void ISPCLaunch(void *f, void *data);
void ISPCSync();
// Returns storage for `size` bytes at an address that is a multiple of
// `alignment`; the result is released with ISPCFree(), not free().
void *ISPCMalloc(int64_t size, int32_t alignment);
// Releases a pointer previously returned by ISPCMalloc().
void ISPCFree(void *ptr);
}
@@ -97,3 +109,18 @@ void ISPCSync() {
lResetTaskInfo();
}
/** Allocates a block of at least `size` bytes whose address is a multiple
    of `alignment`, layered on top of plain malloc().

    The pointer that malloc() handed back is stored in the pointer-sized
    slot immediately in front of the aligned address, which is where
    ISPCFree() looks for it when releasing the block.

    @param size       Number of usable bytes requested.
    @param alignment  Required alignment in bytes.  Must be a power of two,
                      since the rounding below is done with a bit mask.
    @return The aligned pointer (release it with ISPCFree()), or a null
            pointer if the underlying malloc() fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
    const uint64_t mask = (uint64_t)alignment - 1;

    // Worst case we have to skip (alignment-1) bytes to reach an aligned
    // address, plus one pointer-sized slot to remember the raw pointer.
    void *mem = malloc(size + (alignment - 1) + sizeof(void *));
    if (!mem)
        return mem;

    // Round the first candidate address *up* to the alignment.  If it is
    // already aligned it must stay where it is: stepping a full
    // `alignment` forward in that case would push the end of the usable
    // region one byte past what was allocated above.
    const uint64_t base = (uint64_t)mem;
    const uint64_t aligned = (base + sizeof(void *) + mask) & ~mask;

    char *amem = (char *)mem + (aligned - base);
    ((void **)amem)[-1] = mem;
    return amem;
}
/** Releases storage obtained from ISPCMalloc().

    ISPCMalloc() keeps the pointer it got from malloc() in the slot just
    in front of the aligned address it returns; that stored pointer is the
    one that has to go back to free().

    @param ptr  Pointer returned by ISPCMalloc(), or a null pointer, in
                which case nothing is done (same contract as free()).
 */
void ISPCFree(void *ptr) {
    // A null pointer has no bookkeeping slot in front of it to read.
    if (!ptr)
        return;
    free(((void **)ptr)[-1]);
}

View File

@@ -31,6 +31,14 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_IS_LINUX
#elif defined(__APPLE__)
#define ISPC_IS_APPLE
#endif
#include "taskinfo.h"
#include <pthread.h>
#include <semaphore.h>
@@ -63,6 +71,8 @@ static pthread_cond_t tasksRunningCondition;
// Entry points of this task system, declared with C linkage.
extern "C" {
// NOTE(review): presumably `f` is the task's entry point and `data` its
// argument block -- the definition is not visible here; confirm.
void ISPCLaunch(void *f, void *data);
void ISPCSync();
// Returns storage for `size` bytes at an address that is a multiple of
// `alignment`; the result is released with ISPCFree(), not free().
void *ISPCMalloc(int64_t size, int32_t alignment);
// Releases a pointer previously returned by ISPCMalloc().
void ISPCFree(void *ptr);
}
static void *lTaskEntry(void *arg);
@@ -292,3 +302,35 @@ void ISPCSync() {
exit(1);
}
}
/** Allocates a block of at least `size` bytes whose address is a multiple
    of `alignment`.

    Windows and Linux use the platform's aligned allocator.  Everywhere
    else (ISPC_IS_APPLE, and any platform none of the ISPC_IS_* macros
    identify) the block is carved out of a slightly larger malloc(), with
    the raw malloc() pointer stored in the pointer-sized slot immediately
    in front of the aligned address so that ISPCFree() can recover it.

    @param size       Number of usable bytes requested.
    @param alignment  Required alignment in bytes.  Must be a power of two.
    @return The aligned pointer (release it with ISPCFree()), or a null
            pointer if the underlying allocator fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc(size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // Making this the catch-all branch guarantees the function always
    // returns a value, even on a platform the macros don't recognize.
    const uint64_t mask = (uint64_t)alignment - 1;

    // Worst case we have to skip (alignment-1) bytes to reach an aligned
    // address, plus one pointer-sized slot to remember the raw pointer.
    void *mem = malloc(size + (alignment - 1) + sizeof(void *));
    if (!mem)
        return mem;

    // Round the first candidate address *up* to the alignment.  If it is
    // already aligned it must stay where it is: stepping a full
    // `alignment` forward in that case would push the end of the usable
    // region one byte past what was allocated above.
    const uint64_t base = (uint64_t)mem;
    const uint64_t aligned = (base + sizeof(void *) + mask) & ~mask;

    char *amem = (char *)mem + (aligned - base);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
/** Releases storage obtained from ISPCMalloc(), using the deallocator
    that pairs with the allocation path taken on this platform.

    Windows and Linux hand the pointer straight to the platform routine.
    Everywhere else the block came from a padded malloc() whose raw
    pointer sits in the slot just in front of `ptr`; that stored pointer
    is the one that has to go back to free().

    @param ptr  Pointer returned by ISPCMalloc(), or a null pointer, in
                which case nothing is done.
 */
void ISPCFree(void *ptr) {
#if defined(ISPC_IS_WINDOWS)
    _aligned_free(ptr);
#elif defined(ISPC_IS_LINUX)
    free(ptr);
#else
    // ISPC_IS_APPLE and unrecognized platforms.  A null pointer has no
    // bookkeeping slot in front of it to read.
    if (!ptr)
        return;
    free(((void **)ptr)[-1]);
#endif
}

View File

@@ -33,12 +33,22 @@
#define _CRT_SECURE_NO_WARNINGS
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_IS_LINUX
#elif defined(__APPLE__)
#define ISPC_IS_APPLE
#endif
#ifdef ISPC_IS_WINDOWS
#define NOMINMAX
#include <windows.h>
#endif
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <memory.h>
#ifdef ISPC_HAVE_SVML
#include <xmmintrin.h>
@@ -103,16 +113,35 @@ void ISPCSync() {
}
#ifdef ISPC_IS_WINDOWS
/** Allocates a block of at least `size` bytes whose address is a multiple
    of `alignment`.

    Windows and Linux use the platform's aligned allocator.  Everywhere
    else (ISPC_IS_APPLE, and any platform none of the ISPC_IS_* macros
    identify) the block is carved out of a slightly larger malloc(), with
    the raw malloc() pointer stored in the pointer-sized slot immediately
    in front of the aligned address so that ISPCFree() can recover it.

    @param size       Number of usable bytes requested.
    @param alignment  Required alignment in bytes.  Must be a power of two.
    @return The aligned pointer (release it with ISPCFree()), or a null
            pointer if the underlying allocator fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc(size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // Making this the catch-all branch guarantees the function always
    // returns a value, even on a platform the macros don't recognize.
    const uint64_t mask = (uint64_t)alignment - 1;

    // Worst case we have to skip (alignment-1) bytes to reach an aligned
    // address, plus one pointer-sized slot to remember the raw pointer.
    void *mem = malloc(size + (alignment - 1) + sizeof(void *));
    if (!mem)
        return mem;

    // Round the first candidate address *up* to the alignment.  If it is
    // already aligned it must stay where it is: stepping a full
    // `alignment` forward in that case would push the end of the usable
    // region one byte past what was allocated above.
    const uint64_t base = (uint64_t)mem;
    const uint64_t aligned = (base + sizeof(void *) + mask) & ~mask;

    char *amem = (char *)mem + (aligned - base);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
// Releases memory obtained from ISPCMalloc() with the deallocator that
// pairs with each platform's allocation path: _aligned_free() on Windows,
// free() on Linux, and on Apple free() of the raw malloc() pointer kept in
// the slot just in front of `ptr`.
void ISPCFree(void *ptr) {
#ifdef ISPC_IS_WINDOWS
_aligned_free(ptr);
}
#endif
#ifdef ISPC_IS_LINUX
free(ptr);
#endif
#ifdef ISPC_IS_APPLE
// The aligned pointer is not what malloc() returned; fetch the original
// from the pointer-sized slot preceding it.
free(((void**)ptr)[-1]);
#endif
}
static void usage(int ret) {
fprintf(stderr, "usage: ispc_test\n");
@@ -186,10 +215,8 @@ static bool lRunTest(const char *fn) {
ee->addGlobalMapping(func, (void *)FUNC)
DO_FUNC(ISPCLaunch, "ISPCLaunch");
DO_FUNC(ISPCSync, "ISPCSync");
#ifdef ISPC_IS_WINDOWS
DO_FUNC(ISPCMalloc, "ISPCMalloc");
DO_FUNC(ISPCFree, "ISPCFree");
#endif // ISPC_IS_WINDOWS
DO_FUNC(putchar, "putchar");
DO_FUNC(printf, "printf");
DO_FUNC(fflush, "fflush");

View File

@@ -659,6 +659,11 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
// the code to free that memory, now that we've copied the
// parameter values out of the structure.
ctx->EmitFree(structParamPtr);
#else
// We also do this for AVX... (See discussion in
// FunctionEmitContext::LaunchInst().)
if (g->target.isa == Target::AVX)
ctx->EmitFree(structParamPtr);
#endif // ISPC_IS_WINDOWS
}
else {

View File

@@ -187,6 +187,10 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(targetLibraryInfo);
optPM.add(new llvm::TargetData(module));
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
optPM.add(llvm::createIndVarSimplifyPass());
#endif
if (optLevel == 0) {
// This is more or less the minimum set of optimizations that we
// need to do to generate code that will actually run. (We can't

View File

@@ -31,9 +31,18 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_IS_LINUX
#elif defined(__APPLE__)
#define ISPC_IS_APPLE
#endif
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
extern "C" {
extern int width();
@@ -48,6 +57,8 @@ extern "C" {
void ISPCLaunch(void *f, void *d);
void ISPCSync();
void *ISPCMalloc(int64_t size, int32_t alignment);
void ISPCFree(void *ptr);
}
void ISPCLaunch(void *f, void *d) {
@@ -60,6 +71,37 @@ void ISPCSync() {
}
/** Allocates a block of at least `size` bytes whose address is a multiple
    of `alignment`.

    Windows and Linux use the platform's aligned allocator.  Everywhere
    else (ISPC_IS_APPLE, and any platform none of the ISPC_IS_* macros
    identify) the block is carved out of a slightly larger malloc(), with
    the raw malloc() pointer stored in the pointer-sized slot immediately
    in front of the aligned address so that ISPCFree() can recover it.

    @param size       Number of usable bytes requested.
    @param alignment  Required alignment in bytes.  Must be a power of two.
    @return The aligned pointer (release it with ISPCFree()), or a null
            pointer if the underlying allocator fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc(size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // Making this the catch-all branch guarantees the function always
    // returns a value, even on a platform the macros don't recognize.
    const uint64_t mask = (uint64_t)alignment - 1;

    // Worst case we have to skip (alignment-1) bytes to reach an aligned
    // address, plus one pointer-sized slot to remember the raw pointer.
    void *mem = malloc(size + (alignment - 1) + sizeof(void *));
    if (!mem)
        return mem;

    // Round the first candidate address *up* to the alignment.  If it is
    // already aligned it must stay where it is: stepping a full
    // `alignment` forward in that case would push the end of the usable
    // region one byte past what was allocated above.
    const uint64_t base = (uint64_t)mem;
    const uint64_t aligned = (base + sizeof(void *) + mask) & ~mask;

    char *amem = (char *)mem + (aligned - base);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
/** Releases storage obtained from ISPCMalloc(), using the deallocator
    that pairs with the allocation path taken on this platform.

    Windows and Linux hand the pointer straight to the platform routine.
    Everywhere else the block came from a padded malloc() whose raw
    pointer sits in the slot just in front of `ptr`; that stored pointer
    is the one that has to go back to free().

    @param ptr  Pointer returned by ISPCMalloc(), or a null pointer, in
                which case nothing is done.
 */
void ISPCFree(void *ptr) {
#if defined(ISPC_IS_WINDOWS)
    _aligned_free(ptr);
#elif defined(ISPC_IS_LINUX)
    free(ptr);
#else
    // ISPC_IS_APPLE and unrecognized platforms.  A null pointer has no
    // bookkeeping slot in front of it to read.
    if (!ptr)
        return;
    free(((void **)ptr)[-1]);
#endif
}
int main(int argc, char *argv[]) {
int w = width();
assert(w <= 16);