Use malloc() to get space for task arguments when compiling to AVX.
This is to work around the LLVM bug/limitation discussed in LLVM bug 10841 (http://llvm.org/bugs/show_bug.cgi?id=10841).
This commit is contained in:
21
ctx.cpp
21
ctx.cpp
@@ -1961,17 +1961,26 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
|
|||||||
assert(argStructType->getNumElements() == argVals.size() + 1);
|
assert(argStructType->getNumElements() == argVals.size() + 1);
|
||||||
|
|
||||||
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
|
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
|
||||||
|
llvm::Value *argmem;
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
// Use malloc() to allocate storage on Windows, since the stack is
|
// Use malloc() to allocate storage on Windows, since the stack is
|
||||||
// generally not big enough there to do enough allocations for lots of
|
// generally not big enough there to do enough allocations for lots of
|
||||||
// tasks and then things crash horribly...
|
// tasks and then things crash horribly...
|
||||||
llvm::Value *argmem = EmitMalloc(argStructType, align);
|
argmem = EmitMalloc(argStructType, align);
|
||||||
#else
|
#else
|
||||||
// Use alloca for space for the task args on OSX And Linux. KEY
|
// Otherwise, use alloca for space for the task args, ** unless we're
|
||||||
// DETAIL: pass false to the call of FunctionEmitContext::AllocaInst so
|
// compiling to AVX, in which case we use malloc after all **. (See
|
||||||
// that the alloca doesn't happen just once at the top of the function,
|
// http://llvm.org/bugs/show_bug.cgi?id=10841 for details. There are
|
||||||
// but happens each time the enclosing basic block executes.
|
// limitations in LLVM with respect to dynamic allocas of this sort
|
||||||
llvm::Value *argmem = AllocaInst(argStructType, "argmem", align, false);
|
// when the stack also has to be 32-byte aligned...).
|
||||||
|
if (g->target.isa == Target::AVX)
|
||||||
|
argmem = EmitMalloc(argStructType, align);
|
||||||
|
else
|
||||||
|
// KEY DETAIL: pass false to the call of
|
||||||
|
// FunctionEmitContext::AllocaInst so that the alloca doesn't
|
||||||
|
// happen just once at the top of the function, but happens each
|
||||||
|
// time the enclosing basic block executes.
|
||||||
|
argmem = AllocaInst(argStructType, "argmem", align, false);
|
||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
|
llvm::Value *voidmem = BitCastInst(argmem, LLVMTypes::VoidPointerType);
|
||||||
|
|
||||||
|
|||||||
@@ -33,10 +33,20 @@
|
|||||||
|
|
||||||
#include "taskinfo.h"
|
#include "taskinfo.h"
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
/* A simple task system for ispc programs based on Apple's Grand Central
|
/* A simple task system for ispc programs based on Apple's Grand Central
|
||||||
Dispatch. */
|
Dispatch. */
|
||||||
#include <dispatch/dispatch.h>
|
#include <dispatch/dispatch.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
static int initialized = 0;
|
static int initialized = 0;
|
||||||
static volatile int32_t lock = 0;
|
static volatile int32_t lock = 0;
|
||||||
@@ -47,6 +57,8 @@ static dispatch_group_t gcdGroup;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
void ISPCLaunch(void *f, void *data);
|
void ISPCLaunch(void *f, void *data);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -97,3 +109,18 @@ void ISPCSync() {
|
|||||||
|
|
||||||
lResetTaskInfo();
|
lResetTaskInfo();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Allocate `size` bytes whose address is a multiple of `alignment`.

    The block is carved out of a larger malloc() allocation.  The pointer
    originally returned by malloc() is stored in the pointer-sized slot
    immediately before the returned address so that ISPCFree() can recover
    it and hand it back to free().

    @param size       Number of usable bytes requested; must be >= 0.
    @param alignment  Required alignment in bytes; must be a power of two.
    @return Aligned pointer, or NULL if the arguments are invalid or the
            underlying malloc() fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
    // The mask arithmetic below is only meaningful for power-of-two
    // alignments, and a negative size would wrap to a huge size_t.
    if (size < 0 || alignment <= 0 || (alignment & (alignment - 1)) != 0)
        return NULL;

    // Use at least pointer alignment so that the header slot stored just
    // before the returned address is itself properly aligned.
    size_t align = (size_t)alignment;
    if (align < sizeof(void *))
        align = sizeof(void *);

    // Worst case we need room for the header slot plus (align - 1) bytes
    // of padding in front of the user's data.
    void *mem = malloc((size_t)size + (align - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    // Round the first address past the header slot *up* to the alignment.
    // An already-aligned address is left where it is; advancing it by a
    // full `align` bytes would push the end of the user's region one byte
    // past the end of the allocation.
    uintptr_t firstUsable = reinterpret_cast<uintptr_t>(mem) + sizeof(void *);
    uintptr_t alignedAddr = (firstUsable + (align - 1)) & ~(uintptr_t)(align - 1);

    char *amem = reinterpret_cast<char *>(alignedAddr);
    ((void **)amem)[-1] = mem;
    return amem;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Release a block previously returned by ISPCMalloc().

    The pointer that malloc() originally produced is read back from the
    pointer-sized slot immediately before `ptr` and passed to free().

    @param ptr  Pointer obtained from ISPCMalloc(), or NULL (in which case
                the call is a no-op, mirroring free()).
 */
void ISPCFree(void *ptr) {
    // There is no header slot in front of a null pointer to read.
    if (ptr == NULL)
        return;
    void *original = ((void **)ptr)[-1];
    free(original);
}
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,14 @@
|
|||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "taskinfo.h"
|
#include "taskinfo.h"
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <semaphore.h>
|
#include <semaphore.h>
|
||||||
@@ -63,6 +71,8 @@ static pthread_cond_t tasksRunningCondition;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
void ISPCLaunch(void *f, void *data);
|
void ISPCLaunch(void *f, void *data);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *lTaskEntry(void *arg);
|
static void *lTaskEntry(void *arg);
|
||||||
@@ -292,3 +302,35 @@ void ISPCSync() {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Allocate `size` bytes whose address is a multiple of `alignment`.

    Windows uses _aligned_malloc() and Linux uses posix_memalign().  On
    Apple (and any platform for which none of the ISPC_IS_* macros is
    defined) the block is carved out of a larger malloc() allocation, with
    the pointer originally returned by malloc() stored in the pointer-sized
    slot immediately before the returned address so that ISPCFree() can
    recover it.

    @param size       Number of usable bytes requested; must be >= 0.
    @param alignment  Required alignment in bytes; must be a power of two.
    @return Aligned pointer, or NULL if the arguments are invalid or the
            underlying allocator fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
    // The allocators below all require a power-of-two alignment, and a
    // negative size would wrap to a huge size_t.
    if (size < 0 || alignment <= 0 || (alignment & (alignment - 1)) != 0)
        return NULL;

    // posix_memalign() requires a multiple of sizeof(void *), and the
    // manual scheme needs the header slot to be pointer-aligned.
    size_t align = (size_t)alignment;
    if (align < sizeof(void *))
        align = sizeof(void *);

#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc((size_t)size, align);
#elif defined(ISPC_IS_LINUX)
    // posix_memalign() is declared in <stdlib.h> and its result may be
    // released with plain free(), which is what ISPCFree() does on Linux.
    void *mem = NULL;
    if (posix_memalign(&mem, align, (size_t)size) != 0)
        return NULL;
    return mem;
#else
    // Worst case we need room for the header slot plus (align - 1) bytes
    // of padding in front of the user's data.
    void *mem = malloc((size_t)size + (align - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    // Round the first address past the header slot *up* to the alignment.
    // An already-aligned address is left where it is; advancing it by a
    // full `align` bytes would push the end of the user's region one byte
    // past the end of the allocation.
    uintptr_t firstUsable = reinterpret_cast<uintptr_t>(mem) + sizeof(void *);
    uintptr_t alignedAddr = (firstUsable + (align - 1)) & ~(uintptr_t)(align - 1);

    char *amem = reinterpret_cast<char *>(alignedAddr);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Release a block previously returned by ISPCMalloc().

    Each branch is the counterpart of the allocator ISPCMalloc() uses on
    that platform: _aligned_free() on Windows, free() on Linux, and on
    Apple (and any platform for which none of the ISPC_IS_* macros is
    defined) free() applied to the original malloc() pointer that is
    stored in the pointer-sized slot immediately before `ptr`.

    @param ptr  Pointer obtained from ISPCMalloc(), or NULL (in which case
                the call is a no-op, mirroring free()).
 */
void ISPCFree(void *ptr) {
    // There is no header slot in front of a null pointer to read.
    if (ptr == NULL)
        return;
#if defined(ISPC_IS_WINDOWS)
    _aligned_free(ptr);
#elif defined(ISPC_IS_LINUX)
    free(ptr);
#else
    free(((void **)ptr)[-1]);
#endif
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,12 +33,22 @@
|
|||||||
|
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
#define NOMINMAX
|
#define NOMINMAX
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <memory.h>
|
||||||
|
|
||||||
#ifdef ISPC_HAVE_SVML
|
#ifdef ISPC_HAVE_SVML
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
@@ -103,16 +113,35 @@ void ISPCSync() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef ISPC_IS_WINDOWS
|
|
||||||
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
void *ISPCMalloc(int64_t size, int32_t alignment) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
return _aligned_malloc(size, alignment);
|
return _aligned_malloc(size, alignment);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
return memalign(alignment, size);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
void *mem = malloc(size + (alignment-1) + sizeof(void*));
|
||||||
|
char *amem = ((char*)mem) + sizeof(void*);
|
||||||
|
amem = amem + uint32_t(alignment - (reinterpret_cast<uint64_t>(amem) &
|
||||||
|
(alignment - 1)));
|
||||||
|
((void**)amem)[-1] = mem;
|
||||||
|
return amem;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ISPCFree(void *ptr) {
|
void ISPCFree(void *ptr) {
|
||||||
|
#ifdef ISPC_IS_WINDOWS
|
||||||
_aligned_free(ptr);
|
_aligned_free(ptr);
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
free(ptr);
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_IS_APPLE
|
||||||
|
free(((void**)ptr)[-1]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static void usage(int ret) {
|
static void usage(int ret) {
|
||||||
fprintf(stderr, "usage: ispc_test\n");
|
fprintf(stderr, "usage: ispc_test\n");
|
||||||
@@ -186,10 +215,8 @@ static bool lRunTest(const char *fn) {
|
|||||||
ee->addGlobalMapping(func, (void *)FUNC)
|
ee->addGlobalMapping(func, (void *)FUNC)
|
||||||
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||||
DO_FUNC(ISPCSync, "ISPCSync");
|
DO_FUNC(ISPCSync, "ISPCSync");
|
||||||
#ifdef ISPC_IS_WINDOWS
|
|
||||||
DO_FUNC(ISPCMalloc, "ISPCMalloc");
|
DO_FUNC(ISPCMalloc, "ISPCMalloc");
|
||||||
DO_FUNC(ISPCFree, "ISPCFree");
|
DO_FUNC(ISPCFree, "ISPCFree");
|
||||||
#endif // ISPC_IS_WINDOWS
|
|
||||||
DO_FUNC(putchar, "putchar");
|
DO_FUNC(putchar, "putchar");
|
||||||
DO_FUNC(printf, "printf");
|
DO_FUNC(printf, "printf");
|
||||||
DO_FUNC(fflush, "fflush");
|
DO_FUNC(fflush, "fflush");
|
||||||
|
|||||||
@@ -659,6 +659,11 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
// the code to free that memory, now that we've copied the
|
// the code to free that memory, now that we've copied the
|
||||||
// parameter values out of the structure.
|
// parameter values out of the structure.
|
||||||
ctx->EmitFree(structParamPtr);
|
ctx->EmitFree(structParamPtr);
|
||||||
|
#else
|
||||||
|
// We also do this for AVX... (See discussion in
|
||||||
|
// FunctionEmitContext::LaunchInst().)
|
||||||
|
if (g->target.isa == Target::AVX)
|
||||||
|
ctx->EmitFree(structParamPtr);
|
||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|||||||
4
opt.cpp
4
opt.cpp
@@ -187,6 +187,10 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(targetLibraryInfo);
|
optPM.add(targetLibraryInfo);
|
||||||
optPM.add(new llvm::TargetData(module));
|
optPM.add(new llvm::TargetData(module));
|
||||||
|
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
optPM.add(llvm::createIndVarSimplifyPass());
|
||||||
|
#endif
|
||||||
|
|
||||||
if (optLevel == 0) {
|
if (optLevel == 0) {
|
||||||
// This is more or less the minimum set of optimizations that we
|
// This is more or less the minimum set of optimizations that we
|
||||||
// need to do to generate code that will actually run. (We can't
|
// need to do to generate code that will actually run. (We can't
|
||||||
|
|||||||
@@ -31,9 +31,18 @@
|
|||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ISPC_IS_WINDOWS
|
||||||
|
#elif defined(__linux__)
|
||||||
|
#define ISPC_IS_LINUX
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#define ISPC_IS_APPLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
extern int width();
|
extern int width();
|
||||||
@@ -48,6 +57,8 @@ extern "C" {
|
|||||||
|
|
||||||
void ISPCLaunch(void *f, void *d);
|
void ISPCLaunch(void *f, void *d);
|
||||||
void ISPCSync();
|
void ISPCSync();
|
||||||
|
void *ISPCMalloc(int64_t size, int32_t alignment);
|
||||||
|
void ISPCFree(void *ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ISPCLaunch(void *f, void *d) {
|
void ISPCLaunch(void *f, void *d) {
|
||||||
@@ -60,6 +71,37 @@ void ISPCSync() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Allocate `size` bytes whose address is a multiple of `alignment`.

    Windows uses _aligned_malloc() and Linux uses posix_memalign().  On
    Apple (and any platform for which none of the ISPC_IS_* macros is
    defined) the block is carved out of a larger malloc() allocation, with
    the pointer originally returned by malloc() stored in the pointer-sized
    slot immediately before the returned address so that ISPCFree() can
    recover it.

    @param size       Number of usable bytes requested; must be >= 0.
    @param alignment  Required alignment in bytes; must be a power of two.
    @return Aligned pointer, or NULL if the arguments are invalid or the
            underlying allocator fails.
 */
void *ISPCMalloc(int64_t size, int32_t alignment) {
    // The allocators below all require a power-of-two alignment, and a
    // negative size would wrap to a huge size_t.
    if (size < 0 || alignment <= 0 || (alignment & (alignment - 1)) != 0)
        return NULL;

    // posix_memalign() requires a multiple of sizeof(void *), and the
    // manual scheme needs the header slot to be pointer-aligned.
    size_t align = (size_t)alignment;
    if (align < sizeof(void *))
        align = sizeof(void *);

#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc((size_t)size, align);
#elif defined(ISPC_IS_LINUX)
    // posix_memalign() is declared in <stdlib.h> and its result may be
    // released with plain free(), which is what ISPCFree() does on Linux.
    void *mem = NULL;
    if (posix_memalign(&mem, align, (size_t)size) != 0)
        return NULL;
    return mem;
#else
    // Worst case we need room for the header slot plus (align - 1) bytes
    // of padding in front of the user's data.
    void *mem = malloc((size_t)size + (align - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    // Round the first address past the header slot *up* to the alignment.
    // An already-aligned address is left where it is; advancing it by a
    // full `align` bytes would push the end of the user's region one byte
    // past the end of the allocation.
    uintptr_t firstUsable = reinterpret_cast<uintptr_t>(mem) + sizeof(void *);
    uintptr_t alignedAddr = (firstUsable + (align - 1)) & ~(uintptr_t)(align - 1);

    char *amem = reinterpret_cast<char *>(alignedAddr);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Release a block previously returned by ISPCMalloc().

    Each branch is the counterpart of the allocator ISPCMalloc() uses on
    that platform: _aligned_free() on Windows, free() on Linux, and on
    Apple (and any platform for which none of the ISPC_IS_* macros is
    defined) free() applied to the original malloc() pointer that is
    stored in the pointer-sized slot immediately before `ptr`.

    @param ptr  Pointer obtained from ISPCMalloc(), or NULL (in which case
                the call is a no-op, mirroring free()).
 */
void ISPCFree(void *ptr) {
    // There is no header slot in front of a null pointer to read.
    if (ptr == NULL)
        return;
#if defined(ISPC_IS_WINDOWS)
    _aligned_free(ptr);
#elif defined(ISPC_IS_LINUX)
    free(ptr);
#else
    free(((void **)ptr)[-1]);
#endif
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int w = width();
|
int w = width();
|
||||||
assert(w <= 16);
|
assert(w <= 16);
|
||||||
|
|||||||
Reference in New Issue
Block a user