From 95950885cf519ff46dcb0a93531f7e7e8427f89e Mon Sep 17 00:00:00 2001
From: Dmitry Babokin
Date: Fri, 26 Apr 2013 20:33:24 +0400
Subject: [PATCH] Use posix_memalign to allocate 16 byte aligned memory on Linux/MacOS.

---
 Makefile         |  2 +-
 builtins.cpp     |  8 ++++---
 builtins/util.m4 | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 expr.cpp         | 18 +++++++++++----
 ispc.cpp         |  3 +++
 main.cpp         |  1 +
 6 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index 34055496..69468576 100644
--- a/Makefile
+++ b/Makefile
@@ -182,7 +182,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
 
 objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
 	@echo Creating C++ source from builtins definition file $<
-	@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | python bitcode2cpp.py $< > $@
+	@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
 
 objs/builtins-c-32.cpp: builtins/builtins.c
 	@echo Creating C++ source from builtins definition file $<
diff --git a/builtins.cpp b/builtins.cpp
index 53cab1f6..e5745372 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -477,9 +477,11 @@ lSetInternalFunctions(llvm::Module *module) {
         "__min_varying_uint32",
         "__min_varying_uint64",
         "__movmsk",
-        "__new_uniform",
-        "__new_varying32",
-        "__new_varying64",
+        "__new_uniform_32rt",
+        "__new_uniform_64rt",
+        "__new_varying32_32rt",
+        "__new_varying32_64rt",
+        "__new_varying64_64rt",
         "__none",
         "__num_cores",
         "__packed_load_active",
diff --git a/builtins/util.m4 b/builtins/util.m4
index 87a1fd68..ac9ba2d0 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -2536,15 +2536,59 @@ ok:
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; new/delete
 
-declare noalias i8 * @malloc(i64)
-declare void @free(i8 *)
+;; Set of functions for 32 bit runtime
 
-define noalias i8 * @__new_uniform(i64 %size) {
+ifelse(BUILD_OS, `UNIX',
+`
+
+;; posix_memalign is for 32 bit runtime
+declare i32 @posix_memalign(i8**, i32, i32)
+
+define noalias i8 * @__new_uniform_32rt(i64 %size) {
+  %ptr = alloca i8*
+  %conv = trunc i64 %size to i32
+  %call1 = call i32 @posix_memalign(i8** %ptr, i32 16, i32 %conv)
+  %ptr_val = load i8** %ptr
+  ret i8* %ptr_val
+}
+
+define <WIDTH x i64> @__new_varying32_32rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
+  %ret = alloca <WIDTH x i64>
+  store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
+  %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+
+  per_lane(WIDTH, <WIDTH x MASK> %mask, `
+  %sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
+  %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
+  %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8**
+  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 16, i32 %sz_LANE_ID)')
+
+  %r = load <WIDTH x i64> * %ret
+  ret <WIDTH x i64> %r
+}
+
+',
+BUILD_OS, `WINDOWS',
+`
+;; Windows version TBD
+',
+`
+errprint(`BUILD_OS should be defined to either UNIX or WINDOWS
+')
+m4exit(`1')
+')
+
+;; Set of functions for 64 bit runtime
+
+;; malloc is for 64 bit runtime
+declare noalias i8 * @malloc(i64)
+
+define noalias i8 * @__new_uniform_64rt(i64 %size) {
   %a = call noalias i8 * @malloc(i64 %size)
   ret i8 * %a
 }
 
-define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
+define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
@@ -2561,7 +2605,7 @@ define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask)
   ret <WIDTH x i64> %r
 }
 
-define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
+define <WIDTH x i64> @__new_varying64_64rt(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
@@ -2577,6 +2621,11 @@ define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask)
   ret <WIDTH x i64> %r
 }
 
+;; Functions for both 32 and 64 bit runtimes.
+
+;; free works fine with both 32 and 64 bit runtimes
+declare void @free(i8 *)
+
 define void @__delete_uniform(i8 * %ptr) {
   call void @free(i8 * %ptr)
   ret void
diff --git a/expr.cpp b/expr.cpp
index 7808d2af..27f45299 100644
--- a/expr.cpp
+++ b/expr.cpp
@@ -8214,16 +8214,24 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
     // varying, and taking 32-bit or 64-bit allocation counts.
     llvm::Function *func;
    if (isVarying) {
-        if (do32Bit)
-            func = m->module->getFunction("__new_varying32");
-        else
-            func = m->module->getFunction("__new_varying64");
+        if (g->target->is32Bit()) {
+            func = m->module->getFunction("__new_varying32_32rt");
+        } else if (g->opt.force32BitAddressing) {
+            func = m->module->getFunction("__new_varying32_64rt");
+        } else {
+            func = m->module->getFunction("__new_varying64_64rt");
+        }
     }
     else {
+        // FIXME: __new_uniform_32rt should take i32
         if (allocSize->getType() != LLVMTypes::Int64Type)
             allocSize = ctx->SExtInst(allocSize, LLVMTypes::Int64Type,
                                       "alloc_size64");
-        func = m->module->getFunction("__new_uniform");
+        if (g->target->is32Bit()) {
+            func = m->module->getFunction("__new_uniform_32rt");
+        } else {
+            func = m->module->getFunction("__new_uniform_64rt");
+        }
     }
 
     AssertPos(pos, func != NULL);
diff --git a/ispc.cpp b/ispc.cpp
index 63b66c9c..daa3f5a8 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -477,6 +477,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
 #endif
 
     // Set is32Bit
+    // This indicates if we are compiling for a 32 bit platform
+    // and can assume a 32 bit runtime.
+    // FIXME: all generic targets are handled as 64 bit, which is incorrect.
     this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
 
 #if !defined(LLVM_3_1) && !defined(LLVM_3_2)
diff --git a/main.cpp b/main.cpp
index 4a970ac8..45d5f1ee 100644
--- a/main.cpp
+++ b/main.cpp
@@ -272,6 +272,7 @@ int main(int Argc, char *Argv[]) {
             g->cppArgs.push_back(argv[i]);
        else if (!strncmp(argv[i], "--addressing=", 13)) {
             if (atoi(argv[i] + 13) == 64)
+                // FIXME: this doesn't make sense on 32 bit platforms.
                 g->opt.force32BitAddressing = false;
             else if (atoi(argv[i] + 13) == 32)
                 g->opt.force32BitAddressing = true;
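
For reference, below is a minimal, self-contained C++ sketch (not part of the patch; the helper name aligned_new is purely illustrative) of the allocation pattern the new __new_uniform_32rt built-in lowers to on Linux/MacOS: request a 16 byte aligned block through posix_memalign, then release it later with plain free, just as the unchanged __delete_* built-ins do. It assumes a POSIX system where posix_memalign is available.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>   /* posix_memalign and free (POSIX systems) */

/* Illustrative stand-in for what __new_uniform_32rt does in IR:
   ask posix_memalign for a 16 byte aligned block and return the raw
   pointer, or NULL if the allocation fails. */
static void *aligned_new(size_t size) {
    void *ptr = NULL;
    /* The alignment argument must be a power of two and a multiple of
       sizeof(void *); 16 satisfies both on 32 and 64 bit targets. */
    if (posix_memalign(&ptr, 16, size) != 0)
        return NULL;
    return ptr;
}

int main() {
    void *p = aligned_new(1024);
    if (p != NULL) {
        printf("16 byte aligned: %s\n",
               ((uintptr_t)p % 16 == 0) ? "yes" : "no");
        free(p);   /* posix_memalign'd memory is released with ordinary free() */
    }
    return 0;
}

Note that free() reclaims memory obtained from either malloc or posix_memalign, which is why the patch keeps a single @free declaration shared by the 32 and 64 bit runtimes.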