Use posix_memalign to allocate 16-byte aligned memory on Linux/macOS.
This commit is contained in:
2
Makefile
2
Makefile
@@ -182,7 +182,7 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
|||||||
|
|
||||||
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||||
@echo Creating C++ source from builtins definition file $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | python bitcode2cpp.py $< > $@
|
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
|
||||||
|
|
||||||
objs/builtins-c-32.cpp: builtins/builtins.c
|
objs/builtins-c-32.cpp: builtins/builtins.c
|
||||||
@echo Creating C++ source from builtins definition file $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
|
|||||||
@@ -477,9 +477,11 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__min_varying_uint32",
|
"__min_varying_uint32",
|
||||||
"__min_varying_uint64",
|
"__min_varying_uint64",
|
||||||
"__movmsk",
|
"__movmsk",
|
||||||
"__new_uniform",
|
"__new_uniform_32rt",
|
||||||
"__new_varying32",
|
"__new_uniform_64rt",
|
||||||
"__new_varying64",
|
"__new_varying32_32rt",
|
||||||
|
"__new_varying32_64rt",
|
||||||
|
"__new_varying64_64rt",
|
||||||
"__none",
|
"__none",
|
||||||
"__num_cores",
|
"__num_cores",
|
||||||
"__packed_load_active",
|
"__packed_load_active",
|
||||||
|
|||||||
@@ -2536,15 +2536,59 @@ ok:
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; new/delete
|
;; new/delete
|
||||||
|
|
||||||
declare noalias i8 * @malloc(i64)
|
;; Set of functions for the 32-bit runtime
|
||||||
declare void @free(i8 *)
|
|
||||||
|
|
||||||
define noalias i8 * @__new_uniform(i64 %size) {
|
ifelse(BUILD_OS, `UNIX',
|
||||||
|
`
|
||||||
|
|
||||||
|
;; posix_memalign is for 32 bit runtime
|
||||||
|
declare i32 @posix_memalign(i8**, i32, i32)
|
||||||
|
|
||||||
|
define noalias i8 * @__new_uniform_32rt(i64 %size) {
|
||||||
|
%ptr = alloca i8*
|
||||||
|
%conv = trunc i64 %size to i32
|
||||||
|
%call1 = call i32 @posix_memalign(i8** %ptr, i32 16, i32 %conv)
|
||||||
|
%ptr_val = load i8** %ptr
|
||||||
|
ret i8* %ptr_val
|
||||||
|
}
|
||||||
|
|
||||||
|
define <WIDTH x i64> @__new_varying32_32rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
|
||||||
|
%ret = alloca <WIDTH x i64>
|
||||||
|
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
||||||
|
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
||||||
|
|
||||||
|
per_lane(WIDTH, <WIDTH x MASK> %mask, `
|
||||||
|
%sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
|
||||||
|
%store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
|
||||||
|
%ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8**
|
||||||
|
%call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 16, i32 %sz_LANE_ID)')
|
||||||
|
|
||||||
|
%r = load <WIDTH x i64> * %ret
|
||||||
|
ret <WIDTH x i64> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
',
|
||||||
|
BUILD_OS, `WINDOWS',
|
||||||
|
`
|
||||||
|
;; Windows version TBD
|
||||||
|
',
|
||||||
|
`
|
||||||
|
errprint(`BUILD_OS should be defined to either UNIX or WINDOWS
|
||||||
|
')
|
||||||
|
m4exit(`1')
|
||||||
|
')
|
||||||
|
|
||||||
|
;; Set of functions for the 64-bit runtime
|
||||||
|
|
||||||
|
;; malloc is for 64 bit runtime
|
||||||
|
declare noalias i8 * @malloc(i64)
|
||||||
|
|
||||||
|
define noalias i8 * @__new_uniform_64rt(i64 %size) {
|
||||||
%a = call noalias i8 * @malloc(i64 %size)
|
%a = call noalias i8 * @malloc(i64 %size)
|
||||||
ret i8 * %a
|
ret i8 * %a
|
||||||
}
|
}
|
||||||
|
|
||||||
define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
|
define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
|
||||||
%ret = alloca <WIDTH x i64>
|
%ret = alloca <WIDTH x i64>
|
||||||
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
||||||
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
||||||
@@ -2561,7 +2605,7 @@ define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask)
|
|||||||
ret <WIDTH x i64> %r
|
ret <WIDTH x i64> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
|
define <WIDTH x i64> @__new_varying64_64rt(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
|
||||||
%ret = alloca <WIDTH x i64>
|
%ret = alloca <WIDTH x i64>
|
||||||
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
||||||
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
||||||
@@ -2577,6 +2621,11 @@ define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask)
|
|||||||
ret <WIDTH x i64> %r
|
ret <WIDTH x i64> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; Functions for both 32 and 64 bit runtimes.
|
||||||
|
|
||||||
|
;; free works fine with both 32 and 64 bit runtime
|
||||||
|
declare void @free(i8 *)
|
||||||
|
|
||||||
define void @__delete_uniform(i8 * %ptr) {
|
define void @__delete_uniform(i8 * %ptr) {
|
||||||
call void @free(i8 * %ptr)
|
call void @free(i8 * %ptr)
|
||||||
ret void
|
ret void
|
||||||
|
|||||||
18
expr.cpp
18
expr.cpp
@@ -8214,16 +8214,24 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
// varying, and taking 32-bit or 64-bit allocation counts.
|
// varying, and taking 32-bit or 64-bit allocation counts.
|
||||||
llvm::Function *func;
|
llvm::Function *func;
|
||||||
if (isVarying) {
|
if (isVarying) {
|
||||||
if (do32Bit)
|
if (g->target->is32Bit()) {
|
||||||
func = m->module->getFunction("__new_varying32");
|
func = m->module->getFunction("__new_varying32_32rt");
|
||||||
else
|
} else if (g->opt.force32BitAddressing) {
|
||||||
func = m->module->getFunction("__new_varying64");
|
func = m->module->getFunction("__new_varying32_64rt");
|
||||||
|
} else {
|
||||||
|
func = m->module->getFunction("__new_varying64_64rt");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
// FIXME: __new_uniform_32rt should take i32
|
||||||
if (allocSize->getType() != LLVMTypes::Int64Type)
|
if (allocSize->getType() != LLVMTypes::Int64Type)
|
||||||
allocSize = ctx->SExtInst(allocSize, LLVMTypes::Int64Type,
|
allocSize = ctx->SExtInst(allocSize, LLVMTypes::Int64Type,
|
||||||
"alloc_size64");
|
"alloc_size64");
|
||||||
func = m->module->getFunction("__new_uniform");
|
if (g->target->is32Bit()) {
|
||||||
|
func = m->module->getFunction("__new_uniform_32rt");
|
||||||
|
} else {
|
||||||
|
func = m->module->getFunction("__new_uniform_64rt");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
AssertPos(pos, func != NULL);
|
AssertPos(pos, func != NULL);
|
||||||
|
|
||||||
|
|||||||
3
ispc.cpp
3
ispc.cpp
@@ -477,6 +477,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Set is32Bit
|
// Set is32Bit
|
||||||
|
// This indicates if we are compiling for 32 bit platform
|
||||||
|
// and can assume 32 bit runtime.
|
||||||
|
// FIXME: all generic targets are handled as 64 bit, which is incorrect.
|
||||||
this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
|
this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
|
||||||
|
|
||||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
|
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
|
||||||
|
|||||||
1
main.cpp
1
main.cpp
@@ -272,6 +272,7 @@ int main(int Argc, char *Argv[]) {
|
|||||||
g->cppArgs.push_back(argv[i]);
|
g->cppArgs.push_back(argv[i]);
|
||||||
else if (!strncmp(argv[i], "--addressing=", 13)) {
|
else if (!strncmp(argv[i], "--addressing=", 13)) {
|
||||||
if (atoi(argv[i] + 13) == 64)
|
if (atoi(argv[i] + 13) == 64)
|
||||||
|
// FIXME: this doesn't make sense on 32 bit platform.
|
||||||
g->opt.force32BitAddressing = false;
|
g->opt.force32BitAddressing = false;
|
||||||
else if (atoi(argv[i] + 13) == 32)
|
else if (atoi(argv[i] + 13) == 32)
|
||||||
g->opt.force32BitAddressing = true;
|
g->opt.force32BitAddressing = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user