From 630215f56fbbf19ce61a67fba764e3abf21052e0 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 17 May 2013 20:13:01 +0400 Subject: [PATCH] Defining memory routines completely separately for Windows/Unix 32/64 bit. --- builtins/util.m4 | 143 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 128 insertions(+), 15 deletions(-) diff --git a/builtins/util.m4 b/builtins/util.m4 index 7f31d010..87f63de7 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -2547,7 +2547,19 @@ ok: ifelse(BUILD_OS, `UNIX', ` +ifelse(RUNTIME, `32', +` + +;; Unix 32 bit environment. +;; Use: posix_memalign and free +;; Define: +;; - __new_uniform_32rt +;; - __new_varying32_32rt +;; - __delete_uniform_32rt +;; - __delete_varying_32rt + declare i32 @posix_memalign(i8**, i32, i32) +declare void @free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %ptr = alloca i8* @@ -2586,9 +2598,95 @@ define void @__delete_varying_32rt( %ptr, %mask) { ret void } +', +RUNTIME, `64', +` + +;; Unix 64 bit environment. +;; Use: posix_memalign and free +;; Define: +;; - __new_uniform_64rt +;; - __new_varying32_64rt +;; - __new_varying64_64rt +;; - __delete_uniform_64rt +;; - __delete_varying_64rt + +declare i32 @posix_memalign(i8**, i64, i64) +declare void @free(i8 *) + +define noalias i8 * @__new_uniform_64rt(i64 %size) { + %ptr = alloca i8* + %call1 = call i32 @posix_memalign(i8** %ptr, i64 16, i64 %size) + %ptr_val = load i8** %ptr + ret i8* %ptr_val +} + +define @__new_varying32_64rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + + per_lane(WIDTH, %mask, ` + %sz_LANE_ID = extractelement %size, i32 LANE + %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 + %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE + %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** + %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 16, i64 %sz64_LANE_ID)') + + %r = load * %ret + ret %r +} + +define @__new_varying64_64rt( %size, %mask) { + %ret = alloca + store zeroinitializer, * %ret + %ret64 = bitcast * %ret to i64 * + + per_lane(WIDTH, %mask, ` + %sz64_LANE_ID = extractelement %size, i32 LANE + %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE + %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** + %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 16, i64 %sz64_LANE_ID)') + + %r = load * %ret + ret %r +} + +define void @__delete_uniform_64rt(i8 * %ptr) { + call void @free(i8 * %ptr) + ret void +} + +define void @__delete_varying_64rt( %ptr, %mask) { + per_lane(WIDTH, %mask, ` + %iptr_LANE_ID = extractelement %ptr, i32 LANE + %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * + call void @free(i8 * %ptr_LANE_ID) + ') + ret void +} + +', ` +errprint(`RUNTIME should be defined to either 32 or 64 +') +m4exit(`1') +') + ', BUILD_OS, `WINDOWS', ` + +ifelse(RUNTIME, `32', +` + +;; Windows 32 bit environment. +;; Use: _aligned_malloc and _aligned_free +;; Define: +;; - __new_uniform_32rt +;; - __new_varying32_32rt +;; - __delete_uniform_32rt +;; - __delete_varying_32rt + declare i8* @_aligned_malloc(i32, i32) declare void @_aligned_free(i8 *) @@ -2629,21 +2727,24 @@ define void @__delete_varying_32rt( %ptr, %mask) { } ', +RUNTIME, `64', ` -errprint(`BUILD_OS should be defined to either UNIX or WINDOWS -') -m4exit(`1') -') -;; Set of functions for 64 bit runtime -;; We use the same standard malloc/free pair on all platforms (Windows/Linux/MacOS). +;; Windows 64 bit environment. +;; Use: _aligned_malloc and _aligned_free +;; Define: +;; - __new_uniform_64rt +;; - __new_varying32_64rt +;; - __new_varying64_64rt +;; - __delete_uniform_64rt +;; - __delete_varying_64rt -declare noalias i8 * @malloc(i64) -declare void @free(i8 *) +declare i8* @_aligned_malloc(i64, i64) +declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_64rt(i64 %size) { - %a = call noalias i8 * @malloc(i64 %size) - ret i8 * %a + %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 16) + ret i8* %ptr } define @__new_varying32_64rt( %size, %mask) { @@ -2654,7 +2755,7 @@ define @__new_varying32_64rt( %size, % per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 - %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz64_LANE_ID) + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 16) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2669,8 +2770,8 @@ define @__new_varying64_64rt( %size, % %ret64 = bitcast * %ret to i64 * per_lane(WIDTH, %mask, ` - %sz_LANE_ID = extractelement %size, i32 LANE - %ptr_LANE_ID = call noalias i8 * @malloc(i64 %sz_LANE_ID) + %sz64_LANE_ID = extractelement %size, i32 LANE + %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 16) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') @@ -2680,7 +2781,7 @@ define @__new_varying64_64rt( %size, % } define void @__delete_uniform_64rt(i8 * %ptr) { - call void @free(i8 * %ptr) + call void @_aligned_free(i8 * %ptr) ret void } @@ -2688,11 +2789,23 @@ define void @__delete_varying_64rt( %ptr, %mask) { per_lane(WIDTH, %mask, ` %iptr_LANE_ID = extractelement %ptr, i32 LANE %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 * - call void @free(i8 * %ptr_LANE_ID) + call void @_aligned_free(i8 * %ptr_LANE_ID) ') ret void } +', ` +errprint(`RUNTIME should be defined to either 32 or 64 +') +m4exit(`1') +') + +', +` +errprint(`BUILD_OS should be defined to either UNIX or WINDOWS +') +m4exit(`1') +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; read hw clock