From 7bedb4a081263349b2c1f6a94ed6c70be6e221d4 Mon Sep 17 00:00:00 2001
From: Dmitry Babokin
Date: Fri, 17 May 2013 21:35:27 +0400
Subject: [PATCH] Add memory alignment dependent on the platform (16/32/64/etc)

---
 builtins/util.m4 | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/builtins/util.m4 b/builtins/util.m4
index 87f63de7..c19d4930 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -2544,6 +2544,10 @@ ok:
 ;; Note that this should be really two different libraries for 32 and 64
 ;; environment and it should happen sooner or later
 
+ifelse(WIDTH, 1, `define(`ALIGNMENT', `16')', `define(`ALIGNMENT', `eval(WIDTH*4)')')
+
+@memory_alignment = internal constant i32 ALIGNMENT
+
 ifelse(BUILD_OS, `UNIX',
 `
 
@@ -2564,7 +2568,8 @@ declare void @free(i8 *)
 define noalias i8 * @__new_uniform_32rt(i64 %size) {
   %ptr = alloca i8*
   %conv = trunc i64 %size to i32
-  %call1 = call i32 @posix_memalign(i8** %ptr, i32 16, i32 %conv)
+  %alignment = load i32* @memory_alignment
+  %call1 = call i32 @posix_memalign(i8** %ptr, i32 %alignment, i32 %conv)
   %ptr_val = load i8** %ptr
   ret i8* %ptr_val
 }
@@ -2573,12 +2578,13 @@ define <WIDTH x i64> @__new_varying32_32rt(<WIDTH x i32> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8**
-  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 16, i32 %sz_LANE_ID)')
+  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 %alignment, i32 %sz_LANE_ID)')
 
   %r = load <WIDTH x i64> * %ret
   ret <WIDTH x i64> %r
@@ -2616,7 +2622,9 @@ declare void @free(i8 *)
 
 define noalias i8 * @__new_uniform_64rt(i64 %size) {
   %ptr = alloca i8*
-  %call1 = call i32 @posix_memalign(i8** %ptr, i64 16, i64 %size)
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
+  %call1 = call i32 @posix_memalign(i8** %ptr, i64 %alignment64, i64 %size)
   %ptr_val = load i8** %ptr
   ret i8* %ptr_val
 }
@@ -2625,13 +2633,15 @@ define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
   %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8**
-  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 16, i64 %sz64_LANE_ID)')
+  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)')
 
   %r = load <WIDTH x i64> * %ret
   ret <WIDTH x i64> %r
@@ -2641,12 +2651,14 @@ define <WIDTH x i64> @__new_varying64_64rt(<WIDTH x i64> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz64_LANE_ID = extractelement <WIDTH x i64> %size, i32 LANE
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8**
-  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 16, i64 %sz64_LANE_ID)')
+  %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)')
 
   %r = load <WIDTH x i64> * %ret
   ret <WIDTH x i64> %r
@@ -2692,7 +2704,8 @@ declare void @_aligned_free(i8 *)
 
 define noalias i8 * @__new_uniform_32rt(i64 %size) {
   %conv = trunc i64 %size to i32
-  %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 16)
+  %alignment = load i32* @memory_alignment
+  %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 %alignment)
   ret i8* %ptr
 }
 
@@ -2700,10 +2713,11 @@ define <WIDTH x i64> @__new_varying32_32rt(<WIDTH x i32> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
-  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 16)
+  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 %alignment)
   %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
@@ -2743,7 +2757,9 @@ declare i8* @_aligned_malloc(i64, i64)
 declare void @_aligned_free(i8 *)
 
 define noalias i8 * @__new_uniform_64rt(i64 %size) {
-  %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 16)
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
+  %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 %alignment64)
   ret i8* %ptr
 }
 
@@ -2751,11 +2767,13 @@ define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
   %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64
-  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 16)
+  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64)
   %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
@@ -2768,10 +2786,12 @@ define <WIDTH x i64> @__new_varying64_64rt(<WIDTH x i64> %size, <WIDTH x MASK> %
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
+  %alignment = load i32* @memory_alignment
+  %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz64_LANE_ID = extractelement <WIDTH x i64> %size, i32 LANE
-  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 16)
+  %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64)
   %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
   store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
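
Note on the new ALIGNMENT logic: the m4 `ifelse' pins the alignment at 16
bytes when WIDTH is 1, and otherwise computes eval(WIDTH*4), i.e. the byte
size of a WIDTH-wide vector of 32-bit elements. A minimal sketch of what the
macro expands to for a few representative widths (illustrative values only,
not part of the patch):

    ;; WIDTH=4  -> eval(4*4)  = 16:  @memory_alignment = internal constant i32 16
    ;; WIDTH=8  -> eval(8*4)  = 32:  @memory_alignment = internal constant i32 32
    ;; WIDTH=16 -> eval(16*4) = 64:  @memory_alignment = internal constant i32 64
    ;; WIDTH=1  -> special case:     @memory_alignment = internal constant i32 16

The WIDTH=1 special case presumably exists because eval(1*4) would yield an
alignment of 4, which posix_memalign rejects on 64-bit targets: the alignment
must be a power of two that is also a multiple of sizeof(void*). Likewise,
the 64-bit runtime variants sext the loaded i32 constant to i64 because the
alignment argument of posix_memalign and _aligned_malloc is size_t-width
there.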