Update per_lane macro to not use ID for lane number in macro expansion

This was leading to unintended consequences if WIDTH was used in macro code,
which was undesirable.
This commit is contained in:
Matt Pharr
2012-01-26 14:15:58 -08:00
parent 56ffc78fa4
commit 24f58fa16a

View File

@@ -2187,9 +2187,9 @@ return:
define(`gen_masked_store', `
define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline {
per_lane($1, <$1 x i32> %2, `
%ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <$1 x $2> %1, i32 LANE
store $2 %storeval_ID, $2 * %ptr_ID')
%ptr_LANE_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
%storeval_LANE_ID = extractelement <$1 x $2> %1, i32 LANE
store $2 %storeval_LANE_ID, $2 * %ptr_LANE_ID')
ret void
}
')
@@ -2644,7 +2644,7 @@ pl_known_mask:
pl_all_on:
;; the mask is all on--just expand the code for each lane sequentially
forloop(i, 0, eval($1-1),
`patsubst(`$3', `ID\|LANE', i)')
`patsubst(`$3', `LANE', i)')
br label %pl_done
pl_unknown_mask:
@@ -2806,11 +2806,11 @@ define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
%val_ID = load $2 * %ptr_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
%val_LANE_ID = load $2 * %ptr_LANE_ID
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
')
%ret = load <$1 x $2> * %ret_ptr
@@ -2822,11 +2822,11 @@ define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
%val_ID = load $2 * %ptr_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
%val_LANE_ID = load $2 * %ptr_LANE_ID
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
')
%ret = load <$1 x $2> * %ret_ptr
@@ -2910,10 +2910,10 @@ define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, i32 %of
define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
')
ret void
}
@@ -2922,10 +2922,10 @@ define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
')
ret void
}