Fixed a number of issues related to memory alignment: several places
assumed vector-width-aligned pointers when, in fact, there is no general guarantee that the pointers are so aligned. Removed the aligned memory allocation routines from some of the examples; they're no longer needed. No performance difference on Core2/Core i5 CPUs; older CPUs may see some regressions. Still need to update the documentation for this change and finish reviewing alignment issues in Load/Store instructions generated by .cpp files.
This commit is contained in:
@@ -566,7 +566,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%mask_b = shufflevector <8 x float> %mask_as_float, <8 x float> undef,
|
||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%oldValue = load <8 x i32>* %0
|
||||
%oldValue = load <8 x i32>* %0, align 4
|
||||
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
||||
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
||||
%old_a = shufflevector <8 x float> %oldAsFloat, <8 x float> undef,
|
||||
@@ -584,7 +584,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||
%blend = shufflevector <4 x float> %blend_a, <4 x float> %blend_b,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
||||
store <8 x i32> %blendAsInt, <8 x i32>* %0
|
||||
store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -595,7 +595,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||
|
||||
%mask_as_float = bitcast <8 x i32> %mask to <8 x float>
|
||||
|
||||
%old = load <8 x i64>* %ptr
|
||||
%old = load <8 x i64>* %ptr, align 8
|
||||
|
||||
; set up the first two 64-bit values
|
||||
%old01 = shufflevector <8 x i64> %old, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||
@@ -651,7 +651,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%final = shufflevector <4 x i64> %final0123, <4 x i64> %final4567,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
store <8 x i64> %final, <8 x i64> * %ptr
|
||||
store <8 x i64> %final, <8 x i64> * %ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user