Fixed a number of issues related to memory alignment; a number of places

were expecting vector-width-aligned pointers when, in fact, there is no guarantee in general that the pointers would be so aligned. Removed the aligned memory allocation routines from some of the examples; they're no longer needed. No performance difference on Core 2/Core i5 CPUs; older CPUs may see some regressions. Still need to update the documentation for this change and finish reviewing alignment issues in the Load/Store instructions generated by the .cpp files.
2011-06-23 18:18:33 -07:00
parent d340dcbfcc
commit b84167dddd
11 changed files with 45 additions and 112 deletions
--- a/stdlib-avx.ll
+++ b/stdlib-avx.ll
@@ -513,14 +513,14 @@ declare <8 x float> @llvm.x86.avx.blendvps(<8 x float>, <8 x float>,
 define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
                                           <8 x i32>) nounwind alwaysinline {
  %mask_as_float = bitcast <8 x i32> %2 to <8 x float>
-  %oldValue = load <8 x i32>* %0
+  %oldValue = load <8 x i32>* %0, align 4
  %oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
  %newAsFloat = bitcast <8 x i32> %1 to <8 x float>
  %blend = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %oldAsFloat,
                                                   <8 x float> %newAsFloat,
                                                   <8 x float> %mask_as_float)
  %blendAsInt = bitcast <8 x float> %blend to <8 x i32>
-  store <8 x i32> %blendAsInt, <8 x i32>* %0
+  store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4
  ret void
 }