Fixed a number of issues related to memory alignment; a number of places

were expecting vector-width-aligned pointers where in point of fact,
there's no guarantee that they would have been in general.

Removed the aligned memory allocation routines from some of the examples;
they're no longer needed.

No perf. difference on Core2/Core i5 CPUs; older CPUs may see some
regressions.

Still need to update the documentation for this change and finish reviewing
alignment issues in Load/Store instructions generated by .cpp files.
This commit is contained in:
Matt Pharr
2011-06-23 18:18:33 -07:00
parent d340dcbfcc
commit b84167dddd
11 changed files with 45 additions and 112 deletions

View File

@@ -43,9 +43,6 @@
#include <algorithm>
#include <assert.h>
#include <sys/types.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include "../timing.h"
#include "rt_ispc.h"
@@ -53,23 +50,6 @@ using namespace ispc;
typedef unsigned int uint;
template <typename T>
T *AllocAligned(int count) {
int size = count * sizeof(T);
#if defined(_WIN32) || defined(_WIN64)
return (T *)_aligned_malloc(size, 64);
#elif defined (__APPLE__)
// Allocate excess memory to ensure an aligned pointer can be returned
void *mem = malloc(size + (64-1) + sizeof(void*));
char *amem = ((char*)mem) + sizeof(void*);
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
((void**)amem)[-1] = mem;
return (T *)amem;
#else
return (T *)memalign(64, size);
#endif
}
extern void raytrace_serial(int width, int height, const float raster2camera[4][4],
const float camera2world[4][4], float image[],
int id[], const LinearBVHNode nodes[],
@@ -161,7 +141,7 @@ int main(int argc, char *argv[]) {
uint nNodes;
READ(nNodes, 1);
LinearBVHNode *nodes = AllocAligned<LinearBVHNode>(nNodes);
LinearBVHNode *nodes = new LinearBVHNode[nNodes];
for (unsigned int i = 0; i < nNodes; ++i) {
// Each node is 6x floats for a boox, then an integer for an offset
// to the second child node, then an integer that encodes the type
@@ -181,7 +161,7 @@ int main(int argc, char *argv[]) {
// And then read the triangles
uint nTris;
READ(nTris, 1);
Triangle *triangles = AllocAligned<Triangle>(nTris);
Triangle *triangles = new Triangle[nTris];
for (uint i = 0; i < nTris; ++i) {
// 9x floats for the 3 vertices
float v[9];