Fixed a number of issues related to memory alignment; a number of places

were expecting vector-width-aligned pointers where in point of fact,
there's no guarantee that they would have been in general.

Removed the aligned memory allocation routines from some of the examples;
they're no longer needed.

No perf. difference on Core2/Core i5 CPUs; older CPUs may see some
regressions.

Still need to update the documentation for this change and finish reviewing
alignment issues in Load/Store instructions generated by .cpp files.
This commit is contained in:
Matt Pharr
2011-06-23 18:18:33 -07:00
parent d340dcbfcc
commit b84167dddd
11 changed files with 45 additions and 112 deletions

View File

@@ -37,9 +37,6 @@
#include <assert.h>
#include <math.h>
#include <algorithm>
#ifndef __APPLE__
#include <malloc.h>
#endif // !__APPLE__
using std::max;
#include "options_defs.h"
@@ -48,23 +45,6 @@ using std::max;
#include "options_ispc.h"
using namespace ispc;
// Allocate memory with 64-byte alignment.
float *AllocFloats(int count) {
int size = count * sizeof(float);
#if defined(_WIN32) || defined(_WIN64)
return (float *)_aligned_malloc(size, 64);
#elif defined (__APPLE__)
// Allocate excess memory to ensure an aligned pointer can be returned
void *mem = malloc(size + (64-1) + sizeof(void*));
char *amem = ((char*)mem) + sizeof(void*);
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
((void**)amem)[-1] = mem;
return (float *)amem;
#else
return (float *)memalign(64, size);
#endif
}
extern void black_scholes_serial(float Sa[], float Xa[], float Ta[],
float ra[], float va[],
float result[], int count);
@@ -76,12 +56,12 @@ extern void binomial_put_serial(float Sa[], float Xa[], float Ta[],
int main() {
// Pointers passed to ispc code must have alignment of the target's
// vector width at minimum.
float *S = AllocFloats(N_OPTIONS);
float *X = AllocFloats(N_OPTIONS);
float *T = AllocFloats(N_OPTIONS);
float *r = AllocFloats(N_OPTIONS);
float *v = AllocFloats(N_OPTIONS);
float *result = AllocFloats(N_OPTIONS);
float *S = new float[N_OPTIONS];
float *X = new float[N_OPTIONS];
float *T = new float[N_OPTIONS];
float *r = new float[N_OPTIONS];
float *v = new float[N_OPTIONS];
float *result = new float[N_OPTIONS];
for (int i = 0; i < N_OPTIONS; ++i) {
S[i] = 100; // stock price