Merge pull request #482 from dbabokin/sprintf

Some more cleanup and stability fixes
This commit is contained in:
jbrodman
2013-04-25 12:07:27 -07:00
4 changed files with 125 additions and 106 deletions

View File

@@ -50,6 +50,16 @@
available to ispc programs at compile time automatically. available to ispc programs at compile time automatically.
*/ */
#ifdef _MSC_VER
// We want the old-school sprintf, not the secure Microsoft extensions,
// and we don't want warnings about it either, hence the define.
#define _CRT_SECURE_NO_WARNINGS
#else
// Some versions of glibc have a "fortification" feature, which expands sprintf
// to __builtin___sprintf_chk(..., __builtin_object_size(...), ...).
// We don't want this kind of expansion, as we don't support these intrinsics.
#define _FORTIFY_SOURCE 0
#endif
#ifndef _MSC_VER #ifndef _MSC_VER
#include <unistd.h> #include <unistd.h>

View File

@@ -2048,7 +2048,6 @@ static void generateCompilerSpecificCode(llvm::formatted_raw_ostream& Out,
"__builtin_prefetch(addr,rw,locality)\n" "__builtin_prefetch(addr,rw,locality)\n"
<< "//#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" << "//#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
<< "//#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" << "//#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
<< "//#define LLVM_ASM __asm__\n"
<< "#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)\n" << "#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)\n"
<< "#include <limits>\n" << "#include <limits>\n"
<< "#define LLVM_NAN(NanStr) std::numeric_limits<double>::quiet_NaN()\n" << "#define LLVM_NAN(NanStr) std::numeric_limits<double>::quiet_NaN()\n"
@@ -2060,11 +2059,19 @@ static void generateCompilerSpecificCode(llvm::formatted_raw_ostream& Out,
<< "//#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" << "//#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
<< "//#define __ATTRIBUTE_CTOR__\n" << "//#define __ATTRIBUTE_CTOR__\n"
<< "//#define __ATTRIBUTE_DTOR__\n" << "//#define __ATTRIBUTE_DTOR__\n"
<< "//#define LLVM_ASM(X)\n"
<< "#else\n" << "#else\n"
<< "#error \"Not MSVC, clang, or g++?\"\n" << "#error \"Not MSVC, clang, or g++?\"\n"
<< "#endif\n\n"; << "#endif\n\n";
// LLVM_ASM() is used to map a symbol to a different name. This is
// expected to be a MacOS-only feature, so it is defined only for
// gcc and clang (Intel Compiler on Linux/MacOS is also ok).
// For example, this feature is required to translate the symbols described
// in the "Symbol Variants Release Notes" document (on the Apple website).
Out << "#if (defined(__GNUC__) || defined(__clang__))\n"
<< "#define LLVM_ASM(X) __asm(X)\n"
<< "#endif\n\n";
Out << "#if defined(__clang__) || defined(__INTEL_COMPILER) || " Out << "#if defined(__clang__) || defined(__INTEL_COMPILER) || "
"(__GNUC__ < 4) /* Old GCCs, or compilers not GCC */ \n" "(__GNUC__ < 4) /* Old GCCs, or compilers not GCC */ \n"
<< "#define __builtin_stack_save() 0 /* not implemented */\n" << "#define __builtin_stack_save() 0 /* not implemented */\n"
@@ -2404,6 +2411,7 @@ bool CWriter::doInitialization(llvm::Module &M) {
if (I->hasHiddenVisibility()) if (I->hasHiddenVisibility())
Out << " __HIDDEN__"; Out << " __HIDDEN__";
// This is a MacOS-specific feature; it should not appear on other platforms.
if (I->hasName() && I->getName()[0] == 1) if (I->hasName() && I->getName()[0] == 1)
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";

View File

@@ -3112,25 +3112,25 @@ static FORCEINLINE RetVec lGather32(RetVec, RetScalar, __vec4_i32 ptrs,
RetScalar r[4]; RetScalar r[4];
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
RetScalar *ptr = (RetScalar *)_mm_extract_epi32(ptrs.v, 0); RetScalar *ptr = (RetScalar *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
r[0] = *ptr; r[0] = *ptr;
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
RetScalar *ptr = (RetScalar *)_mm_extract_epi32(ptrs.v, 1); RetScalar *ptr = (RetScalar *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
r[1] = *ptr; r[1] = *ptr;
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
RetScalar *ptr = (RetScalar *)_mm_extract_epi32(ptrs.v, 2); RetScalar *ptr = (RetScalar *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
r[2] = *ptr; r[2] = *ptr;
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
RetScalar *ptr = (RetScalar *)_mm_extract_epi32(ptrs.v, 3); RetScalar *ptr = (RetScalar *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
r[3] = *ptr; r[3] = *ptr;
} }
@@ -3185,25 +3185,25 @@ static FORCEINLINE __vec4_i32 __gather32_i32(__vec4_i32 ptrs, __vec4_i1 mask) {
__m128i r = _mm_set_epi32(0, 0, 0, 0); __m128i r = _mm_set_epi32(0, 0, 0, 0);
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
int32_t *ptr = (int32_t *)_mm_extract_epi32(ptrs.v, 0); int32_t *ptr = (int32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
r = _mm_insert_epi32(r, *ptr, 0); r = _mm_insert_epi32(r, *ptr, 0);
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
int32_t *ptr = (int32_t *)_mm_extract_epi32(ptrs.v, 1); int32_t *ptr = (int32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
r = _mm_insert_epi32(r, *ptr, 1); r = _mm_insert_epi32(r, *ptr, 1);
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
int32_t *ptr = (int32_t *)_mm_extract_epi32(ptrs.v, 2); int32_t *ptr = (int32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
r = _mm_insert_epi32(r, *ptr, 2); r = _mm_insert_epi32(r, *ptr, 2);
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
int32_t *ptr = (int32_t *)_mm_extract_epi32(ptrs.v, 3); int32_t *ptr = (int32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
r = _mm_insert_epi32(r, *ptr, 3); r = _mm_insert_epi32(r, *ptr, 3);
} }
@@ -3410,25 +3410,25 @@ static FORCEINLINE void __scatter32_i8(__vec4_i32 ptrs, __vec4_i8 val,
__vec4_i1 mask) { __vec4_i1 mask) {
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
uint8_t *ptr = (uint8_t *)_mm_extract_epi32(ptrs.v, 0); uint8_t *ptr = (uint8_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
*ptr = _mm_extract_epi8(val.v, 0); *ptr = _mm_extract_epi8(val.v, 0);
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
uint8_t *ptr = (uint8_t *)_mm_extract_epi32(ptrs.v, 1); uint8_t *ptr = (uint8_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
*ptr = _mm_extract_epi8(val.v, 1); *ptr = _mm_extract_epi8(val.v, 1);
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
uint8_t *ptr = (uint8_t *)_mm_extract_epi32(ptrs.v, 2); uint8_t *ptr = (uint8_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
*ptr = _mm_extract_epi8(val.v, 2); *ptr = _mm_extract_epi8(val.v, 2);
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
uint8_t *ptr = (uint8_t *)_mm_extract_epi32(ptrs.v, 3); uint8_t *ptr = (uint8_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
*ptr = _mm_extract_epi8(val.v, 3); *ptr = _mm_extract_epi8(val.v, 3);
} }
} }
@@ -3464,25 +3464,25 @@ static FORCEINLINE void __scatter32_i16(__vec4_i32 ptrs, __vec4_i16 val,
__vec4_i1 mask) { __vec4_i1 mask) {
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
uint16_t *ptr = (uint16_t *)_mm_extract_epi32(ptrs.v, 0); uint16_t *ptr = (uint16_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
*ptr = _mm_extract_epi16(val.v, 0); *ptr = _mm_extract_epi16(val.v, 0);
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
uint16_t *ptr = (uint16_t *)_mm_extract_epi32(ptrs.v, 1); uint16_t *ptr = (uint16_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
*ptr = _mm_extract_epi16(val.v, 1); *ptr = _mm_extract_epi16(val.v, 1);
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
uint16_t *ptr = (uint16_t *)_mm_extract_epi32(ptrs.v, 2); uint16_t *ptr = (uint16_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
*ptr = _mm_extract_epi16(val.v, 2); *ptr = _mm_extract_epi16(val.v, 2);
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
uint16_t *ptr = (uint16_t *)_mm_extract_epi32(ptrs.v, 3); uint16_t *ptr = (uint16_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
*ptr = _mm_extract_epi16(val.v, 3); *ptr = _mm_extract_epi16(val.v, 3);
} }
} }
@@ -3518,25 +3518,25 @@ static FORCEINLINE void __scatter32_i32(__vec4_i32 ptrs, __vec4_i32 val,
__vec4_i1 mask) { __vec4_i1 mask) {
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
uint32_t *ptr = (uint32_t *)_mm_extract_epi32(ptrs.v, 0); uint32_t *ptr = (uint32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
*ptr = _mm_extract_epi32(val.v, 0); *ptr = _mm_extract_epi32(val.v, 0);
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
uint32_t *ptr = (uint32_t *)_mm_extract_epi32(ptrs.v, 1); uint32_t *ptr = (uint32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
*ptr = _mm_extract_epi32(val.v, 1); *ptr = _mm_extract_epi32(val.v, 1);
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
uint32_t *ptr = (uint32_t *)_mm_extract_epi32(ptrs.v, 2); uint32_t *ptr = (uint32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
*ptr = _mm_extract_epi32(val.v, 2); *ptr = _mm_extract_epi32(val.v, 2);
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
uint32_t *ptr = (uint32_t *)_mm_extract_epi32(ptrs.v, 3); uint32_t *ptr = (uint32_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
*ptr = _mm_extract_epi32(val.v, 3); *ptr = _mm_extract_epi32(val.v, 3);
} }
} }
@@ -3582,25 +3582,25 @@ static FORCEINLINE void __scatter32_i64(__vec4_i32 ptrs, __vec4_i64 val,
__vec4_i1 mask) { __vec4_i1 mask) {
uint32_t m = _mm_extract_ps(mask.v, 0); uint32_t m = _mm_extract_ps(mask.v, 0);
if (m != 0) { if (m != 0) {
uint64_t *ptr = (uint64_t *)_mm_extract_epi32(ptrs.v, 0); uint64_t *ptr = (uint64_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 0));
*ptr = _mm_extract_epi64(val.v[0], 0); *ptr = _mm_extract_epi64(val.v[0], 0);
} }
m = _mm_extract_ps(mask.v, 1); m = _mm_extract_ps(mask.v, 1);
if (m != 0) { if (m != 0) {
uint64_t *ptr = (uint64_t *)_mm_extract_epi32(ptrs.v, 1); uint64_t *ptr = (uint64_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 1));
*ptr = _mm_extract_epi64(val.v[0], 1); *ptr = _mm_extract_epi64(val.v[0], 1);
} }
m = _mm_extract_ps(mask.v, 2); m = _mm_extract_ps(mask.v, 2);
if (m != 0) { if (m != 0) {
uint64_t *ptr = (uint64_t *)_mm_extract_epi32(ptrs.v, 2); uint64_t *ptr = (uint64_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 2));
*ptr = _mm_extract_epi64(val.v[1], 0); *ptr = _mm_extract_epi64(val.v[1], 0);
} }
m = _mm_extract_ps(mask.v, 3); m = _mm_extract_ps(mask.v, 3);
if (m != 0) { if (m != 0) {
uint64_t *ptr = (uint64_t *)_mm_extract_epi32(ptrs.v, 3); uint64_t *ptr = (uint64_t *)((uintptr_t)_mm_extract_epi32(ptrs.v, 3));
*ptr = _mm_extract_epi64(val.v[1], 1); *ptr = _mm_extract_epi64(val.v[1], 1);
} }
} }

View File

@@ -398,6 +398,7 @@ if __name__ == '__main__':
run_error_files = [ ] run_error_files = [ ]
nthreads = min(multiprocessing.cpu_count(), options.num_jobs) nthreads = min(multiprocessing.cpu_count(), options.num_jobs)
nthreads = min(nthreads, len(files))
sys.stdout.write("Running %d jobs in parallel. Running %d tests.\n" % (nthreads, total_tests)) sys.stdout.write("Running %d jobs in parallel. Running %d tests.\n" % (nthreads, total_tests))
# put each of the test filenames into a queue # put each of the test filenames into a queue