From ffc9a33933987b71d245adb208d180805b64cb9e Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Sun, 10 Nov 2013 23:48:49 +0400 Subject: [PATCH] avx1-i32x4 implementation as sse4-i32x4 with avx target-feature flag --- builtins.cpp | 21 ++++++++++++++++----- ispc.cpp | 42 +++++++++++++++++++++++++++++++++++------- ispc.h | 8 +++++++- 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 730e315c..2c9703c6 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -942,11 +942,22 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod case Target::AVX: { switch (g->target->getVectorWidth()) { case 4: - if (runtime32) { - EXPORT_MODULE(builtins_bitcode_avx1_i64x4_32bit); - } - else { - EXPORT_MODULE(builtins_bitcode_avx1_i64x4_64bit); + if (g->target->getDataTypeWidth() == 32) { + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_sse4_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_sse4_64bit); + } + } else if (g->target->getDataTypeWidth() == 64) { + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx1_i64x4_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx1_i64x4_64bit); + } + } else { + FATAL("logic error in DefineStdlib"); } break; case 8: diff --git a/ispc.cpp b/ispc.cpp index 859865a5..cb70b879 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -169,7 +169,7 @@ static const char *supportedCPUs[] = { , "core-avx-i", "core-avx2" #endif // LLVM 3.2+ #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) - , "slm" + , "slm" #endif // LLVM 3.4+ }; @@ -191,6 +191,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_tf_attributes(NULL), #endif m_nativeVectorWidth(-1), + m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic), m_maskingIsFree(false), @@ -308,6 +309,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse2-i32x4")) { this->m_isa = Target::SSE2; this->m_nativeVectorWidth = 4; + 
this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" #if defined(LLVM_3_4) @@ -323,6 +325,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse2-i32x8")) { this->m_isa = Target::SSE2; this->m_nativeVectorWidth = 4; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" #if defined(LLVM_3_4) @@ -338,11 +341,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse4-i32x4")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 4; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; // TODO: why not sse42 and popcnt? this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" #if defined(LLVM_3_4) - ",+sse4.1,-sse4.2" + ",+sse4.1,-sse4.2" #else ",+sse41,-sse42" #endif @@ -355,10 +359,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse4-i32x8")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 4; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" #if defined(LLVM_3_4) - ",+sse4.1,-sse4.2" + ",+sse4.1,-sse4.2" #else ",+sse41,-sse42" #endif @@ -369,10 +374,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "sse4-i8x16")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 16; + this->m_dataTypeWidth = 8; this->m_vectorWidth = 16; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" #if defined(LLVM_3_4) - ",+sse4.1,-sse4.2" + ",+sse4.1,-sse4.2" #else ",+sse41,-sse42" #endif @@ -383,10 +389,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "sse4-i16x8")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 16; this->m_vectorWidth 
= 8; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" #if defined(LLVM_3_4) - ",+sse4.1,-sse4.2" + ",+sse4.1,-sse4.2" #else ",+sse41,-sse42" #endif @@ -457,11 +464,21 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskingIsFree = false; this->m_maskBitCount = 32; } + else if (!strcasecmp(isa, "avx1-i32x4")) { + this->m_isa = Target::AVX; + this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; + this->m_vectorWidth = 4; + this->m_attributes = "+avx,+popcnt,+cmov"; + this->m_maskingIsFree = false; + this->m_maskBitCount = 32; + } else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1") || !strcasecmp(isa, "avx1-i32x8")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; @@ -471,6 +488,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1-i64x4")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; @@ -481,6 +499,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1-i32x16")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; @@ -490,6 +509,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1.1-i32x8")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -510,6 +530,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : 
!strcasecmp(isa, "avx1.1-i32x16")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -517,7 +538,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #else ",+rdrand" #endif - ; + ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; this->m_hasHalf = true; @@ -529,6 +550,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "avx1.1-i64x4")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -536,7 +558,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #else ",+rdrand" #endif - ; + ; this->m_maskingIsFree = false; this->m_maskBitCount = 64; this->m_hasHalf = true; @@ -549,6 +571,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx2-i32x8")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -573,6 +596,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx2-i32x16")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 16; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -596,6 +620,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "avx2-i64x4")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = 
"+avx2,+popcnt,+cmov,+f16c" #if defined(LLVM_3_4) @@ -620,6 +645,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "neon-i8x16")) { this->m_isa = Target::NEON8; this->m_nativeVectorWidth = 16; + this->m_dataTypeWidth = 8; this->m_vectorWidth = 16; this->m_attributes = "+neon,+fp16"; this->m_hasHalf = true; // ?? @@ -629,6 +655,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "neon-i16x8")) { this->m_isa = Target::NEON16; this->m_nativeVectorWidth = 8; + this->m_dataTypeWidth = 16; this->m_vectorWidth = 8; this->m_attributes = "+neon,+fp16"; this->m_hasHalf = true; // ?? @@ -639,6 +666,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "neon-i32x4")) { this->m_isa = Target::NEON32; this->m_nativeVectorWidth = 4; + this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; this->m_attributes = "+neon,+fp16"; this->m_hasHalf = true; // ?? diff --git a/ispc.h b/ispc.h index 82cb9050..c74ff347 100644 --- a/ispc.h +++ b/ispc.h @@ -253,6 +253,8 @@ public: int getNativeVectorWidth() const {return m_nativeVectorWidth;} + int getDataTypeWidth() const {return m_dataTypeWidth;} + int getVectorWidth() const {return m_vectorWidth;} bool getGeneratePIC() const {return m_generatePIC;} @@ -319,10 +321,14 @@ private: #endif /** Native vector width of the vector instruction set. Note that this - value is directly derived from the ISA Being used (e.g. it's 4 for + value is directly derived from the ISA being used (e.g. it's 4 for SSE, 8 for AVX, etc.) */ int m_nativeVectorWidth; + /** Data type width in bits. Typically it's 32, but could be 8, 16 or 64. + For generic it's -1, which means undefined. */ + int m_dataTypeWidth; + /** Actual vector width currently being compiled to. This may be an integer multiple of the native vector width, for example if we're "doubling up" and compiling 8-wide on a 4-wide SSE system. */