From cc8bae2f2ca89555a0f5c57f1e0861ec0bbc8d3e Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 1 Apr 2014 16:09:16 +0400 Subject: [PATCH 01/26] Adding warning about LLVM_HOME in Makefile --- Makefile | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Makefile b/Makefile index 7d5d41dd..47b1da4c 100644 --- a/Makefile +++ b/Makefile @@ -34,11 +34,41 @@ # ispc Makefile # +define newline + + +endef + +define WARNING_BODY + ============================== !!! WARNING !!! =============================== \n +Location of LLVM files in your PATH is different than path in LLVM_HOME \n +variable (or LLVM_HOME is not set). Most likely this means that you are \n +using default LLVM installation on your system, which is a very bad sign. \n +Note, that ISPC uses LLVM optimizer and is highly dependent on it. We recommend \n +using *patched* version of LLVM 3.3 or 3.4. Patches are available in \n +llvm_patches folder. You can build LLVM manually, or run our scripts, which \n +will do all the work for you. Do the following: \n +1. Create a folder, where LLVM will reside and set LLVM_HOME variable to its \n + path. \n +2. Set ISPC_HOME variable to your ISPC location (probably current folder). +3. Run alloy.py tool to checkout and build LLVM: \n + alloy.py -b --version=3.4 \n +4. Add $LLVM_HOME/bin-3.4/bin path to your PATH. \n +============================================================================== +endef + # If you have your own special version of llvm and/or clang, change # these variables to match. LLVM_CONFIG=$(shell which llvm-config) CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir) +RIGHT_LLVM = $(WARNING_BODY) +ifdef LLVM_HOME + ifeq ($(findstring $(LLVM_HOME), $(LLVM_CONFIG)), $(LLVM_HOME)) + RIGHT_LLVM = LLVM from $$LLVM_HOME is used.
+ endif +endif + # Enable ARM by request # To enable: make ARM_ENABLED=1 ARM_ENABLED=0 @@ -188,6 +218,7 @@ llvm_check: echo "ERROR: llvm-config not found in your PATH"; \ echo "******************************************"; \ echo; exit 1) + @echo -e '$(subst $(newline), ,$(RIGHT_LLVM))' print_llvm_src: llvm_check @echo Using LLVM `llvm-config --version` from `llvm-config --libdir` From 114f58bb0b60cce9c38d42458d8debc2837aa71e Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 1 Apr 2014 18:48:47 +0400 Subject: [PATCH 02/26] support LLVM trunk after r204934 and zlib commits --- Makefile | 4 ++-- cbackend.cpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 47b1da4c..b03fc05e 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ will do all the work for you. Do the following: \n 2. Set ISPC_HOME variable to your ISPC location (probably current folder). 3. Run alloy.py tool to checkout and build LLVM: \n alloy.py -b --version=3.4 \n -4. Add $LLVM_HOME/bin-3.4/bin path to your PATH. \n +4. Add $$LLVM_HOME/bin-3.4/bin path to your PATH. \n ============================================================================== endef @@ -114,7 +114,7 @@ ifeq ($(LLVM_VERSION),LLVM_3_4) endif ifeq ($(LLVM_VERSION),LLVM_3_5) - ISPC_LIBS += -lcurses + ISPC_LIBS += -lcurses -lz endif ifeq ($(ARCH_OS),Linux) diff --git a/cbackend.cpp b/cbackend.cpp index cb56cb82..33906778 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1769,7 +1769,11 @@ std::string CWriter::GetValueName(const llvm::Value *Operand) { // Resolve potential alias. 
if (const llvm::GlobalAlias *GA = llvm::dyn_cast(Operand)) { +#if defined(LLVM_3_5) + if (const llvm::Value *V = GA->getAliasedGlobal()) +#else if (const llvm::Value *V = GA->resolveAliasedGlobal(false)) +#endif Operand = V; } From b1bf08c0d93dbedcb29ea43165062f1adbcca484 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 2 Apr 2014 12:45:21 +0400 Subject: [PATCH 03/26] removing winstuff --- alloy.py | 2 ++ ispc.vcxproj | 7 +++---- run_tests.py | 2 +- winstuff/stdint.h | 14 -------------- winstuff/unistd.h | 6 ------ 5 files changed, 6 insertions(+), 25 deletions(-) delete mode 100644 winstuff/stdint.h delete mode 100644 winstuff/unistd.h diff --git a/alloy.py b/alloy.py index f88f2abf..2b2c82b5 100755 --- a/alloy.py +++ b/alloy.py @@ -292,6 +292,8 @@ def build_ispc(version_LLVM, make): p_temp = os.getenv("LLVM_INSTALL_DIR") v_temp = os.getenv("LLVM_VERSION") os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM + if version_LLVM == "3.2": + temp = "3_2" if version_LLVM == "3.3": temp = "3_3" if version_LLVM == "3.4": diff --git a/ispc.vcxproj b/ispc.vcxproj index 8fa9be70..2ce69fde 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -105,7 +105,6 @@ - @@ -396,7 +395,7 @@ Level3 Disabled NOMINMAX;%LLVM_VERSION% - $(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories) + $(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories) 4146;4800;4996;4355;4624;4244 @@ -415,7 +414,7 @@ true true NOMINMAX;%LLVM_VERSION% - $(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories) + $(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories) 4146;4800;4996;4355;4624;4244 @@ -431,4 +430,4 @@ - \ No newline at end of file + diff --git a/run_tests.py b/run_tests.py index 89e6cd87..e33548b6 100755 --- a/run_tests.py +++ b/run_tests.py @@ -211,7 +211,7 @@ def run_test(testname): obj_name = "%s.obj" % os.path.basename(filename) exe_name = "%s.exe" % os.path.basename(filename) - cc_cmd = "%s /I. 
/I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \ + cc_cmd = "%s /I. /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \ (options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name) if should_fail: cc_cmd += " /DEXPECT_FAILURE" diff --git a/winstuff/stdint.h b/winstuff/stdint.h deleted file mode 100644 index 7f3f8100..00000000 --- a/winstuff/stdint.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef MY_STDINT_H -#define MY_STDINT_H 1 - -typedef signed char int8_t; -typedef signed __int16 int16_t; -typedef signed __int32 int32_t; -typedef signed __int64 int64_t; -typedef unsigned char uint8_t; - -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; - -#endif // MY_STDINT_H diff --git a/winstuff/unistd.h b/winstuff/unistd.h deleted file mode 100644 index f2f5dfae..00000000 --- a/winstuff/unistd.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef MY_UNISTD_H -#define MY_UNISTD_H 1 - -inline bool isatty(int) { return false; } - -#endif // MY_UNISTD_H \ No newline at end of file From 7ebea86a443a078a9b6ed733a4954b772817a38b Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 2 Apr 2014 15:49:57 +0400 Subject: [PATCH 04/26] These changes fix problem with debug info in LLVM 3.4 with structs and enums. The reason of problem is that ISPC generates debugInfo type of struct (or enum) in the scope, where the variable of this type appears. Ctx.cpp:1586 llvm::DIScope scope = GetDIScope(); llvm::DIType diType = sym->type->GetDIType(scope); It is always Lexical_Block in some function. It is not right because type can be declared global or in some function or in some namespace. Struct and enums are failing because they don't reduce to atomic types. The "Big" fix is to save declaration place in Type class. 
But now ISPC doesn't know about it, for example this doesn't emit an error: void function_1() { struct Foo {float x;}; uniform Foo myFoo;} void function_2() { uniform Foo myFoo;} So now all type declarations are global and we can simply change scope parameter to the current file. The "Big" fix will be after integration with clang. --- type.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/type.cpp b/type.cpp index b6d504f2..082f8910 100644 --- a/type.cpp +++ b/type.cpp @@ -826,7 +826,7 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIFile diFile = pos.GetDIFile(); llvm::DIType diType = - m->diBuilder->createEnumerationType(scope, name, diFile, pos.first_line, + m->diBuilder->createEnumerationType(diFile, name, diFile, pos.first_line, 32 /* size in bits */, 32 /* align in bits */, elementArray @@ -2179,7 +2179,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes); llvm::DIFile diFile = pos.GetDIFile(); return m->diBuilder->createStructType( - scope, + diFile, name, diFile, pos.first_line, // Line number @@ -2422,7 +2422,7 @@ UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIFile diFile = pos.GetDIFile(); llvm::DIArray elements; return m->diBuilder->createStructType( - scope, + diFile, name, diFile, pos.first_line, // Line number From 1705b5a65eb9166cc04a1991e71cdc749b8d2fd3 Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 10 Apr 2014 01:08:12 -0700 Subject: [PATCH 05/26] guard for single inclusion --- examples/util/util.isph | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/util/util.isph b/examples/util/util.isph index 1e598dc5..454df8f0 100644 --- a/examples/util/util.isph +++ b/examples/util/util.isph @@ -1,5 +1,9 @@ +#ifndef UTIL_ISPH +#define UTIL_ISPH // utility function to read the value of programCount from C/C++ export uniform int32 get_programCount() { return programCount; } + +#endif // UTIL_ISPH 
\ No newline at end of file From 61970e15008b042363f8d9e615279c8ed0c1e4db Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 10 Apr 2014 01:08:12 -0700 Subject: [PATCH 06/26] guard for single inclusion --- examples/util/util.isph | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/examples/util/util.isph b/examples/util/util.isph index 1e598dc5..7c075ac4 100644 --- a/examples/util/util.isph +++ b/examples/util/util.isph @@ -1,5 +1,42 @@ +/* + Copyright (c) 2010-2014, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTIL_ISPH +#define UTIL_ISPH // utility function to read the value of programCount from C/C++ export uniform int32 get_programCount() { return programCount; } + +#endif // UTIL_ISPH From a8b03e768c5d8185f573b86726b112b493b5a91c Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 10 Apr 2014 01:13:46 -0700 Subject: [PATCH 07/26] 2014. --- examples/util/util.isph | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/util/util.isph b/examples/util/util.isph index 7c075ac4..f0498a6f 100644 --- a/examples/util/util.isph +++ b/examples/util/util.isph @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2014, Intel Corporation + Copyright (c) 2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without From 141ea81ba574090a8183cb86d4c3647f097e0242 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Mon, 14 Apr 2014 19:33:52 +0400 Subject: [PATCH 08/26] Revert trigonometry to stdlib implementation on MIC --- ispc.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ispc.cpp b/ispc.cpp index 12898a8d..7ea97247 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -449,7 +449,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; - this->m_hasTrigonometry = true; + // It's set to false, because stdlib implementation of math functions + // is faster on MIC, than "native" implementation profided by the + // icc compiler. + this->m_hasTrigonometry = false; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } From 096546f88805d9030531cc73e6528ec3ee7765d0 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 17 Apr 2014 21:03:42 +0400 Subject: [PATCH 09/26] Fixing MIC performance issue, which showed up when we switched to LLVM 3.4 (due to more aggressive optimizations): vector of *the same* constants should be generated as scalar value in cpp file, instead of __extract_element(splat(value), 0). I.e. 
<2,2,2,2> should appear in cpp as 2, but not __extract_element(splat(2), 0); --- cbackend.cpp | 14 ++++++++++---- llvmutil.cpp | 36 +++++++++++++++++++++++++++--------- llvmutil.h | 3 ++- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 33906778..1c9626b5 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -3286,10 +3286,16 @@ void CWriter::visitBinaryOperator(llvm::Instruction &I) { if ((I.getOpcode() == llvm::Instruction::Shl || I.getOpcode() == llvm::Instruction::LShr || I.getOpcode() == llvm::Instruction::AShr)) { - if (LLVMVectorValuesAllEqual(I.getOperand(1))) { - Out << "__extract_element("; - writeOperand(I.getOperand(1)); - Out << ", 0) "; + llvm::Value *splat = NULL; + if (LLVMVectorValuesAllEqual(I.getOperand(1), &splat)) { + if (splat) { + // Avoid __extract_element(splat(value), 0), if possible. + writeOperand(splat); + } else { + Out << "__extract_element("; + writeOperand(I.getOperand(1)); + Out << ", 0) "; + } } else writeOperand(I.getOperand(1)); diff --git a/llvmutil.cpp b/llvmutil.cpp index 275cf794..5707bbc9 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -818,7 +818,8 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) { static bool lVectorValuesAllEqual(llvm::Value *v, int vectorLength, - std::vector &seenPhis); + std::vector &seenPhis, + llvm::Value **splatValue = NULL); /** This function checks to see if the given (scalar or vector) value is an @@ -1068,20 +1069,37 @@ lVectorShiftRightAllEqual(llvm::Value *val, llvm::Value *shift, static bool lVectorValuesAllEqual(llvm::Value *v, int vectorLength, - std::vector &seenPhis) { + std::vector &seenPhis, + llvm::Value **splatValue) { if (vectorLength == 1) return true; - if (llvm::isa(v)) + if (llvm::isa(v)) { + if (splatValue) { + llvm::ConstantAggregateZero *caz = + llvm::dyn_cast(v); + *splatValue = caz->getSequentialElement(); + } return true; + } llvm::ConstantVector *cv = llvm::dyn_cast(v); - if (cv != NULL) - return 
(cv->getSplatValue() != NULL); + if (cv != NULL) { + llvm::Value* splat = cv->getSplatValue(); + if (splat != NULL && splatValue) { + *splatValue = splat; + } + return (splat != NULL); + } llvm::ConstantDataVector *cdv = llvm::dyn_cast(v); - if (cdv != NULL) - return (cdv->getSplatValue() != NULL); + if (cdv != NULL) { + llvm::Value* splat = cdv->getSplatValue(); + if (splat != NULL && splatValue) { + *splatValue = splat; + } + return (splat != NULL); + } llvm::BinaryOperator *bop = llvm::dyn_cast(v); if (bop != NULL) { @@ -1178,14 +1196,14 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength, where the values are actually all equal. */ bool -LLVMVectorValuesAllEqual(llvm::Value *v) { +LLVMVectorValuesAllEqual(llvm::Value *v, llvm::Value **splat) { llvm::VectorType *vt = llvm::dyn_cast(v->getType()); Assert(vt != NULL); int vectorLength = vt->getNumElements(); std::vector seenPhis; - bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis); + bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis, splat); Debug(SourcePos(), "LLVMVectorValuesAllEqual(%s) -> %s.", v->getName().str().c_str(), equal ? "true" : "false"); diff --git a/llvmutil.h b/llvmutil.h index d6c5ede0..96310b94 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -228,7 +228,8 @@ extern llvm::Constant *LLVMMaskAllOff; /** Tests to see if all of the elements of the vector in the 'v' parameter are equal. Like lValuesAreEqual(), this is a conservative test and may return false for arrays where the values are actually all equal. 
*/ -extern bool LLVMVectorValuesAllEqual(llvm::Value *v); +extern bool LLVMVectorValuesAllEqual(llvm::Value *v, + llvm::Value **splat = NULL); /** Given vector of integer-typed values, this function returns true if it can determine that the elements of the vector have a step of 'stride' From dcc37451e5f5fb770692537e7277eca4daadbead Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 17 Apr 2014 23:52:32 +0400 Subject: [PATCH 10/26] Removing alias phases causing segfaults --- opt.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/opt.cpp b/opt.cpp index 85319ce7..7b955a2c 100644 --- a/opt.cpp +++ b/opt.cpp @@ -534,8 +534,6 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createScalarReplAggregatesPass()); optPM.add(llvm::createEarlyCSEPass()); optPM.add(llvm::createLowerExpectIntrinsicPass()); - optPM.add(llvm::createTypeBasedAliasAnalysisPass()); - optPM.add(llvm::createBasicAliasAnalysisPass()); // Early optimizations to try to reduce the total amount of code to // work with if we can From d63a94300c346b91753e4d73231006f3e7137da0 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 11 Apr 2014 00:16:32 +0400 Subject: [PATCH 11/26] v1.7.0 --- doxygen.cfg | 2 +- ispc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doxygen.cfg b/doxygen.cfg index 9a8f88e5..5a396ddd 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.6.1dev +PROJECT_NUMBER = 1.7.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
diff --git a/ispc.h b/ispc.h index 111524ce..1f07b6ae 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.6.1dev" +#define ISPC_VERSION "1.7.0" #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) #error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported" From a2774f2cf5b414f451bede449629f32a753a4958 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 11 Apr 2014 00:17:12 +0400 Subject: [PATCH 12/26] Release notes, docs update --- LICENSE.txt | 4 ++-- docs/ReleaseNotes.txt | 44 +++++++++++++++++++++++++++++++++++++++++++ docs/ispc.rst | 27 ++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index e7a34645..93c4d816 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2010-2013, Intel Corporation +Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -77,7 +77,7 @@ covered by the following license: University of Illinois/NCSA Open Source License -Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign. All rights reserved. Developed by: diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt index b7d0bb17..ef8cf6f8 100644 --- a/docs/ReleaseNotes.txt +++ b/docs/ReleaseNotes.txt @@ -1,3 +1,47 @@ +=== v1.7.0 === (18 April 2014) + +A major new version of ISPC with several language and library extensions and +fixes in debug info support. Binaries for all platforms are based on patched +version of LLVM 3.4. There are also performance improvements beyond switchover to +LLVM 3.4. + +The list of language and library changes: + +* Support for varying types in exported functions was added. See documentation + for more details.
+ +* get_programCount() function was moved from stdlib.ispc to + examples/util/util.isph, which needs to be included somewhere in your + project, if you want to use it. + +* Library functions for saturated arithmetic were added. add/sub/mul/div + operations are supported for signed and unsigned 8/16/32/64 integer types + (both uniform and varying). + +* The algorithm for selecting overloaded function was extended to cover more + types of overloading. Handling of reference types in overloaded functions was + fixed. The rules for selecting the best match were changed to match C++, + which requires the function to be the best match for all parameters. In + ambiguous cases, a warning is issued, but it will be converted to an error + in the next release. + +* Explicit typecasts between any two reference types were allowed. + +* Implicit cast of pointer to const type to void* was disallowed. + +The list of other notable changes is: + +* Number of fixes for better debug info support. + +* Memory corruption bug was fixed, which caused rare but not reproducible + compile time fails. + +* Alias analysis was enabled (more aggressive optimizations are expected). + +* A bug involving inaccurate handling of "const" qualifier was fixed. As a + result, more "const" qualifiers may appear in .h files, which may cause + compilation errors. + === v1.6.0 === (19 December 2013) A major new version of ISPC with major improvements in performance and diff --git a/docs/ispc.rst b/docs/ispc.rst index e9d248f4..a2cf2a95 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -50,6 +50,7 @@ Contents: + `Updating ISPC Programs For Changes In ISPC 1.3`_ + `Updating ISPC Programs For Changes In ISPC 1.5.0`_ + `Updating ISPC Programs For Changes In ISPC 1.6.0`_ + + `Updating ISPC Programs For Changes In ISPC 1.7.0`_ * `Getting Started with ISPC`_ @@ -292,6 +293,32 @@ becomes a keyword and it potentially creates a conflict with existing user function. 
Also a new library function packed_store_active2() was introduced, which also may create a conflict with existing user functions. +Updating ISPC Programs For Changes In ISPC 1.7.0 +------------------------------------------------ + +This release contains several changes that may affect compatibility with +older versions: + +* The algorithm for selecting overloaded functions was extended to cover more + types of overloading, and handling of reference types was fixed. At the same + time the old scheme, which blindly used the function with "the best score" + summed for all arguments, was switched to the C++ approach, which requires + "the best score" for each argument. If the best function doesn't exist, a + warning is issued in this version. It will be turned into an error in the + next version. A simple example: Suppose we have two functions: max(int, int) + and max(unsigned int, unsigned int). The new rules lead to an error when + calling max(int, unsigned int), as the best choice is ambiguous. + +* Implicit cast of pointer to const type to void* was disallowed. Use explicit + cast if needed. + +* A bug which prevented "const" qualifiers from appearing in emitted .h files + was fixed. Consequently, "const" qualifiers now properly appearing in emitted + .h files may cause compile errors in pre-existing code. + +* get_programCount() was moved from stdlib to examples/util/util.isph file. You + need to include this file to be able to use this function.
+ Getting Started with ISPC ========================= From 77de0ac342d9d8f71e1058d05b0f4126c9b36046 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 18 Apr 2014 18:20:22 +0400 Subject: [PATCH 13/26] News update --- docs/news.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/news.rst b/docs/news.rst index 6a805e48..80c3e8b3 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -2,6 +2,14 @@ ispc News ========= +ispc 1.7.0 is Released +---------------------- + +A major new version of ISPC with several language and library extensions and +fixes in debug info support. Binaries for all platforms are based on patched +version of LLVM 3.4. There are also performance improvements beyond switchover to +LLVM 3.4. + ispc 1.6.0 is Released ---------------------- @@ -11,7 +19,6 @@ a number of language and library extensions. Released binaries are based on patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer to Release Notes for complete set of changes. - ispc 1.5.0 is Released ---------------------- From eb8e94627d0cccfa0524ecabb79a400cb6ffe8c3 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 18 Apr 2014 20:44:00 +0400 Subject: [PATCH 14/26] Bumping version to 1.7.1dev --- doxygen.cfg | 2 +- ispc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doxygen.cfg b/doxygen.cfg index 5a396ddd..39e41fb3 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.7.0 +PROJECT_NUMBER = 1.7.1dev # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put.
diff --git a/ispc.h b/ispc.h index 1f07b6ae..86be754c 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.7.0" +#define ISPC_VERSION "1.7.1dev" #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) #error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported" From 0173d607907ea609c1773856f19349adc67034c5 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 18 Apr 2014 22:50:53 +0400 Subject: [PATCH 15/26] Template copyright update (for html generation) --- docs/template-news.txt | 2 +- docs/template-perf.txt | 2 +- docs/template.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/template-news.txt b/docs/template-news.txt index d5eebdd1..4f63b6b6 100644 --- a/docs/template-news.txt +++ b/docs/template-news.txt @@ -57,7 +57,7 @@ %(body)s
- diff --git a/docs/template-perf.txt b/docs/template-perf.txt index 9537a836..3213008f 100644 --- a/docs/template-perf.txt +++ b/docs/template-perf.txt @@ -57,7 +57,7 @@ %(body)s
- diff --git a/docs/template.txt b/docs/template.txt index b9041f19..8c0908a0 100644 --- a/docs/template.txt +++ b/docs/template.txt @@ -57,7 +57,7 @@ %(body)s
- From 5f55a9b9e280ccbb057a995773fe77e8cb05b25e Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Thu, 22 May 2014 18:50:57 +0400 Subject: [PATCH 16/26] support of LLVM trunk --- cbackend.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cbackend.cpp b/cbackend.cpp index 1c9626b5..51890d7f 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" #include "llvm/IR/GetElementPtrTypeIterator.h" + #include "llvm/Support/FileSystem.h" #else #include "llvm/Analysis/Verifier.h" #include @@ -1770,7 +1771,7 @@ std::string CWriter::GetValueName(const llvm::Value *Operand) { // Resolve potential alias. if (const llvm::GlobalAlias *GA = llvm::dyn_cast(Operand)) { #if defined(LLVM_3_5) - if (const llvm::Value *V = GA->getAliasedGlobal()) + if (const llvm::Value *V = GA->getAliasee()) #else if (const llvm::Value *V = GA->resolveAliasedGlobal(false)) #endif From e6131bd6a9d3a97c43f21b11554a92f347fbabbd Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Thu, 22 May 2014 18:51:25 +0400 Subject: [PATCH 17/26] fixing error for LLVM trunk --- llvmutil.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llvmutil.cpp b/llvmutil.cpp index 5707bbc9..c9e156ce 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -1569,6 +1569,8 @@ lExtractFirstVectorElement(llvm::Value *v, phiMap); llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1), phiMap); + Assert(v0 != NULL); + Assert(v1 != NULL); // Note that the new binary operator is inserted immediately before // the previous vector one return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1, @@ -1615,10 +1617,22 @@ lExtractFirstVectorElement(llvm::Value *v, return scalarPhi; } + // We should consider "shuffle" case and "insertElement" case separately. + // For example we can have shuffle(mul, undef, zero) but function + // "LLVMFlattenInsertChain" can handle only case shuffle(insertElement, undef, zero). 
+ // Also if we have insertElement under shuffle we will handle it the next call of + // "lExtractFirstVectorElement" function. + if (llvm::isa(v)) { + llvm::ShuffleVectorInst *shuf = llvm::dyn_cast(v); + llvm::Value *indices = shuf->getOperand(2); + if (llvm::isa(indices)) { + return lExtractFirstVectorElement(shuf->getOperand(0), phiMap); + } + } + // If we have a chain of insertelement instructions, then we can just // flatten them out and grab the value for the first one. - if (llvm::isa(v) || - llvm::isa(v)) { + if (llvm::isa(v)) { return LLVMFlattenInsertChain(v, vt->getNumElements(), false); } From 2b064b272a3000fb586eae2ab2965b2156714c9b Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 27 May 2014 19:24:21 +0400 Subject: [PATCH 18/26] deleting print from 'safe_for_all_mask_off' functions --- ast.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ast.cpp b/ast.cpp index 19eff152..00e81aa5 100644 --- a/ast.cpp +++ b/ast.cpp @@ -389,6 +389,11 @@ lCheckAllOffSafety(ASTNode *node, void *data) { return false; } + if (dynamic_cast(node) != NULL) { + *okPtr = false; + return false; + } + if (dynamic_cast(node) != NULL || dynamic_cast(node) != NULL) { // We definitely don't want to run the uniform variants of these if From d3144da5eb23f7cbfd196f63f2ff879eae1e261c Mon Sep 17 00:00:00 2001 From: jbrodman Date: Tue, 27 May 2014 15:50:53 -0700 Subject: [PATCH 19/26] Add error messages for structs containing nested undefined structs --- expr.cpp | 19 +++++++++++++++++-- type.cpp | 19 +++++++++++++++++++ type.h | 19 ++++++++++--------- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/expr.cpp b/expr.cpp index 4a1a16db..8fa64f91 100644 --- a/expr.cpp +++ b/expr.cpp @@ -5143,9 +5143,18 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos, exprType->GetString().c_str()); return NULL; } - - if (CastType(exprType) != NULL) + if (CastType(exprType) != NULL) { + const StructType *st = CastType(exprType); + if (st->IsDefined()) 
{ return new StructMemberExpr(e, id, p, idpos, derefLValue); + } + else { + Error(p, "Member operator \"%s\" can't be applied to declared " + "struct \"%s\" containing an undefined struct type.", derefLValue ? "->" : ".", + exprType->GetString().c_str()); + return NULL; + } + } else if (CastType(exprType) != NULL) return new VectorMemberExpr(e, id, p, idpos, derefLValue); else if (CastType(exprType)) { @@ -8708,6 +8717,12 @@ NewExpr::TypeCheck() { "but not defined type \"%s\".", allocType->GetString().c_str()); return NULL; } + const StructType *st = CastType(allocType); + if (st != NULL && !st->IsDefined()) { + Error(pos, "Can't dynamically allocate storage for declared " + "type \"%s\" containing undefined member type.", allocType->GetString().c_str()); + return NULL; + } // Otherwise we only need to make sure that if we have an expression // giving a number of elements to allocate that it can be converted to diff --git a/type.cpp b/type.cpp index 082f8910..9e8ef131 100644 --- a/type.cpp +++ b/type.cpp @@ -1956,6 +1956,25 @@ StructType::IsConstType() const { } +bool +StructType::IsDefined() const { + for (int i = 0; i < GetElementCount(); i++) { + const Type *t = GetElementType(i); + const UndefinedStructType *ust = CastType(t); + if (ust != NULL) { + return false; + } + const StructType *st = CastType(t); + if (st != NULL) { + if (!st->IsDefined()) { + return false; + } + } + } + return true; +} + + const Type * StructType::GetBaseType() const { return this; diff --git a/type.h b/type.h index 9093af59..94648eb1 100644 --- a/type.h +++ b/type.h @@ -81,15 +81,15 @@ struct Variability { /** Enumerant that records each of the types that inherit from the Type baseclass. 
*/ enum TypeId { - ATOMIC_TYPE, - ENUM_TYPE, - POINTER_TYPE, - ARRAY_TYPE, - VECTOR_TYPE, - STRUCT_TYPE, - UNDEFINED_STRUCT_TYPE, - REFERENCE_TYPE, - FUNCTION_TYPE + ATOMIC_TYPE, // 0 + ENUM_TYPE, // 1 + POINTER_TYPE, // 2 + ARRAY_TYPE, // 3 + VECTOR_TYPE, // 4 + STRUCT_TYPE, // 5 + UNDEFINED_STRUCT_TYPE, // 6 + REFERENCE_TYPE, // 7 + FUNCTION_TYPE // 8 }; @@ -675,6 +675,7 @@ public: bool IsIntType() const; bool IsUnsignedType() const; bool IsConstType() const; + bool IsDefined() const; const Type *GetBaseType() const; const StructType *GetAsVaryingType() const; From 5da05b365f42dee4f6671d110c86fb93c236d866 Mon Sep 17 00:00:00 2001 From: motiz88 Date: Thu, 5 Jun 2014 22:06:09 +0300 Subject: [PATCH 20/26] Small fixes for TBB on windows Changed an #ifdef ISPC_IS_WINDOWS in the definition of TaskInfo to #ifdef ISPC_USE_CONCRT, and fixed two calls to taskCount() that were missing parentheses. --- examples/tasksys.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 77269f9f..d7b2a801 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -183,7 +183,7 @@ struct TaskInfo { void *data; int taskIndex; int taskCount3d[3]; -#if defined(ISPC_IS_WINDOWS) +#if defined( ISPC_USE_CONCRT) event taskEvent; #endif int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } @@ -998,7 +998,7 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. 
// TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; - int threadCount = ti->taskCount; + int threadCount = ti->taskCount(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), @@ -1028,7 +1028,7 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; - int threadCount = ti->taskCount; + int threadCount = ti->taskCount(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); From 4ed72335bd1f103ac0eb79789613b6666e2d0a76 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 9 Jun 2014 16:35:56 +0400 Subject: [PATCH 21/26] support LLVM --- cbackend.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cbackend.cpp b/cbackend.cpp index 51890d7f..3d515a60 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -2163,7 +2163,13 @@ static SpecialGlobalClass getGlobalVariableClass(const llvm::GlobalVariable *GV) // Otherwise, if it is other metadata, don't print it. This catches things // like debug information. 
+#if defined(LLVM_3_5) + // Here we compare char * + if (!strcmp(GV->getSection(), "llvm.metadata")) +#else + // Here we compare strings if (GV->getSection() == "llvm.metadata") +#endif return NotPrinted; return NotSpecial; From 425540922ca24a6d4e44298b6c1e148f1822b151 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 11 Jun 2014 09:56:36 +0400 Subject: [PATCH 22/26] changing +/-feature regulation to CPU regulation --- ispc.cpp | 209 ++++++++++++++++--------------------------------------- 1 file changed, 59 insertions(+), 150 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 7ea97247..a5834857 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -241,40 +241,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : } } -#if defined(ISPC_ARM_ENABLED) && !defined(__arm__) - if (cpu == NULL && !strncmp(isa, "neon", 4)) - // If we're compiling NEON on an x86 host and the CPU wasn't - // supplied, don't go and set the CPU based on the host... - cpu = "cortex-a9"; -#endif - - if (cpu == NULL) { - std::string hostCPU = llvm::sys::getHostCPUName(); - if (hostCPU.size() > 0) - cpu = strdup(hostCPU.c_str()); - else { - Warning(SourcePos(), "Unable to determine host CPU!\n"); - cpu = "generic"; - } - } - else { - bool foundCPU = false; - for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); - ++i) { - if (!strcmp(cpu, supportedCPUs[i])) { - foundCPU = true; - break; - } - } - if (foundCPU == false) { - Error(SourcePos(), "Error: CPU type \"%s\" unknown. 
Supported CPUs: " - "%s.", cpu, SupportedCPUs().c_str()); - return; - } - } - - this->m_cpu = cpu; - if (arch == NULL) { #ifdef ISPC_ARM_ENABLED if (!strncmp(isa, "neon", 4)) @@ -308,6 +274,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_arch = arch; } + const char * cpuFromIsa; + // Check default LLVM generated targets if (!strcasecmp(isa, "sse2") || !strcasecmp(isa, "sse2-i32x4")) { @@ -316,15 +284,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; - this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",-sse4.1,-sse4.2" -#else - ",-sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "core2"; } else if (!strcasecmp(isa, "sse2-x2") || !strcasecmp(isa, "sse2-i32x8")) { @@ -333,15 +295,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; - this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",-sse4.1,-sse4.2" -#else - ",-sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "core2"; } else if (!strcasecmp(isa, "sse4") || !strcasecmp(isa, "sse4-i32x4")) { @@ -350,16 +306,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; - // TODO: why not sse42 and popcnt? 
- this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+sse4.1,-sse4.2" -#else - ",+sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "corei7"; } else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2") || @@ -369,15 +318,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; - this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+sse4.1,-sse4.2" -#else - ",+sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "corei7"; } else if (!strcasecmp(isa, "sse4-i8x16")) { this->m_isa = Target::SSE4; @@ -385,15 +328,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 8; this->m_vectorWidth = 16; - this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+sse4.1,-sse4.2" -#else - ",+sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 8; + cpuFromIsa = "corei7"; } else if (!strcasecmp(isa, "sse4-i16x8")) { this->m_isa = Target::SSE4; @@ -401,15 +338,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 16; this->m_vectorWidth = 8; - this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+sse4.1,-sse4.2" -#else - ",+sse41,-sse42" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 16; + cpuFromIsa = "corei7"; } else if (!strcasecmp(isa, "generic-4") || !strcasecmp(isa, "generic-x4")) { @@ -499,9 +430,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : 
this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; - this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "corei7-avx"; } else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1") || @@ -511,9 +442,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; - this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "corei7-avx"; } else if (!strcasecmp(isa, "avx-i64x4") || !strcasecmp(isa, "avx1-i64x4")) { @@ -522,9 +453,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; - this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; this->m_maskBitCount = 64; + cpuFromIsa = "corei7-avx"; } else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2") || @@ -534,9 +465,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; - this->m_attributes = "+avx,+popcnt,+cmov"; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + cpuFromIsa = "corei7-avx"; } else if (!strcasecmp(isa, "avx1.1") || !strcasecmp(isa, "avx1.1-i32x8")) { @@ -545,20 +476,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; - this->m_attributes = "+avx,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; -#endif + cpuFromIsa = "core-avx-i"; } else if 
(!strcasecmp(isa, "avx1.1-x2") || !strcasecmp(isa, "avx1.1-i32x16")) { @@ -567,20 +489,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; - this->m_attributes = "+avx,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; -#endif + cpuFromIsa = "core-avx-i"; } else if (!strcasecmp(isa, "avx1.1-i64x4")) { this->m_isa = Target::AVX11; @@ -588,20 +501,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; - this->m_attributes = "+avx,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif - ; this->m_maskingIsFree = false; this->m_maskBitCount = 64; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; -#endif + cpuFromIsa = "core-avx-i"; } else if (!strcasecmp(isa, "avx2") || !strcasecmp(isa, "avx2-i32x8")) { @@ -610,24 +514,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; - this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif -#ifndef LLVM_3_1 - ",+fma" -#endif // !LLVM_3_1 - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; this->m_hasGather = true; -#endif + cpuFromIsa = "core-avx2"; } else if (!strcasecmp(isa, "avx2-x2") || !strcasecmp(isa, "avx2-i32x16")) { @@ -636,24 +528,12 @@ Target::Target(const char *arch, const char *cpu, const 
char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; - this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif -#ifndef LLVM_3_1 - ",+fma" -#endif // !LLVM_3_1 - ; this->m_maskingIsFree = false; this->m_maskBitCount = 32; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; this->m_hasGather = true; -#endif + cpuFromIsa = "core-avx2"; } else if (!strcasecmp(isa, "avx2-i64x4")) { this->m_isa = Target::AVX2; @@ -661,24 +541,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; - this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" -#if defined(LLVM_3_4) || defined(LLVM_3_5) - ",+rdrnd" -#else - ",+rdrand" -#endif -#ifndef LLVM_3_1 - ",+fma" -#endif // !LLVM_3_1 - ; this->m_maskingIsFree = false; this->m_maskBitCount = 64; this->m_hasHalf = true; -#if !defined(LLVM_3_1) - // LLVM 3.2+ only this->m_hasRand = true; this->m_hasGather = true; -#endif + cpuFromIsa = "core-avx2"; } #ifdef ISPC_ARM_ENABLED else if (!strcasecmp(isa, "neon-i8x16")) { @@ -722,6 +590,47 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : error = true; } +#if defined(ISPC_ARM_ENABLED) && !defined(__arm__) + if (cpu == NULL && !strncmp(isa, "neon", 4)) + + cpu = "cortex-a9"; +#endif + + if (cpu == NULL) { +#ifndef ISPC_ARM_ENABLED + if (isa == NULL) { +#endif + std::string hostCPU = llvm::sys::getHostCPUName(); + if (hostCPU.size() > 0) + cpu = strdup(hostCPU.c_str()); + else { + Warning(SourcePos(), "Unable to determine host CPU!\n"); + cpu = "generic"; + } +#ifndef ISPC_ARM_ENABLED + } + else { + cpu = cpuFromIsa; + } +#endif + } + else { + bool foundCPU = false; + for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); + ++i) { + if (!strcmp(cpu, 
supportedCPUs[i])) { + foundCPU = true; + break; + } + } + if (foundCPU == false) { + Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: " + "%s.", cpu, SupportedCPUs().c_str()); + return; + } + } + this->m_cpu = cpu; + if (!error) { // Create TargetMachine std::string triple = GetTripleString(); From 76ea59b40bdcec009a726842a33c36e820d9ba86 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 18 Jun 2014 17:53:42 +0400 Subject: [PATCH 23/26] support LLVM build --- builtins.cpp | 2 +- builtins/util.m4 | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 4795590e..472c2ce8 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -707,7 +707,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb); #if defined(LLVM_3_5) llvm::ErrorOr ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx); - if (llvm::error_code EC = ModuleOrErr.getError()) + if (std::error_code EC = ModuleOrErr.getError()) Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str()); else { llvm::Module *bcModule = ModuleOrErr.get(); diff --git a/builtins/util.m4 b/builtins/util.m4 index 01f4e03f..b014645e 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1498,7 +1498,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, %cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE ifelse(LLVM_VERSION,LLVM_3_5,` - %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst + %r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst + %r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0 ',` %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst ') @@ -1513,7 +1514,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, define $2 
@__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp, $2 %val) nounwind alwaysinline { ifelse(LLVM_VERSION,LLVM_3_5,` - %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst + %r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst + %r = extractvalue { $2, i1 } %r_t, 0 ',` %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst ') From 1a8002cf6531457e0624d9719f13fcb50790f18e Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 25 Jun 2014 23:44:00 -0600 Subject: [PATCH 24/26] fix LLVM_VERSION for minor versions != 0 llvm version 3.4.2 got converted to 3_4.2 and not 3_4 as intended. see https://bugs.gentoo.org/show_bug.cgi?id=515114 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b03fc05e..e6e742a3 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ endif ARCH_TYPE = $(shell arch) LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags) -LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn// -e s/\.0//) +LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//') LLVM_VERSION_DEF=-D$(LLVM_VERSION) LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker From c2d65f7ad2427ae611099f7da989a6bb058c8d93 Mon Sep 17 00:00:00 2001 From: Anton Mitrokhin Date: Fri, 4 Jul 2014 15:10:36 +0400 Subject: [PATCH 25/26] Fixed multiple message sending and added more verbose warning regarding inconsistent ISPC_HOME --- alloy.py | 10 ++++++++-- common.py | 9 +++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/alloy.py b/alloy.py index 2b2c82b5..4d55e0bf 100755 --- a/alloy.py +++ b/alloy.py @@ -84,6 +84,10 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, # Here we understand what and where do we want to build current_path = os.getcwd() llvm_home = os.environ["LLVM_HOME"] + + + make_sure_dir_exists(llvm_home) + os.chdir(llvm_home) FOLDER_NAME=version_LLVM if version_LLVM == "trunk": @@ 
-583,7 +587,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, msg.attach(text) attach_mail_file(msg, alloy_build, "alloy_build.log") s = smtplib.SMTP(smtp_server) - s.sendmail('ISPC_test_system', options.notify, msg.as_string()) + s.sendmail('ISPC_test_system', options.notify.split(" "), msg.as_string()) s.quit() def Main(): @@ -630,7 +634,8 @@ def Main(): current_path = os.getcwd() make = "make -j" + options.speed if os.environ["ISPC_HOME"] != os.getcwd(): - error("you ISPC_HOME and your current path are different!\n", 2) + error("you ISPC_HOME and your current path are different! (" + os.environ["ISPC_HOME"] + " is not equal to " + os.getcwd() + + ")\n", 2) if options.perf_llvm == True: if options.branch == "master": options.branch = "trunk" @@ -679,6 +684,7 @@ import common error = common.error take_lines = common.take_lines print_debug = common.print_debug +make_sure_dir_exists = common.make_sure_dir_exists if __name__ == '__main__': # parsing options class MyParser(OptionParser): diff --git a/common.py b/common.py index 2a788722..6ec33f33 100755 --- a/common.py +++ b/common.py @@ -34,6 +34,7 @@ # // Author: Filippov Ilia import sys import os +import errno import shutil def write_to_file(filename, line): @@ -49,6 +50,14 @@ def remove_if_exists(filename): else: os.remove(filename) +def make_sure_dir_exists(path): + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + # detect version which is printed after command def take_lines(command, which): os.system(command + " > " + "temp_detect_version") From 4dacd7e7a26c8723be15d358c9f6b097f0a65e7e Mon Sep 17 00:00:00 2001 From: Anton Mitrokhin Date: Fri, 4 Jul 2014 15:19:45 +0400 Subject: [PATCH 26/26] Added some basic test subprocess exception handling and remapped error messages to the e-mail --- alloy.py | 8 +++++++- run_tests.py | 24 ++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git 
a/alloy.py b/alloy.py index 4d55e0bf..8d04ecc9 100755 --- a/alloy.py +++ b/alloy.py @@ -473,7 +473,10 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, for i2 in range(0,len(opts)): stability.arch = arch[i1] stability.no_opt = opts[i2] - execute_stability(stability, R, print_version) + try: + execute_stability(stability, R, print_version) + except: + print_debug("Exception in execute_stability - maybe some test subprocess terminated before it should have\n", False, stability_log) print_version = 0 for j in range(0,len(sde_targets)): stability.target = sde_targets[j][1] @@ -580,6 +583,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, f_lines = fp.readlines() fp.close() line = "" + if not sys.exc_info()[0] == None: + line = line + "Last exception: " + str(sys.exc_info()) + '\n' for i in range(0,len(f_lines)): line = line + f_lines[i][:-1] line = line + ' \n' @@ -664,6 +669,7 @@ from optparse import OptionParser from optparse import OptionGroup import sys import os +import errno import operator import time import glob diff --git a/run_tests.py b/run_tests.py index e33548b6..c131b19d 100755 --- a/run_tests.py +++ b/run_tests.py @@ -59,10 +59,15 @@ def run_command(cmd): lexer.whitespace_split = True lexer.escape = '' arg_list = list(lexer) - - sp = subprocess.Popen(arg_list, stdin=None, + + try: + sp = subprocess.Popen(arg_list, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except: + print_debug("ERROR: The child (%s) raised an esception: %s\n" % (cmd, sys.exc_info()[1]), s, run_tests_log) + raise + out = sp.communicate() output = "" output += out[0].decode("utf-8") @@ -325,7 +330,11 @@ def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test sys.exit(0) if check_test(filename): - (compile_error, run_error) = run_test(filename) + try: + (compile_error, run_error) = run_test(filename) + except: + sys.exit(-1) # This is in case the child has unexpectedly died + if 
compile_error != 0: compile_error_files += [ filename ] if run_error != 0: @@ -646,8 +655,9 @@ def run_tests(options1, args, print_version): task_threads = [0] * nthreads for x in range(nthreads): task_threads[x] = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests, - max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var)) + max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var)) task_threads[x].start() + # wait for them to all finish and then return the number that failed # (i.e. return 0 if all is ok) for t in task_threads: @@ -655,6 +665,12 @@ def run_tests(options1, args, print_version): if options.non_interactive == False: print_debug("\n", s, run_tests_log) + + for jb in task_threads: + if not jb.exitcode == 0: + raise OSError(2, 'Some test subprocess has thrown an exception', '') + + temp_time = (time.time() - start_time) elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))