Merge branch 'master' into nvptx_merge
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2010-2013, Intel Corporation
|
||||
Copyright (c) 2010-2014, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -77,7 +77,7 @@ covered by the following license:
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
|
||||
Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
35
Makefile
35
Makefile
@@ -34,11 +34,41 @@
|
||||
# ispc Makefile
|
||||
#
|
||||
|
||||
define newline
|
||||
|
||||
|
||||
endef
|
||||
|
||||
define WARNING_BODY
|
||||
============================== !!! WARNING !!! =============================== \n
|
||||
Location of LLVM files in your PATH is different than path in LLVM_HOME \n
|
||||
variable (or LLVM_HOME is not set). The most likely this means that you are \n
|
||||
using default LLVM installation on your system, which is very bad sign. \n
|
||||
Note, that ISPC uses LLVM optimizer and is highly dependent on it. We recommend \n
|
||||
using *patched* version of LLVM 3.3 or 3.4. Patches are availible in \n
|
||||
llvm_patches folder. You can build LLVM manually, or run our scripts, which \n
|
||||
will do all the work for you. Do the following: \n
|
||||
1. Create a folder, where LLVM will reside and set LLVM_HOME variable to its \n
|
||||
path. \n
|
||||
2. Set ISPC_HOME variable to your ISPC location (probably current folder).
|
||||
3. Run alloy.py tool to checkout and build LLVM: \n
|
||||
alloy.py -b --version=3.4 \n
|
||||
4. Add $$LLVM_HOME/bin-3.4/bin path to your PATH. \n
|
||||
==============================================================================
|
||||
endef
|
||||
|
||||
# If you have your own special version of llvm and/or clang, change
|
||||
# these variables to match.
|
||||
LLVM_CONFIG=$(shell which llvm-config)
|
||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||
|
||||
RIGHT_LLVM = $(WARNING_BODY)
|
||||
ifdef LLVM_HOME
|
||||
ifeq ($(findstring $(LLVM_HOME), $(LLVM_CONFIG)), $(LLVM_HOME))
|
||||
RIGHT_LLVM = LLVM from $$LLVM_HOME is used.
|
||||
endif
|
||||
endif
|
||||
|
||||
# Enable ARM by request
|
||||
# To enable: make ARM_ENABLED=1
|
||||
ARM_ENABLED=0
|
||||
@@ -56,7 +86,7 @@ endif
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn// -e s/\.0//)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
|
||||
@@ -84,7 +114,7 @@ ifeq ($(LLVM_VERSION),LLVM_3_4)
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM_VERSION),LLVM_3_5)
|
||||
ISPC_LIBS += -lcurses
|
||||
ISPC_LIBS += -lcurses -lz
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
@@ -188,6 +218,7 @@ llvm_check:
|
||||
echo "ERROR: llvm-config not found in your PATH"; \
|
||||
echo "******************************************"; \
|
||||
echo; exit 1)
|
||||
@echo -e '$(subst $(newline), ,$(RIGHT_LLVM))'
|
||||
|
||||
print_llvm_src: llvm_check
|
||||
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||
|
||||
20
alloy.py
20
alloy.py
@@ -84,6 +84,10 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
# Here we understand what and where do we want to build
|
||||
current_path = os.getcwd()
|
||||
llvm_home = os.environ["LLVM_HOME"]
|
||||
|
||||
|
||||
make_sure_dir_exists(llvm_home)
|
||||
|
||||
os.chdir(llvm_home)
|
||||
FOLDER_NAME=version_LLVM
|
||||
if version_LLVM == "trunk":
|
||||
@@ -292,6 +296,8 @@ def build_ispc(version_LLVM, make):
|
||||
p_temp = os.getenv("LLVM_INSTALL_DIR")
|
||||
v_temp = os.getenv("LLVM_VERSION")
|
||||
os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM
|
||||
if version_LLVM == "3.2":
|
||||
temp = "3_2"
|
||||
if version_LLVM == "3.3":
|
||||
temp = "3_3"
|
||||
if version_LLVM == "3.4":
|
||||
@@ -467,7 +473,10 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
for i2 in range(0,len(opts)):
|
||||
stability.arch = arch[i1]
|
||||
stability.no_opt = opts[i2]
|
||||
execute_stability(stability, R, print_version)
|
||||
try:
|
||||
execute_stability(stability, R, print_version)
|
||||
except:
|
||||
print_debug("Exception in execute_stability - maybe some test subprocess terminated before it should have\n", False, stability_log)
|
||||
print_version = 0
|
||||
for j in range(0,len(sde_targets)):
|
||||
stability.target = sde_targets[j][1]
|
||||
@@ -574,6 +583,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
f_lines = fp.readlines()
|
||||
fp.close()
|
||||
line = ""
|
||||
if not sys.exc_info()[0] == None:
|
||||
line = line + "Last exception: " + str(sys.exc_info()) + '\n'
|
||||
for i in range(0,len(f_lines)):
|
||||
line = line + f_lines[i][:-1]
|
||||
line = line + ' \n'
|
||||
@@ -581,7 +592,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
msg.attach(text)
|
||||
attach_mail_file(msg, alloy_build, "alloy_build.log")
|
||||
s = smtplib.SMTP(smtp_server)
|
||||
s.sendmail('ISPC_test_system', options.notify, msg.as_string())
|
||||
s.sendmail('ISPC_test_system', options.notify.split(" "), msg.as_string())
|
||||
s.quit()
|
||||
|
||||
def Main():
|
||||
@@ -628,7 +639,8 @@ def Main():
|
||||
current_path = os.getcwd()
|
||||
make = "make -j" + options.speed
|
||||
if os.environ["ISPC_HOME"] != os.getcwd():
|
||||
error("you ISPC_HOME and your current path are different!\n", 2)
|
||||
error("you ISPC_HOME and your current path are different! (" + os.environ["ISPC_HOME"] + " is not equal to " + os.getcwd() +
|
||||
")\n", 2)
|
||||
if options.perf_llvm == True:
|
||||
if options.branch == "master":
|
||||
options.branch = "trunk"
|
||||
@@ -657,6 +669,7 @@ from optparse import OptionParser
|
||||
from optparse import OptionGroup
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import operator
|
||||
import time
|
||||
import glob
|
||||
@@ -677,6 +690,7 @@ import common
|
||||
error = common.error
|
||||
take_lines = common.take_lines
|
||||
print_debug = common.print_debug
|
||||
make_sure_dir_exists = common.make_sure_dir_exists
|
||||
if __name__ == '__main__':
|
||||
# parsing options
|
||||
class MyParser(OptionParser):
|
||||
|
||||
5
ast.cpp
5
ast.cpp
@@ -389,6 +389,11 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<PrintStmt *>(node) != NULL) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<NewExpr *>(node) != NULL ||
|
||||
dynamic_cast<DeleteStmt *>(node) != NULL) {
|
||||
// We definitely don't want to run the uniform variants of these if
|
||||
|
||||
@@ -765,7 +765,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
|
||||
#if defined(LLVM_3_5)
|
||||
llvm::ErrorOr<llvm::Module *> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
|
||||
if (llvm::error_code EC = ModuleOrErr.getError())
|
||||
if (std::error_code EC = ModuleOrErr.getError())
|
||||
Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
|
||||
else {
|
||||
llvm::Module *bcModule = ModuleOrErr.get();
|
||||
|
||||
@@ -1498,7 +1498,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||
%cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE
|
||||
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||
ifelse(LLVM_VERSION,LLVM_3_5,`
|
||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
|
||||
',`
|
||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst
|
||||
')
|
||||
@@ -1513,7 +1514,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||
define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
||||
$2 %val) nounwind alwaysinline {
|
||||
ifelse(LLVM_VERSION,LLVM_3_5,`
|
||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||
%r = extractvalue { $2, i1 } %r_t, 0
|
||||
',`
|
||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst
|
||||
')
|
||||
|
||||
25
cbackend.cpp
25
cbackend.cpp
@@ -69,6 +69,7 @@
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#else
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
@@ -1769,7 +1770,11 @@ std::string CWriter::GetValueName(const llvm::Value *Operand) {
|
||||
|
||||
// Resolve potential alias.
|
||||
if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand)) {
|
||||
#if defined(LLVM_3_5)
|
||||
if (const llvm::Value *V = GA->getAliasee())
|
||||
#else
|
||||
if (const llvm::Value *V = GA->resolveAliasedGlobal(false))
|
||||
#endif
|
||||
Operand = V;
|
||||
}
|
||||
|
||||
@@ -2158,7 +2163,13 @@ static SpecialGlobalClass getGlobalVariableClass(const llvm::GlobalVariable *GV)
|
||||
|
||||
// Otherwise, if it is other metadata, don't print it. This catches things
|
||||
// like debug information.
|
||||
#if defined(LLVM_3_5)
|
||||
// Here we compare char *
|
||||
if (!strcmp(GV->getSection(), "llvm.metadata"))
|
||||
#else
|
||||
// Here we compare strings
|
||||
if (GV->getSection() == "llvm.metadata")
|
||||
#endif
|
||||
return NotPrinted;
|
||||
|
||||
return NotSpecial;
|
||||
@@ -3282,10 +3293,16 @@ void CWriter::visitBinaryOperator(llvm::Instruction &I) {
|
||||
if ((I.getOpcode() == llvm::Instruction::Shl ||
|
||||
I.getOpcode() == llvm::Instruction::LShr ||
|
||||
I.getOpcode() == llvm::Instruction::AShr)) {
|
||||
if (LLVMVectorValuesAllEqual(I.getOperand(1))) {
|
||||
Out << "__extract_element(";
|
||||
writeOperand(I.getOperand(1));
|
||||
Out << ", 0) ";
|
||||
llvm::Value *splat = NULL;
|
||||
if (LLVMVectorValuesAllEqual(I.getOperand(1), &splat)) {
|
||||
if (splat) {
|
||||
// Avoid __extract_element(splat(value), 0), if possible.
|
||||
writeOperand(splat);
|
||||
} else {
|
||||
Out << "__extract_element(";
|
||||
writeOperand(I.getOperand(1));
|
||||
Out << ", 0) ";
|
||||
}
|
||||
}
|
||||
else
|
||||
writeOperand(I.getOperand(1));
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
# // Author: Filippov Ilia
|
||||
import sys
|
||||
import os
|
||||
import errno
|
||||
import shutil
|
||||
|
||||
def write_to_file(filename, line):
|
||||
@@ -49,6 +50,14 @@ def remove_if_exists(filename):
|
||||
else:
|
||||
os.remove(filename)
|
||||
|
||||
def make_sure_dir_exists(path):
|
||||
try:
|
||||
os.makedirs(path)
|
||||
except OSError as exception:
|
||||
if exception.errno != errno.EEXIST:
|
||||
raise
|
||||
|
||||
|
||||
# detect version which is printed after command
|
||||
def take_lines(command, which):
|
||||
os.system(command + " > " + "temp_detect_version")
|
||||
|
||||
@@ -1,3 +1,47 @@
|
||||
=== v1.7.0 === (18 April 2014)
|
||||
|
||||
A major new version of ISPC with several language and library extensions and
|
||||
fixes in debug info support. Binaries for all platforms are based on patched
|
||||
version of LLVM 3.4. There are also performance improvements beyond the switchover to
|
||||
LLVM 3.4.
|
||||
|
||||
The list of language and library changes:
|
||||
|
||||
* Support for varying types in exported functions was added. See documentation
|
||||
for more details.
|
||||
|
||||
* get_programCount() function was moved from stdlib.ispc to
|
||||
examples/util/util.isph, which needs to be included somewhere in your
|
||||
project, if you want to use it.
|
||||
|
||||
* Library functions for saturated arithmetic were added. add/sub/mul/div
|
||||
operations are supported for signed and unsigned 8/16/32/64 integer types
|
||||
(both uniform and varying).
|
||||
|
||||
* The algorithm for selecting overloaded functions was extended to cover more
|
||||
types of overloading. Handling of reference types in overloaded functions was
|
||||
fixed. The rules for selecting the best match were changed to match C++,
|
||||
which requires the function to be the best match for all parameters. In
|
||||
ambiguous cases, a warning is issued, but it will be converted to an error
|
||||
in the next release.
|
||||
|
||||
* Explicit typecasts between any two reference types were allowed.
|
||||
|
||||
* Implicit cast of pointer to const type to void* was disallowed.
|
||||
|
||||
The list of other notable changes is:
|
||||
|
||||
* A number of fixes for better debug info support.
|
||||
|
||||
* Memory corruption bug was fixed, which caused rare but not reproducible
|
||||
compile-time failures.
|
||||
|
||||
* Alias analysis was enabled (more aggressive optimizations are expected).
|
||||
|
||||
* A bug involving inaccurate handling of "const" qualifier was fixed. As a
|
||||
result, more "const" qualifiers may appear in .h files, which may cause
|
||||
compilation errors.
|
||||
|
||||
=== v1.6.0 === (19 December 2013)
|
||||
|
||||
A major new version of ISPC with major improvements in performance and
|
||||
|
||||
@@ -50,6 +50,7 @@ Contents:
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.7.0`_
|
||||
|
||||
* `Getting Started with ISPC`_
|
||||
|
||||
@@ -299,6 +300,32 @@ becomes a keyword and it potentially creates a conflict with existing user
|
||||
function. Also a new library function packed_store_active2() was introduced,
|
||||
which also may create a conflict with existing user functions.
|
||||
|
||||
Updating ISPC Programs For Changes In ISPC 1.7.0
|
||||
------------------------------------------------
|
||||
|
||||
This release contains several changes that may affect compatibility with
|
||||
older versions:
|
||||
|
||||
* The algorithm for selecting overloaded functions was extended to cover more
|
||||
types of overloading, and handling of reference types was fixed. At the same
|
||||
time the old scheme, which blindly used the function with "the best score"
|
||||
summed for all arguments, was switched to the C++ approach, which requires
|
||||
"the best score" for each argument. If the best function doesn't exist, a
|
||||
warning is issued in this version. It will be turned into an error in the
|
||||
next version. A simple example: Suppose we have two functions: max(int, int)
|
||||
and max(unsigned int, unsigned int). The new rules lead to an error when
|
||||
calling max(int, unsigned int), as the best choice is ambiguous.
|
||||
|
||||
* Implicit cast of pointer to const type to void* was disallowed. Use explicit
|
||||
cast if needed.
|
||||
|
||||
* A bug which prevented "const" qualifiers from appearing in emitted .h files
|
||||
was fixed. Consequently, "const" qualifiers now properly appearing in emitted
|
||||
.h files may cause compile errors in pre-existing code.
|
||||
|
||||
* get_programCount() was moved from stdlib to the examples/util/util.isph file. You
|
||||
need to include this file to be able to use this function.
|
||||
|
||||
|
||||
Getting Started with ISPC
|
||||
=========================
|
||||
|
||||
@@ -2,6 +2,14 @@
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.7.0 is Released
|
||||
----------------------
|
||||
|
||||
A major new version of ISPC with several language and library extensions and
|
||||
fixes in debug info support. Binaries for all platforms are based on patched
|
||||
version of LLVM 3.4. There are also performance improvements beyond the switchover to
|
||||
LLVM 3.4.
|
||||
|
||||
ispc 1.6.0 is Released
|
||||
----------------------
|
||||
|
||||
@@ -11,7 +19,6 @@ a number of language and library extensions. Released binaries are based on
|
||||
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
|
||||
to Release Notes for complete set of changes.
|
||||
|
||||
|
||||
ispc 1.5.0 is Released
|
||||
----------------------
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.6.1dev
|
||||
PROJECT_NUMBER = 1.7.1dev
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -183,7 +183,7 @@ struct TaskInfo {
|
||||
void *data;
|
||||
int taskIndex;
|
||||
int taskCount3d[3];
|
||||
#if defined(ISPC_IS_WINDOWS)
|
||||
#if defined( ISPC_USE_CONCRT)
|
||||
event taskEvent;
|
||||
#endif
|
||||
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||
@@ -1003,7 +1003,7 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// Actually run the task.
|
||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
int threadCount = ti->taskCount();
|
||||
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
@@ -1033,7 +1033,7 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
|
||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
int threadCount = ti->taskCount();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
|
||||
@@ -1,5 +1,42 @@
|
||||
/*
|
||||
Copyright (c) 2014, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef UTIL_ISPH
|
||||
#define UTIL_ISPH
|
||||
|
||||
// utility function to read the value of programCount from C/C++
|
||||
export uniform int32 get_programCount() {
|
||||
return programCount;
|
||||
}
|
||||
|
||||
#endif // UTIL_ISPH
|
||||
|
||||
19
expr.cpp
19
expr.cpp
@@ -5143,9 +5143,18 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos,
|
||||
exprType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (CastType<StructType>(exprType) != NULL)
|
||||
if (CastType<StructType>(exprType) != NULL) {
|
||||
const StructType *st = CastType<StructType>(exprType);
|
||||
if (st->IsDefined()) {
|
||||
return new StructMemberExpr(e, id, p, idpos, derefLValue);
|
||||
}
|
||||
else {
|
||||
Error(p, "Member operator \"%s\" can't be applied to declared "
|
||||
"struct \"%s\" containing an undefined struct type.", derefLValue ? "->" : ".",
|
||||
exprType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else if (CastType<VectorType>(exprType) != NULL)
|
||||
return new VectorMemberExpr(e, id, p, idpos, derefLValue);
|
||||
else if (CastType<UndefinedStructType>(exprType)) {
|
||||
@@ -8719,6 +8728,12 @@ NewExpr::TypeCheck() {
|
||||
"but not defined type \"%s\".", allocType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
const StructType *st = CastType<StructType>(allocType);
|
||||
if (st != NULL && !st->IsDefined()) {
|
||||
Error(pos, "Can't dynamically allocate storage for declared "
|
||||
"type \"%s\" containing undefined member type.", allocType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Otherwise we only need to make sure that if we have an expression
|
||||
// giving a number of elements to allocate that it can be converted to
|
||||
|
||||
214
ispc.cpp
214
ispc.cpp
@@ -241,40 +241,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
||||
// If we're compiling NEON on an x86 host and the CPU wasn't
|
||||
// supplied, don't go and set the CPU based on the host...
|
||||
cpu = "cortex-a9";
|
||||
#endif
|
||||
|
||||
if (cpu == NULL) {
|
||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||
if (hostCPU.size() > 0)
|
||||
cpu = strdup(hostCPU.c_str());
|
||||
else {
|
||||
Warning(SourcePos(), "Unable to determine host CPU!\n");
|
||||
cpu = "generic";
|
||||
}
|
||||
}
|
||||
else {
|
||||
bool foundCPU = false;
|
||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||
++i) {
|
||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||
foundCPU = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundCPU == false) {
|
||||
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||
"%s.", cpu, SupportedCPUs().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
this->m_cpu = cpu;
|
||||
|
||||
if (arch == NULL) {
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
if (!strncmp(isa, "neon", 4))
|
||||
@@ -311,6 +277,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_arch = arch;
|
||||
}
|
||||
|
||||
const char * cpuFromIsa;
|
||||
|
||||
// Check default LLVM generated targets
|
||||
if (!strcasecmp(isa, "sse2") ||
|
||||
!strcasecmp(isa, "sse2-i32x4")) {
|
||||
@@ -319,15 +287,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",-sse4.1,-sse4.2"
|
||||
#else
|
||||
",-sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "core2";
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse2-x2") ||
|
||||
!strcasecmp(isa, "sse2-i32x8")) {
|
||||
@@ -336,15 +298,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",-sse4.1,-sse4.2"
|
||||
#else
|
||||
",-sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "core2";
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4") ||
|
||||
!strcasecmp(isa, "sse4-i32x4")) {
|
||||
@@ -353,16 +309,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
// TODO: why not sse42 and popcnt?
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+sse4.1,-sse4.2"
|
||||
#else
|
||||
",+sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7";
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4x2") ||
|
||||
!strcasecmp(isa, "sse4-x2") ||
|
||||
@@ -372,15 +321,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+sse4.1,-sse4.2"
|
||||
#else
|
||||
",+sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7";
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4-i8x16")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
@@ -388,15 +331,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 8;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+sse4.1,-sse4.2"
|
||||
#else
|
||||
",+sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 8;
|
||||
cpuFromIsa = "corei7";
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4-i16x8")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
@@ -404,15 +341,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 16;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+sse4.1,-sse4.2"
|
||||
#else
|
||||
",+sse41,-sse42"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 16;
|
||||
cpuFromIsa = "corei7";
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-4") ||
|
||||
!strcasecmp(isa, "generic-x4")) {
|
||||
@@ -452,7 +383,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_maskBitCount = 1;
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasTranscendentals = true;
|
||||
this->m_hasTrigonometry = true;
|
||||
// It's set to false, because stdlib implementation of math functions
|
||||
// is faster on MIC than the "native" implementation provided by the
|
||||
// icc compiler.
|
||||
this->m_hasTrigonometry = false;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
}
|
||||
@@ -499,9 +433,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx") ||
|
||||
!strcasecmp(isa, "avx1") ||
|
||||
@@ -511,9 +445,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-i64x4") ||
|
||||
!strcasecmp(isa, "avx1-i64x4")) {
|
||||
@@ -522,9 +456,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2") ||
|
||||
!strcasecmp(isa, "avx1-x2") ||
|
||||
@@ -534,9 +468,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1") ||
|
||||
!strcasecmp(isa, "avx1.1-i32x8")) {
|
||||
@@ -545,20 +479,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx-i";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-x2") ||
|
||||
!strcasecmp(isa, "avx1.1-i32x16")) {
|
||||
@@ -567,20 +492,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx-i";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
@@ -588,20 +504,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx-i";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2") ||
|
||||
!strcasecmp(isa, "avx2-i32x8")) {
|
||||
@@ -610,24 +517,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx2";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2") ||
|
||||
!strcasecmp(isa, "avx2-i32x16")) {
|
||||
@@ -636,24 +531,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx2";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
@@ -661,24 +544,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
#endif
|
||||
cpuFromIsa = "core-avx2";
|
||||
}
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||
@@ -736,6 +607,47 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
error = true;
|
||||
}
|
||||
|
||||
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
||||
|
||||
cpu = "cortex-a9";
|
||||
#endif
|
||||
|
||||
if (cpu == NULL) {
|
||||
#ifndef ISPC_ARM_ENABLED
|
||||
if (isa == NULL) {
|
||||
#endif
|
||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||
if (hostCPU.size() > 0)
|
||||
cpu = strdup(hostCPU.c_str());
|
||||
else {
|
||||
Warning(SourcePos(), "Unable to determine host CPU!\n");
|
||||
cpu = "generic";
|
||||
}
|
||||
#ifndef ISPC_ARM_ENABLED
|
||||
}
|
||||
else {
|
||||
cpu = cpuFromIsa;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
bool foundCPU = false;
|
||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||
++i) {
|
||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||
foundCPU = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundCPU == false) {
|
||||
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||
"%s.", cpu, SupportedCPUs().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
this->m_cpu = cpu;
|
||||
|
||||
if (!error) {
|
||||
// Create TargetMachine
|
||||
std::string triple = GetTripleString();
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#define ISPC_VERSION "1.6.1dev"
|
||||
#define ISPC_VERSION "1.7.1dev"
|
||||
|
||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
||||
|
||||
@@ -105,7 +105,6 @@
|
||||
<ClInclude Include="sym.h" />
|
||||
<ClInclude Include="type.h" />
|
||||
<ClInclude Include="util.h" />
|
||||
<ClInclude Include="winstuff\unistd.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib.ispc">
|
||||
@@ -396,7 +395,7 @@
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@@ -415,7 +414,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
||||
54
llvmutil.cpp
54
llvmutil.cpp
@@ -818,7 +818,8 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
|
||||
|
||||
static bool
|
||||
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis);
|
||||
std::vector<llvm::PHINode *> &seenPhis,
|
||||
llvm::Value **splatValue = NULL);
|
||||
|
||||
|
||||
/** This function checks to see if the given (scalar or vector) value is an
|
||||
@@ -1068,20 +1069,37 @@ lVectorShiftRightAllEqual(llvm::Value *val, llvm::Value *shift,
|
||||
|
||||
static bool
|
||||
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
std::vector<llvm::PHINode *> &seenPhis,
|
||||
llvm::Value **splatValue) {
|
||||
if (vectorLength == 1)
|
||||
return true;
|
||||
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(v))
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(v)) {
|
||||
if (splatValue) {
|
||||
llvm::ConstantAggregateZero *caz =
|
||||
llvm::dyn_cast<llvm::ConstantAggregateZero>(v);
|
||||
*splatValue = caz->getSequentialElement();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||
if (cv != NULL)
|
||||
return (cv->getSplatValue() != NULL);
|
||||
if (cv != NULL) {
|
||||
llvm::Value* splat = cv->getSplatValue();
|
||||
if (splat != NULL && splatValue) {
|
||||
*splatValue = splat;
|
||||
}
|
||||
return (splat != NULL);
|
||||
}
|
||||
|
||||
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
if (cdv != NULL)
|
||||
return (cdv->getSplatValue() != NULL);
|
||||
if (cdv != NULL) {
|
||||
llvm::Value* splat = cdv->getSplatValue();
|
||||
if (splat != NULL && splatValue) {
|
||||
*splatValue = splat;
|
||||
}
|
||||
return (splat != NULL);
|
||||
}
|
||||
|
||||
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
||||
if (bop != NULL) {
|
||||
@@ -1178,14 +1196,14 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
where the values are actually all equal.
|
||||
*/
|
||||
bool
|
||||
LLVMVectorValuesAllEqual(llvm::Value *v) {
|
||||
LLVMVectorValuesAllEqual(llvm::Value *v, llvm::Value **splat) {
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||
Assert(vt != NULL);
|
||||
int vectorLength = vt->getNumElements();
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhis;
|
||||
bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis);
|
||||
bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis, splat);
|
||||
|
||||
Debug(SourcePos(), "LLVMVectorValuesAllEqual(%s) -> %s.",
|
||||
v->getName().str().c_str(), equal ? "true" : "false");
|
||||
@@ -1551,6 +1569,8 @@ lExtractFirstVectorElement(llvm::Value *v,
|
||||
phiMap);
|
||||
llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
|
||||
phiMap);
|
||||
Assert(v0 != NULL);
|
||||
Assert(v1 != NULL);
|
||||
// Note that the new binary operator is inserted immediately before
|
||||
// the previous vector one
|
||||
return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
|
||||
@@ -1597,10 +1617,22 @@ lExtractFirstVectorElement(llvm::Value *v,
|
||||
return scalarPhi;
|
||||
}
|
||||
|
||||
// We should consider "shuffle" case and "insertElement" case separately.
|
||||
// For example we can have shuffle(mul, undef, zero) but function
|
||||
// "LLVMFlattenInsertChain" can handle only case shuffle(insertElement, undef, zero).
|
||||
// Also if we have insertElement under shuffle we will handle it the next call of
|
||||
// "lExtractFirstVectorElement" function.
|
||||
if (llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||
llvm::ShuffleVectorInst *shuf = llvm::dyn_cast<llvm::ShuffleVectorInst>(v);
|
||||
llvm::Value *indices = shuf->getOperand(2);
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(indices)) {
|
||||
return lExtractFirstVectorElement(shuf->getOperand(0), phiMap);
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a chain of insertelement instructions, then we can just
|
||||
// flatten them out and grab the value for the first one.
|
||||
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
||||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||
if (llvm::isa<llvm::InsertElementInst>(v)) {
|
||||
return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
|
||||
}
|
||||
|
||||
|
||||
@@ -228,7 +228,8 @@ extern llvm::Constant *LLVMMaskAllOff;
|
||||
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
||||
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
||||
return false for arrays where the values are actually all equal. */
|
||||
extern bool LLVMVectorValuesAllEqual(llvm::Value *v);
|
||||
extern bool LLVMVectorValuesAllEqual(llvm::Value *v,
|
||||
llvm::Value **splat = NULL);
|
||||
|
||||
/** Given vector of integer-typed values, this function returns true if it
|
||||
can determine that the elements of the vector have a step of 'stride'
|
||||
|
||||
2
opt.cpp
2
opt.cpp
@@ -541,8 +541,6 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
||||
optPM.add(llvm::createEarlyCSEPass());
|
||||
optPM.add(llvm::createLowerExpectIntrinsicPass());
|
||||
optPM.add(llvm::createTypeBasedAliasAnalysisPass());
|
||||
optPM.add(llvm::createBasicAliasAnalysisPass());
|
||||
|
||||
// Early optimizations to try to reduce the total amount of code to
|
||||
// work with if we can
|
||||
|
||||
24
run_tests.py
24
run_tests.py
@@ -60,9 +60,14 @@ def run_command(cmd):
|
||||
lexer.escape = ''
|
||||
arg_list = list(lexer)
|
||||
|
||||
sp = subprocess.Popen(arg_list, stdin=None,
|
||||
try:
|
||||
sp = subprocess.Popen(arg_list, stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
except:
|
||||
print_debug("ERROR: The child (%s) raised an esception: %s\n" % (cmd, sys.exc_info()[1]), s, run_tests_log)
|
||||
raise
|
||||
|
||||
out = sp.communicate()
|
||||
output = ""
|
||||
output += out[0].decode("utf-8")
|
||||
@@ -213,7 +218,7 @@ def run_test(testname):
|
||||
obj_name = "%s.obj" % os.path.basename(filename)
|
||||
exe_name = "%s.exe" % os.path.basename(filename)
|
||||
|
||||
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
|
||||
cc_cmd = "%s /I. /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
|
||||
(options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
|
||||
if should_fail:
|
||||
cc_cmd += " /DEXPECT_FAILURE"
|
||||
@@ -355,7 +360,11 @@ def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test
|
||||
sys.exit(0)
|
||||
|
||||
if check_test(filename):
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
try:
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
except:
|
||||
sys.exit(-1) # This is in case the child has unexpectedly died
|
||||
|
||||
if compile_error != 0:
|
||||
compile_error_files += [ filename ]
|
||||
if run_error != 0:
|
||||
@@ -680,8 +689,9 @@ def run_tests(options1, args, print_version):
|
||||
task_threads = [0] * nthreads
|
||||
for x in range(nthreads):
|
||||
task_threads[x] = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests,
|
||||
max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var))
|
||||
max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var))
|
||||
task_threads[x].start()
|
||||
|
||||
# wait for them to all finish and then return the number that failed
|
||||
# (i.e. return 0 if all is ok)
|
||||
for t in task_threads:
|
||||
@@ -689,6 +699,12 @@ def run_tests(options1, args, print_version):
|
||||
if options.non_interactive == False:
|
||||
print_debug("\n", s, run_tests_log)
|
||||
|
||||
|
||||
for jb in task_threads:
|
||||
if not jb.exitcode == 0:
|
||||
raise OSError(2, 'Some test subprocess has thrown an exception', '')
|
||||
|
||||
|
||||
temp_time = (time.time() - start_time)
|
||||
elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))
|
||||
|
||||
|
||||
25
type.cpp
25
type.cpp
@@ -826,7 +826,7 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
|
||||
llvm::DIFile diFile = pos.GetDIFile();
|
||||
llvm::DIType diType =
|
||||
m->diBuilder->createEnumerationType(scope, name, diFile, pos.first_line,
|
||||
m->diBuilder->createEnumerationType(diFile, name, diFile, pos.first_line,
|
||||
32 /* size in bits */,
|
||||
32 /* align in bits */,
|
||||
elementArray
|
||||
@@ -1956,6 +1956,25 @@ StructType::IsConstType() const {
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
StructType::IsDefined() const {
|
||||
for (int i = 0; i < GetElementCount(); i++) {
|
||||
const Type *t = GetElementType(i);
|
||||
const UndefinedStructType *ust = CastType<UndefinedStructType>(t);
|
||||
if (ust != NULL) {
|
||||
return false;
|
||||
}
|
||||
const StructType *st = CastType<StructType>(t);
|
||||
if (st != NULL) {
|
||||
if (!st->IsDefined()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
StructType::GetBaseType() const {
|
||||
return this;
|
||||
@@ -2179,7 +2198,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
|
||||
llvm::DIFile diFile = pos.GetDIFile();
|
||||
return m->diBuilder->createStructType(
|
||||
scope,
|
||||
diFile,
|
||||
name,
|
||||
diFile,
|
||||
pos.first_line, // Line number
|
||||
@@ -2422,7 +2441,7 @@ UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
llvm::DIFile diFile = pos.GetDIFile();
|
||||
llvm::DIArray elements;
|
||||
return m->diBuilder->createStructType(
|
||||
scope,
|
||||
diFile,
|
||||
name,
|
||||
diFile,
|
||||
pos.first_line, // Line number
|
||||
|
||||
19
type.h
19
type.h
@@ -81,15 +81,15 @@ struct Variability {
|
||||
/** Enumerant that records each of the types that inherit from the Type
|
||||
baseclass. */
|
||||
enum TypeId {
|
||||
ATOMIC_TYPE,
|
||||
ENUM_TYPE,
|
||||
POINTER_TYPE,
|
||||
ARRAY_TYPE,
|
||||
VECTOR_TYPE,
|
||||
STRUCT_TYPE,
|
||||
UNDEFINED_STRUCT_TYPE,
|
||||
REFERENCE_TYPE,
|
||||
FUNCTION_TYPE
|
||||
ATOMIC_TYPE, // 0
|
||||
ENUM_TYPE, // 1
|
||||
POINTER_TYPE, // 2
|
||||
ARRAY_TYPE, // 3
|
||||
VECTOR_TYPE, // 4
|
||||
STRUCT_TYPE, // 5
|
||||
UNDEFINED_STRUCT_TYPE, // 6
|
||||
REFERENCE_TYPE, // 7
|
||||
FUNCTION_TYPE // 8
|
||||
};
|
||||
|
||||
|
||||
@@ -675,6 +675,7 @@ public:
|
||||
bool IsIntType() const;
|
||||
bool IsUnsignedType() const;
|
||||
bool IsConstType() const;
|
||||
bool IsDefined() const;
|
||||
|
||||
const Type *GetBaseType() const;
|
||||
const StructType *GetAsVaryingType() const;
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef MY_STDINT_H
|
||||
#define MY_STDINT_H 1
|
||||
|
||||
typedef signed char int8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
#endif // MY_STDINT_H
|
||||
@@ -1,6 +0,0 @@
|
||||
#ifndef MY_UNISTD_H
|
||||
#define MY_UNISTD_H 1
|
||||
|
||||
inline bool isatty(int) { return false; }
|
||||
|
||||
#endif // MY_UNISTD_H
|
||||
Reference in New Issue
Block a user