Merge branch 'master' into nvptx_merge
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2013, Intel Corporation
|
Copyright (c) 2010-2014, Intel Corporation
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -77,7 +77,7 @@ covered by the following license:
|
|||||||
University of Illinois/NCSA
|
University of Illinois/NCSA
|
||||||
Open Source License
|
Open Source License
|
||||||
|
|
||||||
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
|
Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Developed by:
|
Developed by:
|
||||||
|
|||||||
35
Makefile
35
Makefile
@@ -34,11 +34,41 @@
|
|||||||
# ispc Makefile
|
# ispc Makefile
|
||||||
#
|
#
|
||||||
|
|
||||||
|
define newline
|
||||||
|
|
||||||
|
|
||||||
|
endef
|
||||||
|
|
||||||
|
define WARNING_BODY
|
||||||
|
============================== !!! WARNING !!! =============================== \n
|
||||||
|
Location of LLVM files in your PATH is different than path in LLVM_HOME \n
|
||||||
|
variable (or LLVM_HOME is not set). The most likely this means that you are \n
|
||||||
|
using default LLVM installation on your system, which is very bad sign. \n
|
||||||
|
Note, that ISPC uses LLVM optimizer and is highly dependent on it. We recommend \n
|
||||||
|
using *patched* version of LLVM 3.3 or 3.4. Patches are availible in \n
|
||||||
|
llvm_patches folder. You can build LLVM manually, or run our scripts, which \n
|
||||||
|
will do all the work for you. Do the following: \n
|
||||||
|
1. Create a folder, where LLVM will reside and set LLVM_HOME variable to its \n
|
||||||
|
path. \n
|
||||||
|
2. Set ISPC_HOME variable to your ISPC location (probably current folder).
|
||||||
|
3. Run alloy.py tool to checkout and build LLVM: \n
|
||||||
|
alloy.py -b --version=3.4 \n
|
||||||
|
4. Add $$LLVM_HOME/bin-3.4/bin path to your PATH. \n
|
||||||
|
==============================================================================
|
||||||
|
endef
|
||||||
|
|
||||||
# If you have your own special version of llvm and/or clang, change
|
# If you have your own special version of llvm and/or clang, change
|
||||||
# these variables to match.
|
# these variables to match.
|
||||||
LLVM_CONFIG=$(shell which llvm-config)
|
LLVM_CONFIG=$(shell which llvm-config)
|
||||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||||
|
|
||||||
|
RIGHT_LLVM = $(WARNING_BODY)
|
||||||
|
ifdef LLVM_HOME
|
||||||
|
ifeq ($(findstring $(LLVM_HOME), $(LLVM_CONFIG)), $(LLVM_HOME))
|
||||||
|
RIGHT_LLVM = LLVM from $$LLVM_HOME is used.
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
# Enable ARM by request
|
# Enable ARM by request
|
||||||
# To enable: make ARM_ENABLED=1
|
# To enable: make ARM_ENABLED=1
|
||||||
ARM_ENABLED=0
|
ARM_ENABLED=0
|
||||||
@@ -56,7 +86,7 @@ endif
|
|||||||
ARCH_TYPE = $(shell arch)
|
ARCH_TYPE = $(shell arch)
|
||||||
|
|
||||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn// -e s/\.0//)
|
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
|
||||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||||
|
|
||||||
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
|
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
|
||||||
@@ -84,7 +114,7 @@ ifeq ($(LLVM_VERSION),LLVM_3_4)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(LLVM_VERSION),LLVM_3_5)
|
ifeq ($(LLVM_VERSION),LLVM_3_5)
|
||||||
ISPC_LIBS += -lcurses
|
ISPC_LIBS += -lcurses -lz
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH_OS),Linux)
|
ifeq ($(ARCH_OS),Linux)
|
||||||
@@ -188,6 +218,7 @@ llvm_check:
|
|||||||
echo "ERROR: llvm-config not found in your PATH"; \
|
echo "ERROR: llvm-config not found in your PATH"; \
|
||||||
echo "******************************************"; \
|
echo "******************************************"; \
|
||||||
echo; exit 1)
|
echo; exit 1)
|
||||||
|
@echo -e '$(subst $(newline), ,$(RIGHT_LLVM))'
|
||||||
|
|
||||||
print_llvm_src: llvm_check
|
print_llvm_src: llvm_check
|
||||||
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||||
|
|||||||
20
alloy.py
20
alloy.py
@@ -84,6 +84,10 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
|||||||
# Here we understand what and where do we want to build
|
# Here we understand what and where do we want to build
|
||||||
current_path = os.getcwd()
|
current_path = os.getcwd()
|
||||||
llvm_home = os.environ["LLVM_HOME"]
|
llvm_home = os.environ["LLVM_HOME"]
|
||||||
|
|
||||||
|
|
||||||
|
make_sure_dir_exists(llvm_home)
|
||||||
|
|
||||||
os.chdir(llvm_home)
|
os.chdir(llvm_home)
|
||||||
FOLDER_NAME=version_LLVM
|
FOLDER_NAME=version_LLVM
|
||||||
if version_LLVM == "trunk":
|
if version_LLVM == "trunk":
|
||||||
@@ -292,6 +296,8 @@ def build_ispc(version_LLVM, make):
|
|||||||
p_temp = os.getenv("LLVM_INSTALL_DIR")
|
p_temp = os.getenv("LLVM_INSTALL_DIR")
|
||||||
v_temp = os.getenv("LLVM_VERSION")
|
v_temp = os.getenv("LLVM_VERSION")
|
||||||
os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM
|
os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM
|
||||||
|
if version_LLVM == "3.2":
|
||||||
|
temp = "3_2"
|
||||||
if version_LLVM == "3.3":
|
if version_LLVM == "3.3":
|
||||||
temp = "3_3"
|
temp = "3_3"
|
||||||
if version_LLVM == "3.4":
|
if version_LLVM == "3.4":
|
||||||
@@ -467,7 +473,10 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
|||||||
for i2 in range(0,len(opts)):
|
for i2 in range(0,len(opts)):
|
||||||
stability.arch = arch[i1]
|
stability.arch = arch[i1]
|
||||||
stability.no_opt = opts[i2]
|
stability.no_opt = opts[i2]
|
||||||
execute_stability(stability, R, print_version)
|
try:
|
||||||
|
execute_stability(stability, R, print_version)
|
||||||
|
except:
|
||||||
|
print_debug("Exception in execute_stability - maybe some test subprocess terminated before it should have\n", False, stability_log)
|
||||||
print_version = 0
|
print_version = 0
|
||||||
for j in range(0,len(sde_targets)):
|
for j in range(0,len(sde_targets)):
|
||||||
stability.target = sde_targets[j][1]
|
stability.target = sde_targets[j][1]
|
||||||
@@ -574,6 +583,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
|||||||
f_lines = fp.readlines()
|
f_lines = fp.readlines()
|
||||||
fp.close()
|
fp.close()
|
||||||
line = ""
|
line = ""
|
||||||
|
if not sys.exc_info()[0] == None:
|
||||||
|
line = line + "Last exception: " + str(sys.exc_info()) + '\n'
|
||||||
for i in range(0,len(f_lines)):
|
for i in range(0,len(f_lines)):
|
||||||
line = line + f_lines[i][:-1]
|
line = line + f_lines[i][:-1]
|
||||||
line = line + ' \n'
|
line = line + ' \n'
|
||||||
@@ -581,7 +592,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
|||||||
msg.attach(text)
|
msg.attach(text)
|
||||||
attach_mail_file(msg, alloy_build, "alloy_build.log")
|
attach_mail_file(msg, alloy_build, "alloy_build.log")
|
||||||
s = smtplib.SMTP(smtp_server)
|
s = smtplib.SMTP(smtp_server)
|
||||||
s.sendmail('ISPC_test_system', options.notify, msg.as_string())
|
s.sendmail('ISPC_test_system', options.notify.split(" "), msg.as_string())
|
||||||
s.quit()
|
s.quit()
|
||||||
|
|
||||||
def Main():
|
def Main():
|
||||||
@@ -628,7 +639,8 @@ def Main():
|
|||||||
current_path = os.getcwd()
|
current_path = os.getcwd()
|
||||||
make = "make -j" + options.speed
|
make = "make -j" + options.speed
|
||||||
if os.environ["ISPC_HOME"] != os.getcwd():
|
if os.environ["ISPC_HOME"] != os.getcwd():
|
||||||
error("you ISPC_HOME and your current path are different!\n", 2)
|
error("you ISPC_HOME and your current path are different! (" + os.environ["ISPC_HOME"] + " is not equal to " + os.getcwd() +
|
||||||
|
")\n", 2)
|
||||||
if options.perf_llvm == True:
|
if options.perf_llvm == True:
|
||||||
if options.branch == "master":
|
if options.branch == "master":
|
||||||
options.branch = "trunk"
|
options.branch = "trunk"
|
||||||
@@ -657,6 +669,7 @@ from optparse import OptionParser
|
|||||||
from optparse import OptionGroup
|
from optparse import OptionGroup
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import errno
|
||||||
import operator
|
import operator
|
||||||
import time
|
import time
|
||||||
import glob
|
import glob
|
||||||
@@ -677,6 +690,7 @@ import common
|
|||||||
error = common.error
|
error = common.error
|
||||||
take_lines = common.take_lines
|
take_lines = common.take_lines
|
||||||
print_debug = common.print_debug
|
print_debug = common.print_debug
|
||||||
|
make_sure_dir_exists = common.make_sure_dir_exists
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# parsing options
|
# parsing options
|
||||||
class MyParser(OptionParser):
|
class MyParser(OptionParser):
|
||||||
|
|||||||
5
ast.cpp
5
ast.cpp
@@ -389,6 +389,11 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dynamic_cast<PrintStmt *>(node) != NULL) {
|
||||||
|
*okPtr = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (dynamic_cast<NewExpr *>(node) != NULL ||
|
if (dynamic_cast<NewExpr *>(node) != NULL ||
|
||||||
dynamic_cast<DeleteStmt *>(node) != NULL) {
|
dynamic_cast<DeleteStmt *>(node) != NULL) {
|
||||||
// We definitely don't want to run the uniform variants of these if
|
// We definitely don't want to run the uniform variants of these if
|
||||||
|
|||||||
@@ -765,7 +765,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
|||||||
llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
|
llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
|
||||||
#if defined(LLVM_3_5)
|
#if defined(LLVM_3_5)
|
||||||
llvm::ErrorOr<llvm::Module *> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
|
llvm::ErrorOr<llvm::Module *> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
|
||||||
if (llvm::error_code EC = ModuleOrErr.getError())
|
if (std::error_code EC = ModuleOrErr.getError())
|
||||||
Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
|
Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
|
||||||
else {
|
else {
|
||||||
llvm::Module *bcModule = ModuleOrErr.get();
|
llvm::Module *bcModule = ModuleOrErr.get();
|
||||||
|
|||||||
@@ -1498,7 +1498,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
|||||||
%cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE
|
%cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE
|
||||||
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||||
ifelse(LLVM_VERSION,LLVM_3_5,`
|
ifelse(LLVM_VERSION,LLVM_3_5,`
|
||||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||||
|
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
|
||||||
',`
|
',`
|
||||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst
|
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst
|
||||||
')
|
')
|
||||||
@@ -1513,7 +1514,8 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
|||||||
define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
||||||
$2 %val) nounwind alwaysinline {
|
$2 %val) nounwind alwaysinline {
|
||||||
ifelse(LLVM_VERSION,LLVM_3_5,`
|
ifelse(LLVM_VERSION,LLVM_3_5,`
|
||||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||||
|
%r = extractvalue { $2, i1 } %r_t, 0
|
||||||
',`
|
',`
|
||||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst
|
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst
|
||||||
')
|
')
|
||||||
|
|||||||
25
cbackend.cpp
25
cbackend.cpp
@@ -69,6 +69,7 @@
|
|||||||
#include "llvm/IR/CallSite.h"
|
#include "llvm/IR/CallSite.h"
|
||||||
#include "llvm/IR/CFG.h"
|
#include "llvm/IR/CFG.h"
|
||||||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
#else
|
#else
|
||||||
#include "llvm/Analysis/Verifier.h"
|
#include "llvm/Analysis/Verifier.h"
|
||||||
#include <llvm/Assembly/PrintModulePass.h>
|
#include <llvm/Assembly/PrintModulePass.h>
|
||||||
@@ -1769,7 +1770,11 @@ std::string CWriter::GetValueName(const llvm::Value *Operand) {
|
|||||||
|
|
||||||
// Resolve potential alias.
|
// Resolve potential alias.
|
||||||
if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand)) {
|
if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand)) {
|
||||||
|
#if defined(LLVM_3_5)
|
||||||
|
if (const llvm::Value *V = GA->getAliasee())
|
||||||
|
#else
|
||||||
if (const llvm::Value *V = GA->resolveAliasedGlobal(false))
|
if (const llvm::Value *V = GA->resolveAliasedGlobal(false))
|
||||||
|
#endif
|
||||||
Operand = V;
|
Operand = V;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2158,7 +2163,13 @@ static SpecialGlobalClass getGlobalVariableClass(const llvm::GlobalVariable *GV)
|
|||||||
|
|
||||||
// Otherwise, if it is other metadata, don't print it. This catches things
|
// Otherwise, if it is other metadata, don't print it. This catches things
|
||||||
// like debug information.
|
// like debug information.
|
||||||
|
#if defined(LLVM_3_5)
|
||||||
|
// Here we compare char *
|
||||||
|
if (!strcmp(GV->getSection(), "llvm.metadata"))
|
||||||
|
#else
|
||||||
|
// Here we compare strings
|
||||||
if (GV->getSection() == "llvm.metadata")
|
if (GV->getSection() == "llvm.metadata")
|
||||||
|
#endif
|
||||||
return NotPrinted;
|
return NotPrinted;
|
||||||
|
|
||||||
return NotSpecial;
|
return NotSpecial;
|
||||||
@@ -3282,10 +3293,16 @@ void CWriter::visitBinaryOperator(llvm::Instruction &I) {
|
|||||||
if ((I.getOpcode() == llvm::Instruction::Shl ||
|
if ((I.getOpcode() == llvm::Instruction::Shl ||
|
||||||
I.getOpcode() == llvm::Instruction::LShr ||
|
I.getOpcode() == llvm::Instruction::LShr ||
|
||||||
I.getOpcode() == llvm::Instruction::AShr)) {
|
I.getOpcode() == llvm::Instruction::AShr)) {
|
||||||
if (LLVMVectorValuesAllEqual(I.getOperand(1))) {
|
llvm::Value *splat = NULL;
|
||||||
Out << "__extract_element(";
|
if (LLVMVectorValuesAllEqual(I.getOperand(1), &splat)) {
|
||||||
writeOperand(I.getOperand(1));
|
if (splat) {
|
||||||
Out << ", 0) ";
|
// Avoid __extract_element(splat(value), 0), if possible.
|
||||||
|
writeOperand(splat);
|
||||||
|
} else {
|
||||||
|
Out << "__extract_element(";
|
||||||
|
writeOperand(I.getOperand(1));
|
||||||
|
Out << ", 0) ";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
writeOperand(I.getOperand(1));
|
writeOperand(I.getOperand(1));
|
||||||
|
|||||||
@@ -34,6 +34,7 @@
|
|||||||
# // Author: Filippov Ilia
|
# // Author: Filippov Ilia
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import errno
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
def write_to_file(filename, line):
|
def write_to_file(filename, line):
|
||||||
@@ -49,6 +50,14 @@ def remove_if_exists(filename):
|
|||||||
else:
|
else:
|
||||||
os.remove(filename)
|
os.remove(filename)
|
||||||
|
|
||||||
|
def make_sure_dir_exists(path):
|
||||||
|
try:
|
||||||
|
os.makedirs(path)
|
||||||
|
except OSError as exception:
|
||||||
|
if exception.errno != errno.EEXIST:
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
# detect version which is printed after command
|
# detect version which is printed after command
|
||||||
def take_lines(command, which):
|
def take_lines(command, which):
|
||||||
os.system(command + " > " + "temp_detect_version")
|
os.system(command + " > " + "temp_detect_version")
|
||||||
|
|||||||
@@ -1,3 +1,47 @@
|
|||||||
|
=== v1.7.0 === (18 April 2014)
|
||||||
|
|
||||||
|
A major new version of ISPC with several language and library extensions and
|
||||||
|
fixes in debug info support. Binaries for all platforms are based on patched
|
||||||
|
version of LLVM 3.4. There are also performance improvements beyond the switchover to
|
||||||
|
LLVM 3.4.
|
||||||
|
|
||||||
|
The list of language and library changes:
|
||||||
|
|
||||||
|
* Support for varying types in exported functions was added. See documentation
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
* get_programCount() function was moved from stdlib.ispc to
|
||||||
|
examples/util/util.isph, which needs to be included somewhere in your
|
||||||
|
project, if you want to use it.
|
||||||
|
|
||||||
|
* Library functions for saturated arithmetic were added. add/sub/mul/div
|
||||||
|
operations are supported for signed and unsigned 8/16/32/64 integer types
|
||||||
|
(both uniform and varying).
|
||||||
|
|
||||||
|
* The algorithm for selecting overloaded functions was extended to cover more
|
||||||
|
types of overloading. Handling of reference types in overloaded functions was
|
||||||
|
fixed. The rules for selecting the best match were changed to match C++,
|
||||||
|
which requires the function to be the best match for all parameters. In
|
||||||
|
ambiguous cases, a warning is issued, but it will be converted to an error
|
||||||
|
in the next release.
|
||||||
|
|
||||||
|
* Explicit typecasts between any two reference types were allowed.
|
||||||
|
|
||||||
|
* Implicit cast of pointer to const type to void* was disallowed.
|
||||||
|
|
||||||
|
The list of other notable changes is:
|
||||||
|
|
||||||
|
* A number of fixes for better debug info support.
|
||||||
|
|
||||||
|
* A memory corruption bug was fixed, which caused rare but not reproducible
|
||||||
|
compile-time failures.
|
||||||
|
|
||||||
|
* Alias analysis was enabled (more aggressive optimizations are expected).
|
||||||
|
|
||||||
|
* A bug involving inaccurate handling of "const" qualifier was fixed. As a
|
||||||
|
result, more "const" qualifiers may appear in .h files, which may cause
|
||||||
|
compilation errors.
|
||||||
|
|
||||||
=== v1.6.0 === (19 December 2013)
|
=== v1.6.0 === (19 December 2013)
|
||||||
|
|
||||||
A major new version of ISPC with major improvements in performance and
|
A major new version of ISPC with major improvements in performance and
|
||||||
|
|||||||
@@ -50,6 +50,7 @@ Contents:
|
|||||||
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
||||||
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
|
||||||
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
|
||||||
|
+ `Updating ISPC Programs For Changes In ISPC 1.7.0`_
|
||||||
|
|
||||||
* `Getting Started with ISPC`_
|
* `Getting Started with ISPC`_
|
||||||
|
|
||||||
@@ -299,6 +300,32 @@ becomes a keyword and it potentially creates a conflict with existing user
|
|||||||
function. Also a new library function packed_store_active2() was introduced,
|
function. Also a new library function packed_store_active2() was introduced,
|
||||||
which also may create a conflict with existing user functions.
|
which also may create a conflict with existing user functions.
|
||||||
|
|
||||||
|
Updating ISPC Programs For Changes In ISPC 1.7.0
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
This release contains several changes that may affect compatibility with
|
||||||
|
older versions:
|
||||||
|
|
||||||
|
* The algorithm for selecting overloaded functions was extended to cover more
|
||||||
|
types of overloading, and handling of reference types was fixed. At the same
|
||||||
|
time the old scheme, which blindly used the function with "the best score"
|
||||||
|
summed for all arguments, was switched to the C++ approach, which requires
|
||||||
|
"the best score" for each argument. If the best function doesn't exist, a
|
||||||
|
warning is issued in this version. It will be turned into an error in the
|
||||||
|
next version. A simple example: Suppose we have two functions: max(int, int)
|
||||||
|
and max(unsigned int, unsigned int). The new rules lead to an error when
|
||||||
|
calling max(int, unsigned int), as the best choice is ambiguous.
|
||||||
|
|
||||||
|
* Implicit cast of pointer to const type to void* was disallowed. Use explicit
|
||||||
|
cast if needed.
|
||||||
|
|
||||||
|
* A bug which prevented "const" qualifiers from appearing in emitted .h files
|
||||||
|
was fixed. Consequently, "const" qualifiers now properly appearing in emitted
|
||||||
|
.h files may cause compile errors in pre-existing code.
|
||||||
|
|
||||||
|
* get_programCount() was moved from stdlib to examples/util/util.isph file. You
|
||||||
|
need to include this file to be able to use this function.
|
||||||
|
|
||||||
|
|
||||||
Getting Started with ISPC
|
Getting Started with ISPC
|
||||||
=========================
|
=========================
|
||||||
|
|||||||
@@ -2,6 +2,14 @@
|
|||||||
ispc News
|
ispc News
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
ispc 1.7.0 is Released
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
A major new version of ISPC with several language and library extensions and
|
||||||
|
fixes in debug info support. Binaries for all platforms are based on patched
|
||||||
|
version of LLVM 3.4. There are also performance improvements beyond the switchover to
|
||||||
|
LLVM 3.4.
|
||||||
|
|
||||||
ispc 1.6.0 is Released
|
ispc 1.6.0 is Released
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
@@ -11,7 +19,6 @@ a number of language and library extensions. Released binaries are based on
|
|||||||
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
|
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
|
||||||
to Release Notes for complete set of changes.
|
to Release Notes for complete set of changes.
|
||||||
|
|
||||||
|
|
||||||
ispc 1.5.0 is Released
|
ispc 1.5.0 is Released
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|||||||
@@ -57,7 +57,7 @@
|
|||||||
%(body)s
|
%(body)s
|
||||||
</div>
|
</div>
|
||||||
<div class="clearfix"></div>
|
<div class="clearfix"></div>
|
||||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||||
<!-- Please Do Not remove this link, thank u -->
|
<!-- Please Do Not remove this link, thank u -->
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -57,7 +57,7 @@
|
|||||||
%(body)s
|
%(body)s
|
||||||
</div>
|
</div>
|
||||||
<div class="clearfix"></div>
|
<div class="clearfix"></div>
|
||||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||||
<!-- Please Do Not remove this link, thank u -->
|
<!-- Please Do Not remove this link, thank u -->
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -57,7 +57,7 @@
|
|||||||
%(body)s
|
%(body)s
|
||||||
</div>
|
</div>
|
||||||
<div class="clearfix"></div>
|
<div class="clearfix"></div>
|
||||||
<div id="footer"> © 2011-2013 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
<div id="footer"> © 2011-2014 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||||
<!-- Please Do Not remove this link, thank u -->
|
<!-- Please Do Not remove this link, thank u -->
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
|||||||
# This could be handy for archiving the generated documentation or
|
# This could be handy for archiving the generated documentation or
|
||||||
# if some version control system is used.
|
# if some version control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = 1.6.1dev
|
PROJECT_NUMBER = 1.7.1dev
|
||||||
|
|
||||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||||
# base path where the generated documentation will be put.
|
# base path where the generated documentation will be put.
|
||||||
|
|||||||
@@ -183,7 +183,7 @@ struct TaskInfo {
|
|||||||
void *data;
|
void *data;
|
||||||
int taskIndex;
|
int taskIndex;
|
||||||
int taskCount3d[3];
|
int taskCount3d[3];
|
||||||
#if defined(ISPC_IS_WINDOWS)
|
#if defined( ISPC_USE_CONCRT)
|
||||||
event taskEvent;
|
event taskEvent;
|
||||||
#endif
|
#endif
|
||||||
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||||
@@ -1003,7 +1003,7 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
// Actually run the task.
|
// Actually run the task.
|
||||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount();
|
||||||
|
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
@@ -1033,7 +1033,7 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
|
|
||||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount();
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
|
|||||||
@@ -1,5 +1,42 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef UTIL_ISPH
|
||||||
|
#define UTIL_ISPH
|
||||||
|
|
||||||
// utility function to read the value of programCount from C/C++
|
// utility function to read the value of programCount from C/C++
|
||||||
export uniform int32 get_programCount() {
|
export uniform int32 get_programCount() {
|
||||||
return programCount;
|
return programCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif // UTIL_ISPH
|
||||||
|
|||||||
19
expr.cpp
19
expr.cpp
@@ -5143,9 +5143,18 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos,
|
|||||||
exprType->GetString().c_str());
|
exprType->GetString().c_str());
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
if (CastType<StructType>(exprType) != NULL) {
|
||||||
if (CastType<StructType>(exprType) != NULL)
|
const StructType *st = CastType<StructType>(exprType);
|
||||||
|
if (st->IsDefined()) {
|
||||||
return new StructMemberExpr(e, id, p, idpos, derefLValue);
|
return new StructMemberExpr(e, id, p, idpos, derefLValue);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Error(p, "Member operator \"%s\" can't be applied to declared "
|
||||||
|
"struct \"%s\" containing an undefined struct type.", derefLValue ? "->" : ".",
|
||||||
|
exprType->GetString().c_str());
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (CastType<VectorType>(exprType) != NULL)
|
else if (CastType<VectorType>(exprType) != NULL)
|
||||||
return new VectorMemberExpr(e, id, p, idpos, derefLValue);
|
return new VectorMemberExpr(e, id, p, idpos, derefLValue);
|
||||||
else if (CastType<UndefinedStructType>(exprType)) {
|
else if (CastType<UndefinedStructType>(exprType)) {
|
||||||
@@ -8719,6 +8728,12 @@ NewExpr::TypeCheck() {
|
|||||||
"but not defined type \"%s\".", allocType->GetString().c_str());
|
"but not defined type \"%s\".", allocType->GetString().c_str());
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
const StructType *st = CastType<StructType>(allocType);
|
||||||
|
if (st != NULL && !st->IsDefined()) {
|
||||||
|
Error(pos, "Can't dynamically allocate storage for declared "
|
||||||
|
"type \"%s\" containing undefined member type.", allocType->GetString().c_str());
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// Otherwise we only need to make sure that if we have an expression
|
// Otherwise we only need to make sure that if we have an expression
|
||||||
// giving a number of elements to allocate that it can be converted to
|
// giving a number of elements to allocate that it can be converted to
|
||||||
|
|||||||
214
ispc.cpp
214
ispc.cpp
@@ -241,40 +241,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
|
||||||
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
|
||||||
// If we're compiling NEON on an x86 host and the CPU wasn't
|
|
||||||
// supplied, don't go and set the CPU based on the host...
|
|
||||||
cpu = "cortex-a9";
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (cpu == NULL) {
|
|
||||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
|
||||||
if (hostCPU.size() > 0)
|
|
||||||
cpu = strdup(hostCPU.c_str());
|
|
||||||
else {
|
|
||||||
Warning(SourcePos(), "Unable to determine host CPU!\n");
|
|
||||||
cpu = "generic";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
bool foundCPU = false;
|
|
||||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
|
||||||
++i) {
|
|
||||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
|
||||||
foundCPU = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (foundCPU == false) {
|
|
||||||
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
|
||||||
"%s.", cpu, SupportedCPUs().c_str());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this->m_cpu = cpu;
|
|
||||||
|
|
||||||
if (arch == NULL) {
|
if (arch == NULL) {
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
if (!strncmp(isa, "neon", 4))
|
if (!strncmp(isa, "neon", 4))
|
||||||
@@ -311,6 +277,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_arch = arch;
|
this->m_arch = arch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char * cpuFromIsa;
|
||||||
|
|
||||||
// Check default LLVM generated targets
|
// Check default LLVM generated targets
|
||||||
if (!strcasecmp(isa, "sse2") ||
|
if (!strcasecmp(isa, "sse2") ||
|
||||||
!strcasecmp(isa, "sse2-i32x4")) {
|
!strcasecmp(isa, "sse2-i32x4")) {
|
||||||
@@ -319,15 +287,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",-sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",-sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "core2";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse2-x2") ||
|
else if (!strcasecmp(isa, "sse2-x2") ||
|
||||||
!strcasecmp(isa, "sse2-i32x8")) {
|
!strcasecmp(isa, "sse2-i32x8")) {
|
||||||
@@ -336,15 +298,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",-sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",-sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "core2";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4") ||
|
else if (!strcasecmp(isa, "sse4") ||
|
||||||
!strcasecmp(isa, "sse4-i32x4")) {
|
!strcasecmp(isa, "sse4-i32x4")) {
|
||||||
@@ -353,16 +309,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
// TODO: why not sse42 and popcnt?
|
|
||||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",+sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "corei7";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4x2") ||
|
else if (!strcasecmp(isa, "sse4x2") ||
|
||||||
!strcasecmp(isa, "sse4-x2") ||
|
!strcasecmp(isa, "sse4-x2") ||
|
||||||
@@ -372,15 +321,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",+sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "corei7";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4-i8x16")) {
|
else if (!strcasecmp(isa, "sse4-i8x16")) {
|
||||||
this->m_isa = Target::SSE4;
|
this->m_isa = Target::SSE4;
|
||||||
@@ -388,15 +331,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 8;
|
this->m_dataTypeWidth = 8;
|
||||||
this->m_vectorWidth = 16;
|
this->m_vectorWidth = 16;
|
||||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",+sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 8;
|
this->m_maskBitCount = 8;
|
||||||
|
cpuFromIsa = "corei7";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4-i16x8")) {
|
else if (!strcasecmp(isa, "sse4-i16x8")) {
|
||||||
this->m_isa = Target::SSE4;
|
this->m_isa = Target::SSE4;
|
||||||
@@ -404,15 +341,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 16;
|
this->m_nativeVectorAlignment = 16;
|
||||||
this->m_dataTypeWidth = 16;
|
this->m_dataTypeWidth = 16;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+sse4.1,-sse4.2"
|
|
||||||
#else
|
|
||||||
",+sse41,-sse42"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 16;
|
this->m_maskBitCount = 16;
|
||||||
|
cpuFromIsa = "corei7";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-4") ||
|
else if (!strcasecmp(isa, "generic-4") ||
|
||||||
!strcasecmp(isa, "generic-x4")) {
|
!strcasecmp(isa, "generic-x4")) {
|
||||||
@@ -452,7 +383,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_maskBitCount = 1;
|
this->m_maskBitCount = 1;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasTrigonometry = true;
|
// It's set to false, because stdlib implementation of math functions
|
||||||
|
// is faster on MIC than the "native" implementation provided by the
|
||||||
|
// icc compiler.
|
||||||
|
this->m_hasTrigonometry = false;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
@@ -499,9 +433,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "corei7-avx";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx") ||
|
else if (!strcasecmp(isa, "avx") ||
|
||||||
!strcasecmp(isa, "avx1") ||
|
!strcasecmp(isa, "avx1") ||
|
||||||
@@ -511,9 +445,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "corei7-avx";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx-i64x4") ||
|
else if (!strcasecmp(isa, "avx-i64x4") ||
|
||||||
!strcasecmp(isa, "avx1-i64x4")) {
|
!strcasecmp(isa, "avx1-i64x4")) {
|
||||||
@@ -522,9 +456,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 64;
|
this->m_dataTypeWidth = 64;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 64;
|
this->m_maskBitCount = 64;
|
||||||
|
cpuFromIsa = "corei7-avx";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx-x2") ||
|
else if (!strcasecmp(isa, "avx-x2") ||
|
||||||
!strcasecmp(isa, "avx1-x2") ||
|
!strcasecmp(isa, "avx1-x2") ||
|
||||||
@@ -534,9 +468,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 16;
|
this->m_vectorWidth = 16;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
|
cpuFromIsa = "corei7-avx";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx1.1") ||
|
else if (!strcasecmp(isa, "avx1.1") ||
|
||||||
!strcasecmp(isa, "avx1.1-i32x8")) {
|
!strcasecmp(isa, "avx1.1-i32x8")) {
|
||||||
@@ -545,20 +479,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx-i";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx1.1-x2") ||
|
else if (!strcasecmp(isa, "avx1.1-x2") ||
|
||||||
!strcasecmp(isa, "avx1.1-i32x16")) {
|
!strcasecmp(isa, "avx1.1-i32x16")) {
|
||||||
@@ -567,20 +492,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 16;
|
this->m_vectorWidth = 16;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx-i";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
||||||
this->m_isa = Target::AVX11;
|
this->m_isa = Target::AVX11;
|
||||||
@@ -588,20 +504,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 64;
|
this->m_dataTypeWidth = 64;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 64;
|
this->m_maskBitCount = 64;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx-i";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx2") ||
|
else if (!strcasecmp(isa, "avx2") ||
|
||||||
!strcasecmp(isa, "avx2-i32x8")) {
|
!strcasecmp(isa, "avx2-i32x8")) {
|
||||||
@@ -610,24 +517,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 8;
|
this->m_vectorWidth = 8;
|
||||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
#ifndef LLVM_3_1
|
|
||||||
",+fma"
|
|
||||||
#endif // !LLVM_3_1
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
this->m_hasGather = true;
|
this->m_hasGather = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx2";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx2-x2") ||
|
else if (!strcasecmp(isa, "avx2-x2") ||
|
||||||
!strcasecmp(isa, "avx2-i32x16")) {
|
!strcasecmp(isa, "avx2-i32x16")) {
|
||||||
@@ -636,24 +531,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 32;
|
this->m_dataTypeWidth = 32;
|
||||||
this->m_vectorWidth = 16;
|
this->m_vectorWidth = 16;
|
||||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
#ifndef LLVM_3_1
|
|
||||||
",+fma"
|
|
||||||
#endif // !LLVM_3_1
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
this->m_hasGather = true;
|
this->m_hasGather = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx2";
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
||||||
this->m_isa = Target::AVX2;
|
this->m_isa = Target::AVX2;
|
||||||
@@ -661,24 +544,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_dataTypeWidth = 64;
|
this->m_dataTypeWidth = 64;
|
||||||
this->m_vectorWidth = 4;
|
this->m_vectorWidth = 4;
|
||||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
|
||||||
#if defined(LLVM_3_4) || defined(LLVM_3_5)
|
|
||||||
",+rdrnd"
|
|
||||||
#else
|
|
||||||
",+rdrand"
|
|
||||||
#endif
|
|
||||||
#ifndef LLVM_3_1
|
|
||||||
",+fma"
|
|
||||||
#endif // !LLVM_3_1
|
|
||||||
;
|
|
||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 64;
|
this->m_maskBitCount = 64;
|
||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
#if !defined(LLVM_3_1)
|
|
||||||
// LLVM 3.2+ only
|
|
||||||
this->m_hasRand = true;
|
this->m_hasRand = true;
|
||||||
this->m_hasGather = true;
|
this->m_hasGather = true;
|
||||||
#endif
|
cpuFromIsa = "core-avx2";
|
||||||
}
|
}
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||||
@@ -736,6 +607,47 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
error = true;
|
error = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||||
|
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
||||||
|
|
||||||
|
cpu = "cortex-a9";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (cpu == NULL) {
|
||||||
|
#ifndef ISPC_ARM_ENABLED
|
||||||
|
if (isa == NULL) {
|
||||||
|
#endif
|
||||||
|
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||||
|
if (hostCPU.size() > 0)
|
||||||
|
cpu = strdup(hostCPU.c_str());
|
||||||
|
else {
|
||||||
|
Warning(SourcePos(), "Unable to determine host CPU!\n");
|
||||||
|
cpu = "generic";
|
||||||
|
}
|
||||||
|
#ifndef ISPC_ARM_ENABLED
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cpu = cpuFromIsa;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bool foundCPU = false;
|
||||||
|
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||||
|
++i) {
|
||||||
|
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||||
|
foundCPU = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (foundCPU == false) {
|
||||||
|
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||||
|
"%s.", cpu, SupportedCPUs().c_str());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this->m_cpu = cpu;
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
// Create TargetMachine
|
// Create TargetMachine
|
||||||
std::string triple = GetTripleString();
|
std::string triple = GetTripleString();
|
||||||
|
|||||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
|||||||
#ifndef ISPC_H
|
#ifndef ISPC_H
|
||||||
#define ISPC_H
|
#define ISPC_H
|
||||||
|
|
||||||
#define ISPC_VERSION "1.6.1dev"
|
#define ISPC_VERSION "1.7.1dev"
|
||||||
|
|
||||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||||
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
||||||
|
|||||||
@@ -105,7 +105,6 @@
|
|||||||
<ClInclude Include="sym.h" />
|
<ClInclude Include="sym.h" />
|
||||||
<ClInclude Include="type.h" />
|
<ClInclude Include="type.h" />
|
||||||
<ClInclude Include="util.h" />
|
<ClInclude Include="util.h" />
|
||||||
<ClInclude Include="winstuff\unistd.h" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="stdlib.ispc">
|
<CustomBuild Include="stdlib.ispc">
|
||||||
@@ -396,7 +395,7 @@
|
|||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
@@ -415,7 +414,7 @@
|
|||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;%LLVM_VERSION%</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624;4244</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
|
|||||||
54
llvmutil.cpp
54
llvmutil.cpp
@@ -818,7 +818,8 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
|
|||||||
|
|
||||||
static bool
|
static bool
|
||||||
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||||
std::vector<llvm::PHINode *> &seenPhis);
|
std::vector<llvm::PHINode *> &seenPhis,
|
||||||
|
llvm::Value **splatValue = NULL);
|
||||||
|
|
||||||
|
|
||||||
/** This function checks to see if the given (scalar or vector) value is an
|
/** This function checks to see if the given (scalar or vector) value is an
|
||||||
@@ -1068,20 +1069,37 @@ lVectorShiftRightAllEqual(llvm::Value *val, llvm::Value *shift,
|
|||||||
|
|
||||||
static bool
|
static bool
|
||||||
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||||
std::vector<llvm::PHINode *> &seenPhis) {
|
std::vector<llvm::PHINode *> &seenPhis,
|
||||||
|
llvm::Value **splatValue) {
|
||||||
if (vectorLength == 1)
|
if (vectorLength == 1)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (llvm::isa<llvm::ConstantAggregateZero>(v))
|
if (llvm::isa<llvm::ConstantAggregateZero>(v)) {
|
||||||
|
if (splatValue) {
|
||||||
|
llvm::ConstantAggregateZero *caz =
|
||||||
|
llvm::dyn_cast<llvm::ConstantAggregateZero>(v);
|
||||||
|
*splatValue = caz->getSequentialElement();
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||||
if (cv != NULL)
|
if (cv != NULL) {
|
||||||
return (cv->getSplatValue() != NULL);
|
llvm::Value* splat = cv->getSplatValue();
|
||||||
|
if (splat != NULL && splatValue) {
|
||||||
|
*splatValue = splat;
|
||||||
|
}
|
||||||
|
return (splat != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||||
if (cdv != NULL)
|
if (cdv != NULL) {
|
||||||
return (cdv->getSplatValue() != NULL);
|
llvm::Value* splat = cdv->getSplatValue();
|
||||||
|
if (splat != NULL && splatValue) {
|
||||||
|
*splatValue = splat;
|
||||||
|
}
|
||||||
|
return (splat != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
||||||
if (bop != NULL) {
|
if (bop != NULL) {
|
||||||
@@ -1178,14 +1196,14 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
|||||||
where the values are actually all equal.
|
where the values are actually all equal.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
LLVMVectorValuesAllEqual(llvm::Value *v) {
|
LLVMVectorValuesAllEqual(llvm::Value *v, llvm::Value **splat) {
|
||||||
llvm::VectorType *vt =
|
llvm::VectorType *vt =
|
||||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||||
Assert(vt != NULL);
|
Assert(vt != NULL);
|
||||||
int vectorLength = vt->getNumElements();
|
int vectorLength = vt->getNumElements();
|
||||||
|
|
||||||
std::vector<llvm::PHINode *> seenPhis;
|
std::vector<llvm::PHINode *> seenPhis;
|
||||||
bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis);
|
bool equal = lVectorValuesAllEqual(v, vectorLength, seenPhis, splat);
|
||||||
|
|
||||||
Debug(SourcePos(), "LLVMVectorValuesAllEqual(%s) -> %s.",
|
Debug(SourcePos(), "LLVMVectorValuesAllEqual(%s) -> %s.",
|
||||||
v->getName().str().c_str(), equal ? "true" : "false");
|
v->getName().str().c_str(), equal ? "true" : "false");
|
||||||
@@ -1551,6 +1569,8 @@ lExtractFirstVectorElement(llvm::Value *v,
|
|||||||
phiMap);
|
phiMap);
|
||||||
llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
|
llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
|
||||||
phiMap);
|
phiMap);
|
||||||
|
Assert(v0 != NULL);
|
||||||
|
Assert(v1 != NULL);
|
||||||
// Note that the new binary operator is inserted immediately before
|
// Note that the new binary operator is inserted immediately before
|
||||||
// the previous vector one
|
// the previous vector one
|
||||||
return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
|
return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
|
||||||
@@ -1597,10 +1617,22 @@ lExtractFirstVectorElement(llvm::Value *v,
|
|||||||
return scalarPhi;
|
return scalarPhi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We should consider "shuffle" case and "insertElement" case separately.
|
||||||
|
// For example, we can have shuffle(mul, undef, zero), but the function
|
||||||
|
// "LLVMFlattenInsertChain" can handle only case shuffle(insertElement, undef, zero).
|
||||||
|
// Also, if we have insertElement under shuffle, we will handle it in the next call of
|
||||||
|
// "lExtractFirstVectorElement" function.
|
||||||
|
if (llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||||
|
llvm::ShuffleVectorInst *shuf = llvm::dyn_cast<llvm::ShuffleVectorInst>(v);
|
||||||
|
llvm::Value *indices = shuf->getOperand(2);
|
||||||
|
if (llvm::isa<llvm::ConstantAggregateZero>(indices)) {
|
||||||
|
return lExtractFirstVectorElement(shuf->getOperand(0), phiMap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If we have a chain of insertelement instructions, then we can just
|
// If we have a chain of insertelement instructions, then we can just
|
||||||
// flatten them out and grab the value for the first one.
|
// flatten them out and grab the value for the first one.
|
||||||
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
if (llvm::isa<llvm::InsertElementInst>(v)) {
|
||||||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
|
||||||
return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
|
return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -228,7 +228,8 @@ extern llvm::Constant *LLVMMaskAllOff;
|
|||||||
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
||||||
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
||||||
return false for arrays where the values are actually all equal. */
|
return false for arrays where the values are actually all equal. */
|
||||||
extern bool LLVMVectorValuesAllEqual(llvm::Value *v);
|
extern bool LLVMVectorValuesAllEqual(llvm::Value *v,
|
||||||
|
llvm::Value **splat = NULL);
|
||||||
|
|
||||||
/** Given vector of integer-typed values, this function returns true if it
|
/** Given vector of integer-typed values, this function returns true if it
|
||||||
can determine that the elements of the vector have a step of 'stride'
|
can determine that the elements of the vector have a step of 'stride'
|
||||||
|
|||||||
2
opt.cpp
2
opt.cpp
@@ -541,8 +541,6 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createScalarReplAggregatesPass());
|
optPM.add(llvm::createScalarReplAggregatesPass());
|
||||||
optPM.add(llvm::createEarlyCSEPass());
|
optPM.add(llvm::createEarlyCSEPass());
|
||||||
optPM.add(llvm::createLowerExpectIntrinsicPass());
|
optPM.add(llvm::createLowerExpectIntrinsicPass());
|
||||||
optPM.add(llvm::createTypeBasedAliasAnalysisPass());
|
|
||||||
optPM.add(llvm::createBasicAliasAnalysisPass());
|
|
||||||
|
|
||||||
// Early optimizations to try to reduce the total amount of code to
|
// Early optimizations to try to reduce the total amount of code to
|
||||||
// work with if we can
|
// work with if we can
|
||||||
|
|||||||
24
run_tests.py
24
run_tests.py
@@ -60,9 +60,14 @@ def run_command(cmd):
|
|||||||
lexer.escape = ''
|
lexer.escape = ''
|
||||||
arg_list = list(lexer)
|
arg_list = list(lexer)
|
||||||
|
|
||||||
sp = subprocess.Popen(arg_list, stdin=None,
|
try:
|
||||||
|
sp = subprocess.Popen(arg_list, stdin=None,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
|
except:
|
||||||
|
print_debug("ERROR: The child (%s) raised an esception: %s\n" % (cmd, sys.exc_info()[1]), s, run_tests_log)
|
||||||
|
raise
|
||||||
|
|
||||||
out = sp.communicate()
|
out = sp.communicate()
|
||||||
output = ""
|
output = ""
|
||||||
output += out[0].decode("utf-8")
|
output += out[0].decode("utf-8")
|
||||||
@@ -213,7 +218,7 @@ def run_test(testname):
|
|||||||
obj_name = "%s.obj" % os.path.basename(filename)
|
obj_name = "%s.obj" % os.path.basename(filename)
|
||||||
exe_name = "%s.exe" % os.path.basename(filename)
|
exe_name = "%s.exe" % os.path.basename(filename)
|
||||||
|
|
||||||
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
|
cc_cmd = "%s /I. /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
|
||||||
(options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
|
(options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
|
||||||
if should_fail:
|
if should_fail:
|
||||||
cc_cmd += " /DEXPECT_FAILURE"
|
cc_cmd += " /DEXPECT_FAILURE"
|
||||||
@@ -355,7 +360,11 @@ def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
if check_test(filename):
|
if check_test(filename):
|
||||||
(compile_error, run_error) = run_test(filename)
|
try:
|
||||||
|
(compile_error, run_error) = run_test(filename)
|
||||||
|
except:
|
||||||
|
sys.exit(-1) # This is in case the child has unexpectedly died
|
||||||
|
|
||||||
if compile_error != 0:
|
if compile_error != 0:
|
||||||
compile_error_files += [ filename ]
|
compile_error_files += [ filename ]
|
||||||
if run_error != 0:
|
if run_error != 0:
|
||||||
@@ -680,8 +689,9 @@ def run_tests(options1, args, print_version):
|
|||||||
task_threads = [0] * nthreads
|
task_threads = [0] * nthreads
|
||||||
for x in range(nthreads):
|
for x in range(nthreads):
|
||||||
task_threads[x] = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests,
|
task_threads[x] = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests,
|
||||||
max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var))
|
max_test_length, finished_tests_counter, finished_tests_counter_lock, glob_var))
|
||||||
task_threads[x].start()
|
task_threads[x].start()
|
||||||
|
|
||||||
# wait for them to all finish and then return the number that failed
|
# wait for them to all finish and then return the number that failed
|
||||||
# (i.e. return 0 if all is ok)
|
# (i.e. return 0 if all is ok)
|
||||||
for t in task_threads:
|
for t in task_threads:
|
||||||
@@ -689,6 +699,12 @@ def run_tests(options1, args, print_version):
|
|||||||
if options.non_interactive == False:
|
if options.non_interactive == False:
|
||||||
print_debug("\n", s, run_tests_log)
|
print_debug("\n", s, run_tests_log)
|
||||||
|
|
||||||
|
|
||||||
|
for jb in task_threads:
|
||||||
|
if not jb.exitcode == 0:
|
||||||
|
raise OSError(2, 'Some test subprocess has thrown an exception', '')
|
||||||
|
|
||||||
|
|
||||||
temp_time = (time.time() - start_time)
|
temp_time = (time.time() - start_time)
|
||||||
elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))
|
elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))
|
||||||
|
|
||||||
|
|||||||
25
type.cpp
25
type.cpp
@@ -826,7 +826,7 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
|
|
||||||
llvm::DIFile diFile = pos.GetDIFile();
|
llvm::DIFile diFile = pos.GetDIFile();
|
||||||
llvm::DIType diType =
|
llvm::DIType diType =
|
||||||
m->diBuilder->createEnumerationType(scope, name, diFile, pos.first_line,
|
m->diBuilder->createEnumerationType(diFile, name, diFile, pos.first_line,
|
||||||
32 /* size in bits */,
|
32 /* size in bits */,
|
||||||
32 /* align in bits */,
|
32 /* align in bits */,
|
||||||
elementArray
|
elementArray
|
||||||
@@ -1956,6 +1956,25 @@ StructType::IsConstType() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
StructType::IsDefined() const {
|
||||||
|
for (int i = 0; i < GetElementCount(); i++) {
|
||||||
|
const Type *t = GetElementType(i);
|
||||||
|
const UndefinedStructType *ust = CastType<UndefinedStructType>(t);
|
||||||
|
if (ust != NULL) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const StructType *st = CastType<StructType>(t);
|
||||||
|
if (st != NULL) {
|
||||||
|
if (!st->IsDefined()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const Type *
|
const Type *
|
||||||
StructType::GetBaseType() const {
|
StructType::GetBaseType() const {
|
||||||
return this;
|
return this;
|
||||||
@@ -2179,7 +2198,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
|
llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
|
||||||
llvm::DIFile diFile = pos.GetDIFile();
|
llvm::DIFile diFile = pos.GetDIFile();
|
||||||
return m->diBuilder->createStructType(
|
return m->diBuilder->createStructType(
|
||||||
scope,
|
diFile,
|
||||||
name,
|
name,
|
||||||
diFile,
|
diFile,
|
||||||
pos.first_line, // Line number
|
pos.first_line, // Line number
|
||||||
@@ -2422,7 +2441,7 @@ UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
llvm::DIFile diFile = pos.GetDIFile();
|
llvm::DIFile diFile = pos.GetDIFile();
|
||||||
llvm::DIArray elements;
|
llvm::DIArray elements;
|
||||||
return m->diBuilder->createStructType(
|
return m->diBuilder->createStructType(
|
||||||
scope,
|
diFile,
|
||||||
name,
|
name,
|
||||||
diFile,
|
diFile,
|
||||||
pos.first_line, // Line number
|
pos.first_line, // Line number
|
||||||
|
|||||||
19
type.h
19
type.h
@@ -81,15 +81,15 @@ struct Variability {
|
|||||||
/** Enumerant that records each of the types that inherit from the Type
|
/** Enumerant that records each of the types that inherit from the Type
|
||||||
baseclass. */
|
baseclass. */
|
||||||
enum TypeId {
|
enum TypeId {
|
||||||
ATOMIC_TYPE,
|
ATOMIC_TYPE, // 0
|
||||||
ENUM_TYPE,
|
ENUM_TYPE, // 1
|
||||||
POINTER_TYPE,
|
POINTER_TYPE, // 2
|
||||||
ARRAY_TYPE,
|
ARRAY_TYPE, // 3
|
||||||
VECTOR_TYPE,
|
VECTOR_TYPE, // 4
|
||||||
STRUCT_TYPE,
|
STRUCT_TYPE, // 5
|
||||||
UNDEFINED_STRUCT_TYPE,
|
UNDEFINED_STRUCT_TYPE, // 6
|
||||||
REFERENCE_TYPE,
|
REFERENCE_TYPE, // 7
|
||||||
FUNCTION_TYPE
|
FUNCTION_TYPE // 8
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -675,6 +675,7 @@ public:
|
|||||||
bool IsIntType() const;
|
bool IsIntType() const;
|
||||||
bool IsUnsignedType() const;
|
bool IsUnsignedType() const;
|
||||||
bool IsConstType() const;
|
bool IsConstType() const;
|
||||||
|
bool IsDefined() const;
|
||||||
|
|
||||||
const Type *GetBaseType() const;
|
const Type *GetBaseType() const;
|
||||||
const StructType *GetAsVaryingType() const;
|
const StructType *GetAsVaryingType() const;
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
#ifndef MY_STDINT_H
|
|
||||||
#define MY_STDINT_H 1
|
|
||||||
|
|
||||||
typedef signed char int8_t;
|
|
||||||
typedef signed __int16 int16_t;
|
|
||||||
typedef signed __int32 int32_t;
|
|
||||||
typedef signed __int64 int64_t;
|
|
||||||
typedef unsigned char uint8_t;
|
|
||||||
|
|
||||||
typedef unsigned __int16 uint16_t;
|
|
||||||
typedef unsigned __int32 uint32_t;
|
|
||||||
typedef unsigned __int64 uint64_t;
|
|
||||||
|
|
||||||
#endif // MY_STDINT_H
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#ifndef MY_UNISTD_H
|
|
||||||
#define MY_UNISTD_H 1
|
|
||||||
|
|
||||||
inline bool isatty(int) { return false; }
|
|
||||||
|
|
||||||
#endif // MY_UNISTD_H
|
|
||||||
Reference in New Issue
Block a user