39 Commits

Author SHA1 Message Date
4182fa2967 python regex-based preprocessor proof of concept 2017-04-18 22:28:48 -04:00
d6cf38a929 Ignore llvm build directory 2017-04-11 14:08:00 -04:00
Dmitry Babokin
8c97883317 Merge pull request #1264 from dbabokin/attributelist
Renaming AttributeSet to AttributeList to follow trunk changes.
2017-03-28 17:02:28 -07:00
Dmitry Babokin
455a29c491 Renaming AttributeSet to AttributeList to follow trunk changes. 2017-03-28 16:58:49 -07:00
Dmitry Babokin
a618ad45bf Merge pull request #1263 from dbabokin/cbackend
Better fix for cbackend.
2017-03-22 13:29:06 -07:00
Dmitry Babokin
0ff8ae4596 Better fix cbackend. 2017-03-22 13:27:26 -07:00
Dmitry Babokin
a5b689439b Merge pull request #1262 from dbabokin/trunk_fix
Trunk fix
2017-03-22 12:33:39 -07:00
Dmitry Babokin
f9947541a1 Whitespace fixes 2017-03-22 12:32:26 -07:00
Dmitry Babokin
c2b2b38081 Fix for trunk. Probably it's temporary if they fix -- operator for arg_iterator in trunk. 2017-03-22 12:31:41 -07:00
Dmitry Babokin
7884c7da04 Merge pull request #1260 from dbabokin/alloy
For naming folders with llvm use dot instead of underscore.
2017-03-02 13:25:21 -08:00
Dmitry Babokin
b471e97a10 For naming folders with llvm use dot instead of underscore. 2017-03-02 13:24:15 -08:00
Dmitry Babokin
611fe0bc42 Merge pull request #1259 from dbabokin/llvm50
Enabling LLVM 5.0 and making fixes to track changes in LLVM for the past couple months.
2017-03-01 11:39:53 -08:00
Dmitry Babokin
6d649e1dff Enabling LLVM 5.0 and making fixes to track changes in LLVM for the past
couple months.
The changes are tested with LLVM 3.9, 4.0 and trunk on MacOS (sse4,
avx2, skx).
2017-03-01 11:10:34 -08:00
Dmitry Babokin
d0bfe7738a Merge pull request #1245 from dbabokin/git
Adding support for git repository instead of svn.
2016-12-01 22:11:58 +03:00
Dmitry Babokin
95d33554db Merge pull request #1244 from dbabokin/trunk_fix
Fix for trunk - change in DIBuilder interface
2016-12-01 22:10:45 +03:00
Dmitry Babokin
4298e3d0cd Fix for trunk - change in DIBuilder interface 2016-12-01 22:00:36 +03:00
Dmitry Babokin
a7fd70fa21 Adding support for using git repository instead of svn.
This is experimental for now, but going forward this will become
primary way of working with LLVM, as they are going to switch to
git in not too distant future.
2016-12-01 18:10:57 +03:00
Dmitry Babokin
60dc47e0a6 Merge pull request #1242 from dbabokin/fixes
SVML support for AVX512 and a couple of script fixes
2016-12-01 00:33:34 +03:00
Dmitry Babokin
ff298f21b7 Adding SVML support to AVX512 targets 2016-11-30 05:27:10 +03:00
Dmitry Babokin
f04a04a7e3 Set sysroot for CMake build on MacOS 2016-11-29 21:04:46 +03:00
Dmitry Babokin
39e7f0c2d4 3.9.0 is better choice for us. 3.9.1 has couple regressions 2016-11-29 21:03:53 +03:00
Dmitry Babokin
726b260cd5 Merge pull request #1236 from suluke/llvm_change/BitcodeWriter
Support llvm 4.0: Bitcode/ReaderWriter.h -> BitCode/BitcodeWriter.h
2016-11-15 02:02:54 +03:00
Lukas Böhm
6a8ce4b412 Apply Bitcode/ReaderWriter renaming in builtins.cpp
This also fixes usage of parseBitcodeFile after [r286752](https://reviews.llvm.org/D26562)
2016-11-14 23:13:08 +01:00
Lukas Böhm
32626ea9e3 Support llvm 4.0: Bitcode/ReaderWriter.h -> BitCode/BitcodeWriter.h 2016-11-14 21:38:25 +01:00
Dmitry Babokin
d4a8afd6e8 Merge pull request #1230 from Shishpan/trunkFix
Trunk fix for Rev.283004
2016-10-05 14:29:21 +03:00
Andrey Shishpanov
8acfd92f92 Trunk fix for Rev.283004 2016-10-05 14:17:14 +03:00
Dmitry Babokin
7fb4188f51 Merge pull request #1229 from Shishpan/trunkFix
Trunk fix for Rev.281284-281285.
2016-09-26 20:47:23 +03:00
Andrey Shishpanov
8b525bb8bc Trunk fix for Rev.281284-281285. 2016-09-26 20:24:36 +03:00
Dmitry Babokin
a86a16600b Merge pull request #1228 from Shishpan/trunkFix
Trunk fix for Rev.280686.
2016-09-07 14:11:37 +03:00
Andrey Shishpanov
d0341754d6 Trunk fix for Rev.280686. 2016-09-07 13:08:04 +03:00
Dmitry Babokin
f968bc1b2a Merge pull request #1227 from ned14/arm-neon-code-quality-fix
Fix ARM NEON output not always being inlined. Also improved scope for ARM NEON optimisation by LLVM, gained about 2% on my code here.
2016-09-05 18:07:46 +03:00
Niall Douglas (s [underscore] sourceforge {at} nedprod [dot] com)
7af7659ac2 Fix ARM NEON output not always being inlined. Also improved scope for ARM NEON optimisation by LLVM, gained about 2% on my code here. 2016-09-05 15:56:25 +01:00
Dmitry Babokin
a6952fd651 Merge pull request #1226 from dbabokin/test-fix
Fixing off by 1 access to local array.
2016-08-31 19:47:51 +03:00
Dmitry Babokin
4c7fb35f57 Fixing off by 1 access to local array. 2016-08-31 19:38:33 +03:00
Dmitry Babokin
87efb27dc5 Merge pull request #1225 from dbabokin/llvm40
Adding support for LLVM 4.0 (trunk)
2016-07-20 22:19:56 +03:00
Dmitry Babokin
45b306480e -Adding support for LLVM 4.0
-Switching 3.9 support to branch/release_39
-Switching 3.8 support to tags/release_381
2016-07-20 22:16:50 +03:00
Dmitry Babokin
2a68fc6c48 Merge pull request #1222 from dbabokin/192dev
Bumping version to 1.9.2dev
2016-07-08 22:03:49 +03:00
Dmitry Babokin
30d88e1683 Bumping version to 1.9.2dev 2016-07-08 21:44:59 +03:00
Dmitry Babokin
a97a69c96e Typo in Release Notes 2016-07-08 19:52:22 +03:00
30 changed files with 959 additions and 318 deletions

4
.gitignore vendored
View File

@@ -4,11 +4,15 @@ depend
ispc
ispc_test
ispc_ref
llvm/
objs
docs/doxygen
docs/*.html
tests*/*cpp
tests*/*run
tests*/*.o
tests_ispcpp/*.h
tests_ispcpp/*pre*
logs/
notify_log.log
alloy_results_*

165
alloy.py
View File

@@ -33,6 +33,8 @@
# // Author: Filippov Ilia
import re
def tail_and_save(file_in, file_out, tail = 100):
with open(file_in, 'r') as f_in:
lines = f_in.readlines()[-tail:]
@@ -91,6 +93,7 @@ def check_LLVM(which_LLVM):
return answer
def try_do_LLVM(text, command, from_validation):
print_debug("Command line: "+command+"\n", True, alloy_build)
if from_validation == True:
text = text + "\n"
print_debug("Trying to " + text, from_validation, alloy_build)
@@ -108,7 +111,77 @@ def try_do_LLVM(text, command, from_validation):
error("can't " + text, 1)
print_debug("DONE.\n", from_validation, alloy_build)
def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, from_validation, force, make, gcc_toolchain_path):
def checkout_LLVM(component, use_git, version_LLVM, revision, target_dir, from_validation):
    """Fetch the sources of one LLVM component into target_dir.

    component       -- one of "llvm", "clang", "libcxx", "clang-tools-extra",
                       "compiler-rt"; anything else is reported through error().
    use_git         -- if true, clone from git and switch to the release
                       branch; otherwise check out from svn.
    version_LLVM    -- "trunk" or an underscore-separated version ("3_2" ..
                       "4_0"); anything else is reported through error().
    revision        -- inserted verbatim into the "svn co" command line (may be
                       an empty string); not used in git mode.
    target_dir      -- directory to check out into, relative to the current
                       working directory.
    from_validation -- forwarded unchanged to try_do_LLVM().
    """
    GIT_REPO_BASE = "http://llvm.org/git/"
    #GIT_REPO_BASE = "https://github.com/llvm-mirror/"
    SVN_REPO_BASE = "http://llvm.org/svn/llvm-project/"

    # component -> project directory name in the svn repository.
    # The git repository is always named "<component>.git".
    svn_project_names = {
        "llvm":              "llvm",
        "clang":             "cfe",
        "libcxx":            "libcxx",
        "clang-tools-extra": "clang-tools-extra",
        "compiler-rt":       "compiler-rt",
    }

    # version -> (path inside the svn project, git branch name)
    version_locations = {
        "trunk": ("trunk",                      "master"),
        "4_0":   ("branches/release_40",        "release_40"),
        "3_9":   ("tags/RELEASE_390/final",     "release_39"),
        "3_8":   ("tags/RELEASE_381/final",     "release_38"),
        "3_7":   ("tags/RELEASE_370/final",     "release_37"),
        "3_6":   ("tags/RELEASE_362/final",     "release_36"),
        "3_5":   ("tags/RELEASE_351/final",     "release_35"),
        "3_4":   ("tags/RELEASE_34/dot2-final", "release_34"),
        "3_3":   ("tags/RELEASE_33/final",      "release_33"),
        "3_2":   ("tags/RELEASE_32/final",      "release_32"),
    }

    # Identify the component
    if component not in svn_project_names:
        error("Trying to checkout unidentified component: " + component, 1)
        # NOTE(review): error() presumably terminates the script; bail out
        # explicitly in case it does not, there is nothing to check out.
        return
    SVN_REPO = SVN_REPO_BASE + svn_project_names[component] + "/"
    GIT_REPO = GIT_REPO_BASE + component + ".git"

    # Identify the version
    if version_LLVM not in version_locations:
        error("Unsupported llvm version: " + version_LLVM, 1)
        return
    SVN_PATH, GIT_BRANCH = version_locations[version_LLVM]

    if use_git:
        try_do_LLVM("clone "+component+" from "+GIT_REPO+" to "+target_dir+" ",
                    "git clone "+GIT_REPO+" "+target_dir,
                    from_validation)
        if GIT_BRANCH != "master":
            # Remember where we started instead of assuming that ".." leads
            # back: that only holds for a single-component, non-symlinked
            # target_dir. The finally clause restores the directory even if
            # the checkout step raises.
            start_dir = os.getcwd()
            os.chdir(target_dir)
            try:
                try_do_LLVM("switch to "+GIT_BRANCH+" branch ",
                            "git checkout -b "+GIT_BRANCH+" remotes/origin/"+GIT_BRANCH, from_validation)
            finally:
                os.chdir(start_dir)
    else:
        try_do_LLVM("load "+component+" from "+SVN_REPO+SVN_PATH+" ",
                    "svn co "+revision+" "+SVN_REPO+SVN_PATH+" "+target_dir,
                    from_validation)
def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, from_validation, force, make, gcc_toolchain_path, use_git):
print_debug("Building LLVM. Version: " + version_LLVM + ". ", from_validation, alloy_build)
if revision != "":
print_debug("Revision: " + revision + ".\n", from_validation, alloy_build)
@@ -119,32 +192,11 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
llvm_home = os.environ["LLVM_HOME"]
make_sure_dir_exists(llvm_home)
FOLDER_NAME=version_LLVM
version_LLVM = re.sub('\.', '_', version_LLVM)
os.chdir(llvm_home)
FOLDER_NAME=version_LLVM
if version_LLVM == "trunk":
SVN_PATH="trunk"
if version_LLVM == "3.8":
SVN_PATH="tags/RELEASE_380/final"
version_LLVM = "3_8"
if version_LLVM == "3.7":
SVN_PATH="tags/RELEASE_370/final"
version_LLVM = "3_7"
if version_LLVM == "3.6":
SVN_PATH="tags/RELEASE_362/final"
version_LLVM = "3_6"
if version_LLVM == "3.5":
SVN_PATH="tags/RELEASE_351/final"
version_LLVM = "3_5"
if version_LLVM == "3.4":
SVN_PATH="tags/RELEASE_34/dot2-final"
version_LLVM = "3_4"
if version_LLVM == "3.3":
SVN_PATH="tags/RELEASE_33/final"
version_LLVM = "3_3"
if version_LLVM == "3.2":
SVN_PATH="tags/RELEASE_32/final"
version_LLVM = "3_2"
if revision != "":
FOLDER_NAME = FOLDER_NAME + "_" + revision
revision = "-" + revision
@@ -173,7 +225,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
if os.path.exists(os.path.join(path, "xcrun")):
found_xcrun = True
if found_xcrun:
mac_system_root = " --with-default-sysroot=`xcrun --show-sdk-path`"
mac_system_root = "`xcrun --show-sdk-path`"
else:
error("Can't find XCode (xcrun tool) - it's required on MacOS 10.9 and newer", 1)
@@ -184,13 +236,9 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
llvm_home + "\n", from_validation, alloy_build)
# load llvm
if tarball == "":
try_do_LLVM("load LLVM from http://llvm.org/svn/llvm-project/llvm/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/llvm/" + SVN_PATH + " " + LLVM_SRC,
from_validation)
checkout_LLVM("llvm", options.use_git, version_LLVM, revision, LLVM_SRC, from_validation)
os.chdir(LLVM_SRC + "/tools")
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
from_validation)
checkout_LLVM("clang", options.use_git, version_LLVM, revision, "clang", from_validation)
os.chdir("..")
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
# Starting with MacOS 10.9 Mavericks, the system doesn't contain headers for standard C++ library and
@@ -202,19 +250,13 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
# to the linker explicitly (either through command line or environment variables). So we are not doing it
# currently to make the build process easier.
os.chdir("projects")
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
from_validation)
checkout_LLVM("libcxx", options.use_git, version_LLVM, revision, "libcxx", from_validation)
os.chdir("..")
if extra == True:
os.chdir("tools/clang/tools")
try_do_LLVM("load extra clang extra tools ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
from_validation)
checkout_LLVM("clang-tools-extra", options.use_git, version_LLVM, revision, "extra", from_validation)
os.chdir("../../../projects")
try_do_LLVM("load extra clang compiler-rt ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
from_validation)
checkout_LLVM("compiler-rt", options.use_git, version_LLVM, revision, "compiler-rt", from_validation)
os.chdir("..")
else:
tar = tarball.split(" ")
@@ -249,7 +291,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
os.makedirs(LLVM_BIN_selfbuild)
os.chdir(LLVM_BUILD_selfbuild)
if version_LLVM not in LLVM_configure_capable:
# TODO: mac_root
try_do_LLVM("configure release version for selfbuild ",
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
" -DCMAKE_INSTALL_PREFIX=" + llvm_home + "/" + LLVM_BIN_selfbuild +
@@ -258,6 +299,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" else "") +
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" else "") +
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
" ../" + LLVM_SRC,
from_validation)
@@ -269,7 +311,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
LLVM_BIN_selfbuild + " --enable-optimized" +
" --enable-targets=x86,x86_64,nvptx" +
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
mac_system_root,
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
from_validation)
selfbuild_compiler = ("CC=" +llvm_home+ "/" + LLVM_BIN_selfbuild + "/bin/clang " +
"CXX="+llvm_home+ "/" + LLVM_BIN_selfbuild + "/bin/clang++ ")
@@ -285,7 +327,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
if debug == False:
if current_OS != "Windows":
if version_LLVM not in LLVM_configure_capable:
# TODO: mac_root
try_do_LLVM("configure release version ",
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
selfbuild_compiler +
@@ -295,6 +336,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
" ../" + LLVM_SRC,
from_validation)
@@ -304,7 +346,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
LLVM_BIN + " --enable-optimized" +
" --enable-targets=x86,x86_64,nvptx" +
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
mac_system_root,
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
from_validation)
else:
try_do_LLVM("configure release version ",
@@ -313,7 +355,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
from_validation)
else:
if version_LLVM not in LLVM_configure_capable:
# TODO: mac_root
try_do_LLVM("configure debug version ",
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
selfbuild_compiler +
@@ -323,6 +364,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
" ../" + LLVM_SRC,
from_validation)
@@ -332,7 +374,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
" --enable-debug-runtime --enable-debug-symbols --enable-keep-symbols" +
" --enable-targets=x86,x86_64,nvptx" +
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
mac_system_root,
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
from_validation)
# building llvm
if current_OS != "Windows":
@@ -352,6 +394,8 @@ def unsupported_llvm_targets(LLVM_VERSION):
"3.7":["avx512skx-i32x16"],
"3.8":[],
"3.9":[],
"4.0":[],
"5.0":[],
"trunk":[]}
return prohibited_list[LLVM_VERSION]
@@ -476,8 +520,12 @@ def build_ispc(version_LLVM, make):
temp = "3_7"
if version_LLVM == "3.8":
temp = "3_8"
if version_LLVM == "trunk":
if version_LLVM == "3.9":
temp = "3_9"
if version_LLVM == "4.0":
temp = "4_0"
if version_LLVM == "trunk":
temp = "5_0"
os.environ["LLVM_VERSION"] = "LLVM_" + temp
try_do_LLVM("clean ISPC for building", "msbuild ispc.vcxproj /t:clean", True)
try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", "msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", True)
@@ -617,7 +665,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
archs.append("x86-64")
if "native" in only:
sde_targets_t = []
for i in ["3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "trunk"]:
for i in ["3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9", "4.0", "trunk"]:
if i in only:
LLVM.append(i)
if "current" in only:
@@ -675,7 +723,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
gen_archs = ["x86-64"]
need_LLVM = check_LLVM(LLVM)
for i in range(0,len(need_LLVM)):
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path)
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path, False)
# begin validation run for stability
common.remove_if_exists(stability.in_file)
R = [[[],[]],[[],[]],[[],[]],[[],[]]]
@@ -789,7 +837,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
# prepare newest LLVM
need_LLVM = check_LLVM([newest_LLVM])
if len(need_LLVM) != 0:
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path)
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path, options.use_git)
if perf_llvm == False:
# prepare reference point. build both test and reference compilers
try_do_LLVM("apply git", "git branch", True)
@@ -903,7 +951,7 @@ def Main():
if os.environ.get("SMTP_ISPC") == None:
error("you have no SMTP_ISPC in your environment for option notify", 1)
if options.only != "":
test_only_r = " 3.2 3.3 3.4 3.5 3.6 3.7 3.8 trunk current build stability performance x86 x86-64 x86_64 -O0 -O2 native debug nodebug "
test_only_r = " 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 trunk current build stability performance x86 x86-64 x86_64 -O0 -O2 native debug nodebug "
test_only = options.only.split(" ")
for iterator in test_only:
if not (" " + iterator + " " in test_only_r):
@@ -929,11 +977,14 @@ def Main():
if options.perf_llvm == True:
if options.branch == "master":
options.branch = "trunk"
if options.use_git and options.revision != "":
error("--revision is not supported with --git", 1)
try:
start_time = time.time()
if options.build_llvm:
build_LLVM(options.version, options.revision, options.folder, options.tarball,
options.debug, options.selfbuild, options.extra, False, options.force, make, options.gcc_toolchain_path)
options.debug, options.selfbuild, options.extra, False, options.force, make, options.gcc_toolchain_path, options.use_git)
if options.validation_run:
validation_run(options.only, options.only_targets, options.branch,
options.number_for_performance, options.notify, options.update, int(options.speed),
@@ -1013,13 +1064,13 @@ if __name__ == '__main__':
llvm_group = OptionGroup(parser, "Options for building LLVM",
"These options must be used with -b option.")
llvm_group.add_option('--version', dest='version',
help='version of llvm to build: 3.2 3.3 3.4 3.5 3.6 3.7 3.8 trunk. Default: trunk', default="trunk")
help='version of llvm to build: 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 trunk. Default: trunk', default="trunk")
llvm_group.add_option('--with-gcc-toolchain', dest='gcc_toolchain_path',
help='GCC install dir to use when building clang. It is important to set when ' +
'you have alternative gcc installation. Note that otherwise gcc from standard ' +
'location will be used, not from your PATH', default="")
llvm_group.add_option('--revision', dest='revision',
help='revision of llvm to build in format r172870', default="")
help='revision of llvm to build in format r172870 (not supported with --git)', default="")
llvm_group.add_option('--debug', dest='debug',
help='debug build of LLVM?', default=False, action="store_true")
llvm_group.add_option('--folder', dest='folder',
@@ -1032,6 +1083,8 @@ if __name__ == '__main__':
help='rebuild LLVM', default=False, action='store_true')
llvm_group.add_option('--extra', dest='extra',
help='load extra clang tools', default=False, action='store_true')
llvm_group.add_option('--git', dest='use_git',
help='use git llvm repository instead of svn', default=False, action='store_true')
parser.add_option_group(llvm_group)
# options for activity "validation run"
run_group = OptionGroup(parser, "Options for validation run",
@@ -1054,7 +1107,7 @@ if __name__ == '__main__':
run_group.add_option('--only', dest='only',
help='set types of tests. Possible values:\n' +
'-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance),\n' +
'build (only build with different LLVM), 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, trunk, native (do not use SDE),\n' +
'build (only build with different LLVM), 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, trunk, native (do not use SDE),\n' +
'current (do not rebuild ISPC), debug (only with debug info), nodebug (only without debug info, default).',
default="")
run_group.add_option('--perf_LLVM', dest='perf_llvm',

View File

@@ -72,7 +72,11 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/ADT/Triple.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Bitcode/ReaderWriter.h>
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
#include <llvm/Bitcode/ReaderWriter.h>
#else
#include <llvm/Bitcode/BitcodeReader.h>
#endif
extern int yyparse();
struct yy_buffer_state;
@@ -800,7 +804,13 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
llvm::MemoryBufferRef bcBuf = llvm::MemoryBuffer::getMemBuffer(sb)->getMemBufferRef();
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
#if ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 // LLVM 4.0+
llvm::Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
if (!ModuleOrErr) {
Error(SourcePos(), "Error parsing stdlib bitcode: %s", toString(ModuleOrErr.takeError()).c_str());
} else {
llvm::Module *bcModule = ModuleOrErr.get().release();
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
llvm::ErrorOr<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
if (std::error_code EC = ModuleOrErr.getError())
Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
@@ -989,7 +999,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
diType,
true /* static */,
sym_const_storagePtr);
#else // LLVM 3.7+
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
Assert(sym_const_storagePtr);
m->diBuilder->createGlobalVariable(
@@ -1001,6 +1011,17 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
diType,
true /* static */,
sym_const_storagePtr);
#else // LLVM 4.0+
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
llvm::DIGlobalVariableExpression *var = m->diBuilder->createGlobalVariableExpression(
file,
name,
name,
file,
0 /* line */,
diType,
true /* static */);
sym_GV_storagePtr->addDebugInfo(var);
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
Assert(var.Verify());
@@ -1087,7 +1108,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
diType,
false /* static */,
sym->storagePtr);
#else // LLVM 3.7+
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
Assert(sym_const_storagePtr);
m->diBuilder->createGlobalVariable(
@@ -1099,7 +1120,18 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
diType,
false /* static */,
sym_const_storagePtr);
#endif
#else // LLVM 4.0+
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
llvm::DIGlobalVariableExpression *var = m->diBuilder->createGlobalVariableExpression(
file,
sym->name.c_str(),
sym->name.c_str(),
file,
0 /* line */,
diType,
false /* static */);
sym_GV_storagePtr->addDebugInfo(var);
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
Assert(var.Verify());
#else // LLVM 3.7+

View File

@@ -151,6 +151,10 @@ define(`PTR_OP_ARGS',
LLVM_VERSION, LLVM_3_8,
``$1 , $1 *'',
LLVM_VERSION, LLVM_3_9,
``$1 , $1 *'',
LLVM_VERSION, LLVM_4_0,
``$1 , $1 *'',
LLVM_VERSION, LLVM_5_0,
``$1 , $1 *'',
``$1 *''
)

View File

@@ -617,16 +617,16 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
}
ctlztz()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
; or, use the macro to call the 4-wide ones twice with our 8-wide
; vectors...
;; TODO: should we use masked versions of SVML functions?
;; svml
include(`svml.m4')
svml_stubs(float,f,WIDTH)
svml_stubs(double,d,WIDTH)
svml_declare(float,f16,16)
svml_define(float,f16,16,f)
;; double precision
svml_declare(double,8,8)
svml_define_x(double,8,8,d,16)

View File

@@ -36,6 +36,10 @@ ifelse(LLVM_VERSION, LLVM_3_7,
LLVM_VERSION, LLVM_3_8,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_3_9,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_4_0,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_5_0,
`include(`target-avx512-common.ll')'
)
@@ -60,6 +64,10 @@ ifelse(LLVM_VERSION, LLVM_3_7,
LLVM_VERSION, LLVM_3_8,
rcp_rsqrt_varying_float_knl(),
LLVM_VERSION, LLVM_3_9,
rcp_rsqrt_varying_float_knl(),
LLVM_VERSION, LLVM_4_0,
rcp_rsqrt_varying_float_knl(),
LLVM_VERSION, LLVM_5_0,
rcp_rsqrt_varying_float_knl()
)

View File

@@ -42,12 +42,12 @@ include(`target-neon-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone alwaysinline {
unary4to8conv(r, i16, float, @llvm.arm.neon.vcvthf2fp, %v)
ret <8 x float> %r
}
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone alwaysinline {
unary4to8conv(r, float, i16, @llvm.arm.neon.vcvtfp2hf, %v)
ret <8 x i16> %r
}
@@ -115,13 +115,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
binary4to8(r, float, @llvm.arm.neon.vmaxs.v4f32, %0, %1)
ret <WIDTH x float> %r
}
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
binary4to8(r, float, @llvm.arm.neon.vmins.v4f32, %0, %1)
ret <WIDTH x float> %r
}
@@ -131,22 +131,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to8(r, i32, @llvm.arm.neon.vmins.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to8(r, i32, @llvm.arm.neon.vmaxs.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to8(r, i32, @llvm.arm.neon.vminu.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to8(r, i32, @llvm.arm.neon.vmaxu.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
@@ -156,7 +156,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
unary4to8(x0, float, @llvm.arm.neon.vrecpe.v4f32, %d)
binary4to8(x0_nr, float, @llvm.arm.neon.vrecps.v4f32, %d, %x0)
%x1 = fmul <WIDTH x float> %x0, %x0_nr
@@ -168,7 +168,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
unary4to8(x0, float, @llvm.arm.neon.vrsqrte.v4f32, %d)
%x0_2 = fmul <WIDTH x float> %x0, %x0
binary4to8(x0_nr, float, @llvm.arm.neon.vrsqrts.v4f32, %d, %x0_2)
@@ -179,7 +179,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
ret <WIDTH x float> %x2
}
define float @__rsqrt_uniform_float(float) nounwind readnone {
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
@@ -189,7 +189,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
ret float %r
}
define float @__rcp_uniform_float(float) nounwind readnone {
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
@@ -201,7 +201,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
unary4to8(result, float, @llvm.sqrt.v4f32, %0)
;; this returns nan for v=0, which is undesirable..
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
@@ -211,7 +211,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
unary4to8(r, double, @llvm.sqrt.v4f64, %0)
ret <WIDTH x double> %r
}
@@ -219,7 +219,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone {
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone alwaysinline {
%and_mask = and <WIDTH x i16> %0,
<i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128>
%v4 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %and_mask)
@@ -288,48 +288,48 @@ define(`neon_reduce', `
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @add_f32(float, float) {
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
%r = fadd float %0, %1
ret float %r
}
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) {
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone alwaysinline {
%r = fadd <WIDTH x float> %0, %1
ret <WIDTH x float> %r
}
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__add_varying_float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
}
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @min_f32(float, float) {
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp olt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__min_varying_float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
}
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @max_f32(float, float) {
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp ugt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__max_varying_float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
}
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
%a16 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %0)
%a32 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a16)
%a0 = extractelement <2 x i32> %a32, i32 0
@@ -341,7 +341,7 @@ define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<WIDTH x i16>)
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
%a1 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<WIDTH x i16> %0)
%a2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a1)
%aa = extractelement <2 x i64> %a2, i32 0
@@ -352,7 +352,7 @@ define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
v8tov4(i32, %0, %va, %vb)
%pa = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %va)
%pb = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %vb)
@@ -365,53 +365,53 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_si32(i32, i32) {
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp slt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__min_varying_int32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_si32(i32, i32) {
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp sgt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__max_varying_int32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
}
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_ui32(i32, i32) {
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ult i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__min_varying_uint32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_ui32(i32, i32) {
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ugt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__max_varying_uint32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
}
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone alwaysinline {
v8tov2(double, %0, %v0, %v1, %v2, %v3)
%v01 = fadd <2 x double> %v0, %v1
%v23 = fadd <2 x double> %v2, %v3
@@ -422,15 +422,15 @@ define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
ret double %m
}
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone alwaysinline {
reduce8(double, @__min_varying_double, @__min_uniform_double)
}
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone alwaysinline {
reduce8(double, @__max_varying_double, @__max_uniform_double)
}
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
v8tov2(i64, %0, %v0, %v1, %v2, %v3)
%v01 = add <2 x i64> %v0, %v1
%v23 = add <2 x i64> %v2, %v3
@@ -441,19 +441,19 @@ define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
ret i64 %m
}
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
}
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
}
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
}
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
@@ -462,56 +462,56 @@ define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
%r = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
define <8 x i8> @__avg_up_int8(<8 x i8>, <8 x i8>) nounwind readnone {
define <8 x i8> @__avg_up_int8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
%r = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
define <8 x i8> @__avg_down_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
define <8 x i8> @__avg_down_uint8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
%r = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
define <8 x i8> @__avg_down_int8(<8 x i8>, <8 x i8>) nounwind readnone {
define <8 x i8> @__avg_down_int8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
%r = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
%r = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @__avg_up_int16(<8 x i16>, <8 x i16>) nounwind readnone {
define <8 x i16> @__avg_up_int16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
%r = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @__avg_down_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
define <8 x i16> @__avg_down_uint16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
%r = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
%r = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}

View File

@@ -43,12 +43,12 @@ include(`target-neon-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
define <4 x float> @__half_to_float_varying(<4 x i16> %v) nounwind readnone {
define <4 x float> @__half_to_float_varying(<4 x i16> %v) nounwind readnone alwaysinline {
%r = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %v)
ret <4 x float> %r
}
define <4 x i16> @__float_to_half_varying(<4 x float> %v) nounwind readnone {
define <4 x i16> @__float_to_half_varying(<4 x float> %v) nounwind readnone alwaysinline {
%r = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %v)
ret <4 x i16> %r
}
@@ -106,13 +106,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
%r = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %0, <4 x float> %1)
ret <WIDTH x float> %r
}
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
%r = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %0, <4 x float> %1)
ret <WIDTH x float> %r
}
@@ -122,22 +122,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
%r = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %r
}
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
%r = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %r
}
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
%r = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %r
}
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
%r = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %r
}
@@ -147,7 +147,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
%x0 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %d)
%x0_nr = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %d, <4 x float> %x0)
%x1 = fmul <4 x float> %x0, %x0_nr
@@ -159,7 +159,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
%x0 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %d)
%x0_2 = fmul <4 x float> %x0, %x0
%x0_nr = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %d, <4 x float> %x0_2)
@@ -170,7 +170,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
ret <4 x float> %x2
}
define float @__rsqrt_uniform_float(float) nounwind readnone {
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -179,7 +179,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
ret float %r
}
define float @__rcp_uniform_float(float) nounwind readnone {
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -190,7 +190,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
%result = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
;; this returns nan for v=0, which is undesirable..
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
@@ -200,7 +200,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %0)
ret <4 x double> %r
}
@@ -208,7 +208,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
define i64 @__movmsk(<4 x MASK>) nounwind readnone {
define i64 @__movmsk(<4 x MASK>) nounwind readnone alwaysinline {
%and_mask = and <4 x MASK> %0, <MASK 1, MASK 2, MASK 4, MASK 8>
%v01 = shufflevector <4 x i32> %and_mask, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%v23 = shufflevector <4 x i32> %and_mask, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -264,42 +264,42 @@ define(`neon_reduce', `
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @add_f32(float, float) {
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
%r = fadd float %0, %1
ret float %r
}
define float @__reduce_add_float(<4 x float>) nounwind readnone {
define float @__reduce_add_float(<4 x float>) nounwind readnone alwaysinline {
neon_reduce(float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
}
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @min_f32(float, float) {
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp olt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_min_float(<4 x float>) nounwind readnone {
define float @__reduce_min_float(<4 x float>) nounwind readnone alwaysinline {
neon_reduce(float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
}
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @max_f32(float, float) {
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp ugt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_max_float(<4 x float>) nounwind readnone {
define float @__reduce_max_float(<4 x float>) nounwind readnone alwaysinline {
neon_reduce(float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
}
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
%v8 = shufflevector <4 x i8> %0, <4 x i8> zeroinitializer,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
%a16 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %v8)
@@ -313,7 +313,7 @@ define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
%a32 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %0)
%a0 = extractelement <2 x i32> %a32, i32 0
%a1 = extractelement <2 x i32> %a32, i32 1
@@ -323,7 +323,7 @@ define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %0)
%a0 = extractelement <2 x i64> %a64, i32 0
%a1 = extractelement <2 x i64> %a64, i32 1
@@ -333,53 +333,53 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_si32(i32, i32) {
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp slt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_si32(i32, i32) {
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp sgt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
}
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_ui32(i32, i32) {
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ult i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_ui32(i32, i32) {
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ugt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
}
define double @__reduce_add_double(<4 x double>) nounwind readnone {
define double @__reduce_add_double(<4 x double>) nounwind readnone alwaysinline {
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
<2 x i32> <i32 0, i32 1>
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
@@ -391,15 +391,15 @@ define double @__reduce_add_double(<4 x double>) nounwind readnone {
ret double %m
}
define double @__reduce_min_double(<4 x double>) nounwind readnone {
define double @__reduce_min_double(<4 x double>) nounwind readnone alwaysinline {
reduce4(double, @__min_varying_double, @__min_uniform_double)
}
define double @__reduce_max_double(<4 x double>) nounwind readnone {
define double @__reduce_max_double(<4 x double>) nounwind readnone alwaysinline {
reduce4(double, @__max_varying_double, @__max_uniform_double)
}
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone alwaysinline {
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
<2 x i32> <i32 0, i32 1>
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
@@ -411,19 +411,19 @@ define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
ret i64 %m
}
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone alwaysinline {
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
}
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone alwaysinline {
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
}
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone alwaysinline {
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
}
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone alwaysinline {
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
@@ -432,56 +432,56 @@ define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
declare <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
%r = call <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone {
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
%r = call <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
%r = call <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone {
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
%r = call <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
%r = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone {
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
%r = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
%r = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
%r = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}

View File

@@ -42,12 +42,12 @@ include(`target-neon-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone alwaysinline {
unary4to16conv(r, i16, float, @llvm.arm.neon.vcvthf2fp, %v)
ret <16 x float> %r
}
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone alwaysinline {
unary4to16conv(r, float, i16, @llvm.arm.neon.vcvtfp2hf, %v)
ret <16 x i16> %r
}
@@ -125,13 +125,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
binary4to16(r, float, @llvm.arm.neon.vmaxs.v4f32, %0, %1)
ret <WIDTH x float> %r
}
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone {
<WIDTH x float>) nounwind readnone alwaysinline {
binary4to16(r, float, @llvm.arm.neon.vmins.v4f32, %0, %1)
ret <WIDTH x float> %r
}
@@ -141,22 +141,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to16(r, i32, @llvm.arm.neon.vmins.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to16(r, i32, @llvm.arm.neon.vmaxs.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to16(r, i32, @llvm.arm.neon.vminu.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
binary4to16(r, i32, @llvm.arm.neon.vmaxu.v4i32, %0, %1)
ret <WIDTH x i32> %r
}
@@ -166,7 +166,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
unary4to16(x0, float, @llvm.arm.neon.vrecpe.v4f32, %d)
binary4to16(x0_nr, float, @llvm.arm.neon.vrecps.v4f32, %d, %x0)
%x1 = fmul <WIDTH x float> %x0, %x0_nr
@@ -178,7 +178,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
unary4to16(x0, float, @llvm.arm.neon.vrsqrte.v4f32, %d)
%x0_2 = fmul <WIDTH x float> %x0, %x0
binary4to16(x0_nr, float, @llvm.arm.neon.vrsqrts.v4f32, %d, %x0_2)
@@ -189,7 +189,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
ret <WIDTH x float> %x2
}
define float @__rsqrt_uniform_float(float) nounwind readnone {
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<16 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
@@ -201,7 +201,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
ret float %r
}
define float @__rcp_uniform_float(float) nounwind readnone {
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
%v1 = bitcast float %0 to <1 x float>
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
<16 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
@@ -215,7 +215,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
unary4to16(result, float, @llvm.sqrt.v4f32, %0)
;; this returns nan for v=0, which is undesirable..
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
@@ -225,7 +225,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
unary4to16(r, double, @llvm.sqrt.v4f64, %0)
ret <WIDTH x double> %r
}
@@ -233,7 +233,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone {
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone alwaysinline {
%and_mask = and <WIDTH x i8> %0,
<i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128,
i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128>
@@ -327,41 +327,41 @@ define(`neon_reduce', `
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @add_f32(float, float) {
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
%r = fadd float %0, %1
ret float %r
}
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) {
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone alwaysinline {
%r = fadd <WIDTH x float> %0, %1
ret <WIDTH x float> %r
}
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__add_varying_float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
}
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @min_f32(float, float) {
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp olt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__min_varying_float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
}
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
define internal float @max_f32(float, float) {
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
%cmp = fcmp ugt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone {
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone alwaysinline {
neon_reduce(float, @__max_varying_float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
}
@@ -369,7 +369,7 @@ declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnon
declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
%a16 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %0)
%a32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a16)
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a32)
@@ -379,7 +379,7 @@ define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
ret i64 %r
}
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
v16tov8(i16, %0, %va, %vb)
%a32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %va)
%b32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %vb)
@@ -392,7 +392,7 @@ define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
ret i64 %r
}
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
v16tov4(i32, %0, %va, %vb, %vc, %vd)
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %va)
%b64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %vb)
@@ -409,101 +409,101 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_si32(i32, i32) {
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp slt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__min_varying_int32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_si32(i32, i32) {
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp sgt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__max_varying_int32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
}
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @min_ui32(i32, i32) {
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ult i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__min_varying_uint32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
}
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
define internal i32 @max_ui32(i32, i32) {
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ugt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone {
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
neon_reduce(i32, @__max_varying_uint32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
}
define internal double @__add_uniform_double(double, double) {
define internal double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
%r = fadd double %0, %1
ret double %r
}
define internal <WIDTH x double> @__add_varying_double(<WIDTH x double>, <WIDTH x double>) {
define internal <WIDTH x double> @__add_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone alwaysinline {
%r = fadd <WIDTH x double> %0, %1
ret <WIDTH x double> %r
}
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone alwaysinline {
reduce16(double, @__add_varying_double, @__add_uniform_double)
}
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone alwaysinline {
reduce16(double, @__min_varying_double, @__min_uniform_double)
}
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone {
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone alwaysinline {
reduce16(double, @__max_varying_double, @__max_uniform_double)
}
define internal i64 @__add_uniform_int64(i64, i64) {
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
%r = add i64 %0, %1
ret i64 %r
}
define internal <WIDTH x i64> @__add_varying_int64(<WIDTH x i64>, <WIDTH x i64>) {
define internal <WIDTH x i64> @__add_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
%r = add <WIDTH x i64> %0, %1
ret <WIDTH x i64> %r
}
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce16(i64, @__add_varying_int64, @__add_uniform_int64)
}
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce16(i64, @__min_varying_int64, @__min_uniform_int64)
}
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce16(i64, @__max_varying_int64, @__max_uniform_int64)
}
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64)
}
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
@@ -512,35 +512,35 @@ define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
%r = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @__avg_up_int8(<16 x i8>, <16 x i8>) nounwind readnone {
define <16 x i8> @__avg_up_int8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
%r = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @__avg_down_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
define <16 x i8> @__avg_down_uint8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
%r = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @__avg_down_int8(<16 x i8>, <16 x i8>) nounwind readnone {
define <16 x i8> @__avg_down_int8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
%r = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
@@ -551,7 +551,7 @@ define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone {
define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)
@@ -562,7 +562,7 @@ define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone {
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
@@ -573,7 +573,7 @@ define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)

View File

@@ -49,7 +49,7 @@ ctlztz()
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
define float @__half_to_float_uniform(i16 %v) nounwind readnone alwaysinline {
%v1 = bitcast i16 %v to <1 x i16>
%vec = shufflevector <1 x i16> %v1, <1 x i16> undef,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -58,7 +58,7 @@ define float @__half_to_float_uniform(i16 %v) nounwind readnone {
ret float %r
}
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline {
%v1 = bitcast float %v to <1 x float>
%vec = shufflevector <1 x float> %v1, <1 x float> undef,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -70,7 +70,14 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; math
define void @__fastmath() nounwind {
declare i32 @llvm.arm.get.fpscr() nounwind
declare void @llvm.arm.set.fpscr(i32) nounwind
define void @__fastmath() nounwind alwaysinline {
%x = call i32 @llvm.arm.get.fpscr()
; Turn on FTZ (bit 24) and default NaN (bit 25)
%y = or i32 %x, 50331648
call void @llvm.arm.set.fpscr(i32 %y)
ret void
}
@@ -120,111 +127,111 @@ declare double @__ceil_uniform_double(double) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; min/max
define float @__max_uniform_float(float, float) nounwind readnone {
define float @__max_uniform_float(float, float) nounwind readnone alwaysinline {
%cmp = fcmp ugt float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define float @__min_uniform_float(float, float) nounwind readnone {
define float @__min_uniform_float(float, float) nounwind readnone alwaysinline {
%cmp = fcmp ult float %0, %1
%r = select i1 %cmp, float %0, float %1
ret float %r
}
define i32 @__min_uniform_int32(i32, i32) nounwind readnone {
define i32 @__min_uniform_int32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp slt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__max_uniform_int32(i32, i32) nounwind readnone {
define i32 @__max_uniform_int32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp sgt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__min_uniform_uint32(i32, i32) nounwind readnone {
define i32 @__min_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ult i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i32 @__max_uniform_uint32(i32, i32) nounwind readnone {
define i32 @__max_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
%cmp = icmp ugt i32 %0, %1
%r = select i1 %cmp, i32 %0, i32 %1
ret i32 %r
}
define i64 @__min_uniform_int64(i64, i64) nounwind readnone {
define i64 @__min_uniform_int64(i64, i64) nounwind readnone alwaysinline {
%cmp = icmp slt i64 %0, %1
%r = select i1 %cmp, i64 %0, i64 %1
ret i64 %r
}
define i64 @__max_uniform_int64(i64, i64) nounwind readnone {
define i64 @__max_uniform_int64(i64, i64) nounwind readnone alwaysinline {
%cmp = icmp sgt i64 %0, %1
%r = select i1 %cmp, i64 %0, i64 %1
ret i64 %r
}
define i64 @__min_uniform_uint64(i64, i64) nounwind readnone {
define i64 @__min_uniform_uint64(i64, i64) nounwind readnone alwaysinline {
%cmp = icmp ult i64 %0, %1
%r = select i1 %cmp, i64 %0, i64 %1
ret i64 %r
}
define i64 @__max_uniform_uint64(i64, i64) nounwind readnone {
define i64 @__max_uniform_uint64(i64, i64) nounwind readnone alwaysinline {
%cmp = icmp ugt i64 %0, %1
%r = select i1 %cmp, i64 %0, i64 %1
ret i64 %r
}
define double @__min_uniform_double(double, double) nounwind readnone {
define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
%cmp = fcmp olt double %0, %1
%r = select i1 %cmp, double %0, double %1
ret double %r
}
define double @__max_uniform_double(double, double) nounwind readnone {
define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
%cmp = fcmp ogt double %0, %1
%r = select i1 %cmp, double %0, double %1
ret double %r
}
define <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
define <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
%m = icmp slt <WIDTH x i64> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
ret <WIDTH x i64> %r
}
define <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
define <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
%m = icmp sgt <WIDTH x i64> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
ret <WIDTH x i64> %r
}
define <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
define <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
%m = icmp ult <WIDTH x i64> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
ret <WIDTH x i64> %r
}
define <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
define <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
%m = icmp ugt <WIDTH x i64> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
ret <WIDTH x i64> %r
}
define <WIDTH x double> @__min_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone {
<WIDTH x double>) nounwind readnone alwaysinline {
%m = fcmp olt <WIDTH x double> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x double> %0, <WIDTH x double> %1
ret <WIDTH x double> %r
}
define <WIDTH x double> @__max_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone {
<WIDTH x double>) nounwind readnone alwaysinline {
%m = fcmp ogt <WIDTH x double> %0, %1
%r = select <WIDTH x i1> %m, <WIDTH x double> %0, <WIDTH x double> %1
ret <WIDTH x double> %r
@@ -234,14 +241,14 @@ define <WIDTH x double> @__max_varying_double(<WIDTH x double>,
declare float @llvm.sqrt.f32(float)
define float @__sqrt_uniform_float(float) nounwind readnone {
define float @__sqrt_uniform_float(float) nounwind readnone alwaysinline {
%r = call float @llvm.sqrt.f32(float %0)
ret float %r
}
declare double @llvm.sqrt.f64(double)
define double @__sqrt_uniform_double(double) nounwind readnone {
define double @__sqrt_uniform_double(double) nounwind readnone alwaysinline {
%r = call double @llvm.sqrt.f64(double %0)
ret double %r
}
@@ -251,12 +258,12 @@ define double @__sqrt_uniform_double(double) nounwind readnone {
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
define i32 @__popcnt_int32(i32) nounwind readnone {
define i32 @__popcnt_int32(i32) nounwind readnone alwaysinline {
%v = call i32 @llvm.ctpop.i32(i32 %0)
ret i32 %v
}
define i64 @__popcnt_int64(i64) nounwind readnone {
define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
%v = call i64 @llvm.ctpop.i64(i64 %0)
ret i64 %v
}

View File

@@ -35,6 +35,10 @@ define(`WIDTH',`16')
ifelse(LLVM_VERSION, LLVM_3_8,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_3_9,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_4_0,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_5_0,
`include(`target-avx512-common.ll')'
)
@@ -80,6 +84,10 @@ define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly al
ifelse(LLVM_VERSION, LLVM_3_8,
rcp_rsqrt_varying_float_skx(),
LLVM_VERSION, LLVM_3_9,
rcp_rsqrt_varying_float_skx(),
LLVM_VERSION, LLVM_4_0,
rcp_rsqrt_varying_float_skx(),
LLVM_VERSION, LLVM_5_0,
rcp_rsqrt_varying_float_skx()
)

View File

@@ -54,9 +54,13 @@ define(`MASK_HIGH_BIT_ON',
define(`PTR_OP_ARGS',
ifelse(LLVM_VERSION, LLVM_3_7,
``$1 , $1 *'',
ifelse(LLVM_VERSION, LLVM_3_8,
LLVM_VERSION, LLVM_3_8,
``$1 , $1 *'',
ifelse(LLVM_VERSION, LLVM_3_9,
LLVM_VERSION, LLVM_3_9,
``$1 , $1 *'',
LLVM_VERSION, LLVM_4_0,
``$1 , $1 *'',
LLVM_VERSION, LLVM_5_0,
``$1 , $1 *'',
``$1 *''
)

View File

@@ -57,6 +57,10 @@ define(`PTR_OP_ARGS',
LLVM_VERSION, LLVM_3_8,
``$1 , $1 *'',
LLVM_VERSION, LLVM_3_9,
``$1 , $1 *'',
LLVM_VERSION, LLVM_4_0,
``$1 , $1 *'',
LLVM_VERSION, LLVM_5_0,
``$1 , $1 *'',
``$1 *''
)
@@ -69,6 +73,10 @@ define(`MdORi64',
``i64'',
LLVM_VERSION, LLVM_3_9,
``i64'',
LLVM_VERSION, LLVM_4_0,
``i64'',
LLVM_VERSION, LLVM_5_0,
``i64'',
``double''
)
)
@@ -78,6 +86,10 @@ define(`MfORi32',
``i32'',
LLVM_VERSION, LLVM_3_9,
``i32'',
LLVM_VERSION, LLVM_4_0,
``i32'',
LLVM_VERSION, LLVM_5_0,
``i32'',
``float''
)
)
@@ -1586,6 +1598,12 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
',LLVM_VERSION,LLVM_3_9,`
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
',LLVM_VERSION,LLVM_4_0,`
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
',LLVM_VERSION,LLVM_5_0,`
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
',`
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst
')
@@ -1614,6 +1632,12 @@ define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
',LLVM_VERSION,LLVM_3_9,`
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
%r = extractvalue { $2, i1 } %r_t, 0
',LLVM_VERSION,LLVM_4_0,`
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
%r = extractvalue { $2, i1 } %r_t, 0
',LLVM_VERSION,LLVM_5_0,`
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
%r = extractvalue { $2, i1 } %r_t, 0
',`
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst
')

View File

@@ -133,8 +133,8 @@
#define snprintf _snprintf
#endif
///////////////////////////////////////////////////////////////////////////////
// This part of code was in LLVM's ConstantsScanner.h,
// but it was removed in revision #232397
// This part of code was in LLVM's ConstantsScanner.h,
// but it was removed in revision #232397
namespace constant_scanner {
class constant_iterator : public std::iterator<std::forward_iterator_tag,
@@ -381,8 +381,8 @@ namespace {
};
} // end anonymous namespace
static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm::ArrayType*> &t,
std::vector<llvm::IntegerType*> &i, std::vector<bool> &IsVolatile,
static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm::ArrayType*> &t,
std::vector<llvm::IntegerType*> &i, std::vector<bool> &IsVolatile,
std::vector<int> &Alignment) {
TypeFinder(t, i, IsVolatile, Alignment).run(*m);
}
@@ -390,7 +390,7 @@ static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm
static bool is_vec16_i64_ty(llvm::Type *Ty) {
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(Ty);
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
VTy->getElementType()->getPrimitiveSizeInBits() == 64)
return true;
return false;
@@ -462,7 +462,11 @@ namespace {
VectorConstantIndex = 0;
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
virtual const char *getPassName() const { return "C backend"; }
#else // LLVM 4.0+
virtual llvm::StringRef getPassName() const { return "C backend"; }
#endif
void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
@@ -525,8 +529,10 @@ namespace {
bool IgnoreName = false,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL = llvm::AttrListPtr()
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL = llvm::AttributeSet()
#else // LLVM 5.0+
const llvm::AttributeList &PAL = llvm::AttributeList()
#endif
);
llvm::raw_ostream &printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty,
@@ -536,8 +542,10 @@ namespace {
void printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL,
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL,
#else // LLVM 5.0+
const llvm::AttributeList &PAL,
#endif
llvm::PointerType *Ty);
@@ -782,8 +790,10 @@ std::string CWriter::getArrayName(llvm::ArrayType *AT) {
void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL,
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL,
#else // LLVM 5.0+
const llvm::AttributeList &PAL,
#endif
llvm::PointerType *TheTy) {
llvm::FunctionType *FTy = llvm::cast<llvm::FunctionType>(TheTy->getElementType());
@@ -801,8 +811,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
llvm::Type *ArgTy = *I;
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
#else // LLVM 5.0+
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
#endif
assert(ArgTy->isPointerTy());
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
@@ -810,8 +822,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
printType(FunctionInnards, ArgTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
#endif
"");
PrintedType = true;
@@ -827,8 +841,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
printType(Out, RetTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
#endif
FunctionInnards.str());
}
@@ -925,8 +941,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
bool IgnoreName,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL
#else // LLVM 5.0+
const llvm::AttributeList &PAL
#endif
) {
@@ -947,8 +965,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
llvm::Type *ArgTy = *I;
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
#else // LLVM 5.0+
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
#endif
assert(ArgTy->isPointerTy());
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
@@ -958,8 +978,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
printType(FunctionInnards, ArgTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
#endif
"");
++Idx;
@@ -975,8 +997,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
printType(Out, FTy->getReturnType(),
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
#endif
FunctionInnards.str());
return Out;
@@ -1087,7 +1111,7 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
// vec16_i64 should be handled separately
if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) {
Out << "/* vec16_i64 should be loaded carefully on knc */";
Out << "\n#if defined(KNC)\n";
@@ -1180,6 +1204,7 @@ void CWriter::printConstantDataSequential(llvm::ConstantDataSequential *CDS,
static inline std::string ftostr(const llvm::APFloat& V) {
std::string Buf;
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
if (&V.getSemantics() == &llvm::APFloat::IEEEdouble) {
llvm::raw_string_ostream(Buf) << V.convertToDouble();
return Buf;
@@ -1187,6 +1212,15 @@ static inline std::string ftostr(const llvm::APFloat& V) {
llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
return Buf;
}
#else // LLVM 4.0+
if (&V.getSemantics() == &llvm::APFloat::IEEEdouble()) {
llvm::raw_string_ostream(Buf) << V.convertToDouble();
return Buf;
} else if (&V.getSemantics() == &llvm::APFloat::IEEEsingle()) {
llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
return Buf;
}
#endif
return "<unknown format in ftostr>"; // error
}
@@ -1206,7 +1240,11 @@ static bool isFPCSafeToPrint(const llvm::ConstantFP *CFP) {
return false;
llvm::APFloat APF = llvm::APFloat(CFP->getValueAPF()); // copy
if (CFP->getType() == llvm::Type::getFloatTy(CFP->getContext()))
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
APF.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmNearestTiesToEven, &ignored);
#else // LLVM 4.0+
APF.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmNearestTiesToEven, &ignored);
#endif
#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
char Buffer[100];
sprintf(Buffer, "%a", APF.convertToDouble());
@@ -1637,7 +1675,11 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) {
// useful.
llvm::APFloat Tmp = FPC->getValueAPF();
bool LosesInfo;
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
Tmp.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmTowardZero, &LosesInfo);
#else // LLVM 4.0+
Tmp.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmTowardZero, &LosesInfo);
#endif
V = Tmp.convertToDouble();
}
@@ -1819,11 +1861,11 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) {
// when generating code for knl-generic in multitarget mode.
// Short vectors are mapped to "native" vectors and cause AVX-512 code
// generation in static block initialization (__vec16_* in ::init function).
bool isGenericKNL = g->target->getISA() == Target::GENERIC &&
bool isGenericKNL = g->target->getISA() == Target::GENERIC &&
!g->target->getTreatGenericAsSmth().empty() &&
g->mangleFunctionsWithTarget;
if (isGenericKNL && CPV->getOperand(0)->getType()->isVectorTy())
llvm::report_fatal_error("knl-generic-* target doesn's support short vectors");
if (isGenericKNL && CPV->getOperand(0)->getType()->isVectorTy())
llvm::report_fatal_error("knl-generic-* target doesn's support short vectors");
Out << ' ';
printConstant(llvm::cast<llvm::Constant>(CPV->getOperand(0)), Static);
for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
@@ -2024,7 +2066,7 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) {
if (NeedBoolTrunc)
Out << "((";
visit(I);
if (NeedBoolTrunc)
@@ -2798,7 +2840,7 @@ void CWriter::printFloatingPointConstants(llvm::Function &F) {
// the precision of the printed form, unless the printed form preserves
// precision.
//
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
E = constant_scanner::constant_end(&F); I != E; ++I)
printFloatingPointConstants(*I);
@@ -2865,7 +2907,7 @@ void CWriter::printFloatingPointConstants(const llvm::Constant *C) {
// loads to get their values, rather than tediously inserting the
// individual values into the vector.
void CWriter::printVectorConstants(llvm::Function &F) {
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
E = constant_scanner::constant_end(&F); I != E; ++I) {
const llvm::ConstantDataVector *CDV = llvm::dyn_cast<llvm::ConstantDataVector>(*I);
if (CDV == NULL)
@@ -3017,7 +3059,7 @@ void CWriter::printModuleTypes() {
Out << " struct " << Name << ";\n";
}
Out << "};\n";
for (unsigned i = 0, e = IntegerTypes.size(); i != e; ++i) {
llvm::IntegerType *IT = IntegerTypes[i];
if (IT->getIntegerBitWidth() <= 64 || Alignment[i] == 0)
@@ -3142,8 +3184,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
llvm::FunctionType *FT = llvm::cast<llvm::FunctionType>(F->getFunctionType());
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL = F->getAttributes();
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL = F->getAttributes();
#else // LLVM 5.0+
const llvm::AttributeList &PAL = F->getAttributes();
#endif
std::string tstr;
@@ -3180,8 +3224,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
llvm::Type *ArgTy = I->getType();
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
#else // LLVM 5.0+
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
#endif
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
@@ -3193,8 +3239,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
printType(FunctionInnards, ArgTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
#endif
ArgName);
PrintedArg = true;
@@ -3219,8 +3267,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
llvm::Type *ArgTy = *I;
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
#else // LLVM 5.0+
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
#endif
assert(ArgTy->isPointerTy());
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
@@ -3228,8 +3278,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
printType(FunctionInnards, ArgTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt)
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt)
#else // LLVM 5.0+
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt)
#endif
);
PrintedArg = true;
@@ -3265,8 +3317,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
printType(Out, RetTy,
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
#else // LLVM 5.0+
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
#endif
FunctionInnards.str());
}
@@ -4061,17 +4115,17 @@ void CWriter::printIntrinsicDefinition(const llvm::Function &F, llvm::raw_ostrea
printType(Out, retT);
Out << "r;\n";
unsigned NumBits = llvm::cast<llvm::IntegerType>(elemT)->getBitWidth();
std::stringstream str_type;
if (NumBits <= 32)
if (NumBits <= 32)
str_type << "uint" << 2 * NumBits << "_t";
else {
assert(NumBits <= 64 && "Bit widths > 128 not implemented yet");
str_type << "llvmUInt128";
}
Out << " " << str_type.str() << " result = (" << str_type.str() << ") a * (" << str_type.str() << ") b;\n";
Out << " " << str_type.str() << " result = (" << str_type.str() << ") a * (" << str_type.str() << ") b;\n";
Out << " r.field0 = result;\n";
Out << " r.field1 = result >> " << NumBits << ";\n";
Out << " return r;\n}\n";
@@ -4201,8 +4255,10 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
// parameter instead of passing it to the call.
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
const llvm::AttrListPtr &PAL = I.getAttributes();
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
const llvm::AttributeSet &PAL = I.getAttributes();
#else // LLVM 5.0+
const llvm::AttributeList &PAL = I.getAttributes();
#endif
bool hasByVal = I.hasByValArgument();
@@ -4241,7 +4297,7 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
if (Callee->getName() == "malloc" ||
Callee->getName() == "_aligned_malloc")
Out << "(uint8_t *)";
// This 'if' will fix 'soa-18.ispc' test (fails with optimizations off)
// Yet the way the case is fixed is quite dirty and leads to many other fails
@@ -4302,7 +4358,7 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
for (; AI != AE; ++AI, ++ArgNo) {
if (PrintedArg) Out << ", ";
if (ArgNo == 0 &&
if (ArgNo == 0 &&
Callee->getName() == "posix_memalign") {
// uint8_t** is incompatible with void** without explicit cast.
// Should we do this for any other functions?
@@ -4314,8 +4370,10 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
printType(Out, FTy->getParamType(ArgNo),
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::Attributes::SExt)
#else /* LLVM 3.3+ */
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt)
#else // LLVM 5.0+
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt)
#endif
);
Out << ')';
@@ -4377,7 +4435,7 @@ bool CWriter::visitBuiltinCall(llvm::CallInst &I, llvm::Intrinsic::ID ID,
if (I.getParent()->getParent()->arg_empty())
Out << "vararg_dummy_arg";
else
writeOperand(&*(--I.getParent()->getParent()->arg_end()));
writeOperand(&*(std::prev(I.getParent()->getParent()->arg_end())));
Out << ')';
return true;
case llvm::Intrinsic::vaend:
@@ -4552,7 +4610,11 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
llvm::VectorType *LastIndexIsVector = 0;
{
for (llvm::gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(*TmpI);
#else // LLVM 4.0+
LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(TmpI.getIndexedType());
#endif
}
Out << "(";
@@ -4581,7 +4643,11 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
// exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
if (isAddressExposed(Ptr)) {
writeOperandInternal(Ptr, Static);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
} else if (I != E && (*I)->isStructTy()) {
#else // LLVM 4.0+
} else if (I != E && I.isStruct()) {
#endif
// If we didn't already emit the first operand, see if we can print it as
// P->f instead of "P[0].f"
writeOperand(Ptr);
@@ -4596,13 +4662,18 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
}
for (; I != E; ++I) {
if ((*I)->isStructTy()) {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
llvm::Type *type = *I;
#else // LLVM 4.0+
llvm::Type *type = I.getIndexedType();
#endif
if (type->isStructTy()) {
Out << ".field" << llvm::cast<llvm::ConstantInt>(I.getOperand())->getZExtValue();
} else if ((*I)->isArrayTy()) {
} else if (type->isArrayTy()) {
Out << ".array[";
writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
Out << ']';
} else if (!(*I)->isVectorTy()) {
} else if (!type->isVectorTy()) {
Out << '[';
writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
Out << ']';
@@ -4633,7 +4704,7 @@ void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType,
Out << '*';
if (IsVolatile || IsUnaligned) {
Out << "((";
if (IsUnaligned && ITy && (ITy->getBitWidth() > 64))
if (IsUnaligned && ITy && (ITy->getBitWidth() > 64))
Out << "iN_" << ITy->getBitWidth() << "_align_" << Alignment << " *)";
else {
if (IsUnaligned)
@@ -4798,7 +4869,7 @@ void CWriter::visitShuffleVectorInst(llvm::ShuffleVectorInst &SVI) {
printType(Out, llvm::PointerType::getUnqual(EltTy));
Out << ")(&" << GetValueName(Op)
<< "))[" << SrcVal << "]";
Out << " \n#endif \n";
Out << " \n#endif \n";
}
}
}
@@ -4901,7 +4972,11 @@ public:
SmearCleanupPass(llvm::Module *m, int width)
: BasicBlockPass(ID) { module = m; vectorWidth = width; }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
const char *getPassName() const { return "Smear Cleanup Pass"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Smear Cleanup Pass"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
@@ -4989,7 +5064,7 @@ SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
llvm::dyn_cast<llvm::Constant>(shuffleInst->getOperand(2));
// Check that the shuffle is a broadcast of the element of the first vector,
// i.e. mask vector is vector with equal elements of expected size.
// i.e. mask vector is vector with equal elements of expected size.
if (!(mask &&
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
(mask->isNullValue() || (shuffleInst->getMask()->getType()->isVectorTy() && llvm::dyn_cast<llvm::ConstantVector>(shuffleInst->getMask())->getSplatValue() != 0 ) ) &&
@@ -5014,34 +5089,34 @@ SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
if (operandVec && operandVec->getNumElements() == 1)
return NULL;
// Insert ExtractElementInstr to get value for smear
// Insert ExtractElementInstr to get value for smear
llvm::Function *extractFunc = module->getFunction("__extract_element");
if (extractFunc == NULL) {
// Declare the __extract_element function if needed; it takes a vector and
// Declare the __extract_element function if needed; it takes a vector and
// a scalar parameter and returns a scalar of the vector parameter type.
llvm::Constant *ef =
module->getOrInsertFunction("__extract_element",
shuffleInst->getOperand(0)->getType()->getVectorElementType(),
module->getOrInsertFunction("__extract_element",
shuffleInst->getOperand(0)->getType()->getVectorElementType(),
shuffleInst->getOperand(0)->getType(),
llvm::IntegerType::get(module->getContext(), 32), NULL);
extractFunc = llvm::dyn_cast<llvm::Function>(ef);
assert(extractFunc != NULL);
extractFunc->setDoesNotThrow();
extractFunc->setOnlyReadsMemory();
}
}
if (extractFunc == NULL) {
return NULL;
}
llvm::Instruction *extractCall =
llvm::ExtractElementInst::Create(shuffleInst->getOperand(0),
llvm::Instruction *extractCall =
llvm::ExtractElementInst::Create(shuffleInst->getOperand(0),
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
// mask is of VectorType
llvm::dyn_cast<llvm::ConstantVector>(mask)->getSplatValue(),
llvm::dyn_cast<llvm::ConstantVector>(mask)->getSplatValue(),
#else
mask->getSplatValue(),
mask->getSplatValue(),
#endif
"__extract_element", inst);
return extractCall;
@@ -5109,7 +5184,11 @@ public:
AndCmpCleanupPass()
: BasicBlockPass(ID) { }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
const char *getPassName() const { return "AndCmp Cleanup Pass"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "AndCmp Cleanup Pass"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
@@ -5251,7 +5330,11 @@ public:
#endif
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
const char *getPassName() const { return "MaskOps Cleanup Pass"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "MaskOps Cleanup Pass"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
private:

21
ctx.cpp
View File

@@ -385,10 +385,14 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
llvm::DISubroutineType *diSubprogramType_n =
llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
int flags = llvm::DINode::FlagPrototyped;
#else /* LLVM 3.8+ */
#elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
int flags = llvm::DINode::FlagPrototyped;
#else /* LLVM 4.0+ */
Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
#endif
@@ -417,7 +421,16 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
isStatic, true, /* is defn */
firstLine, flags,
isOptimized, llvmFunction);
#else /* LLVM 3.8+ */
#elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
diSubprogram =
m->diBuilder->createFunction(diFile /* scope */, funSym->name,
mangledName, diFile,
firstLine, diSubprogramType_n,
isStatic, true, /* is defn */
firstLine, flags,
isOptimized);
llvmFunction->setSubprogram(diSubprogram);
#else /* LLVM 4.0+ */
diSubprogram =
m->diBuilder->createFunction(diFile /* scope */, funSym->name,
mangledName, diFile,
@@ -1821,7 +1834,11 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
if (m->diBuilder == NULL)
return;
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
int flags = 0;
#else // LLVM 4.0+
llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
llvm::DIScope scope = diSubprogram;
llvm::DIType diType = sym->type->GetDIType(scope);

View File

@@ -4,7 +4,7 @@ An ISPC update with new native AVX512 target for future Xeon CPUs and
improvements for debugging, including new switch --dwarf-version to support
debugging on old systems.
The release is based on patched version LLVM 3.8.
The release is based on patched LLVM 3.8.
=== v1.9.0 === (12 Feb 2016)

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.9.1
PROJECT_NUMBER = 1.9.2dev
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -420,8 +420,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
(
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2 // 3.2
(function->getFnAttributes().hasAttribute(llvm::Attributes::AlwaysInline) == false)
#else // LLVM 3.3+
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
(function->getAttributes().getFnAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::AlwaysInline) == false)
#else // LLVM 5.0+
(function->getAttributes().getFnAttributes().hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline) == false)
#endif
&&
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);

View File

@@ -1158,11 +1158,19 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
#endif
attrBuilder.addAttribute("target-cpu", this->m_cpu);
attrBuilder.addAttribute("target-features", this->m_attributes);
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
*g->ctx,
llvm::AttributeSet::FunctionIndex,
attrBuilder));
#else // LLVM 5.0+
this->m_tf_attributes = new llvm::AttributeList(
llvm::AttributeList::get(
*g->ctx,
llvm::AttributeList::FunctionIndex,
attrBuilder));
#endif
}
#endif
@@ -1477,7 +1485,11 @@ Target::StructOffset(llvm::Type *type, int element,
void Target::markFuncWithTargetAttr(llvm::Function* func) {
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
if (m_tf_attributes) {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
#else // LLVM 5.0+
func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
#endif
}
#endif
}

14
ispc.h
View File

@@ -41,7 +41,7 @@
#include "ispc_version.h"
#if ISPC_LLVM_VERSION < OLDEST_SUPPORTED_LLVM || ISPC_LLVM_VERSION > LATEST_SUPPORTED_LLVM
#error "Only LLVM 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8 and 3.9 development branch are supported"
#error "Only LLVM 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 and 5.0 development branch are supported"
#endif
#if defined(_WIN32) || defined(_WIN64)
@@ -72,7 +72,11 @@
// Forward declarations of a number of widely-used LLVM types
namespace llvm {
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
class AttributeSet;
#else // LLVM 5.0+
class AttributeList;
#endif
class BasicBlock;
class Constant;
class ConstantValue;
@@ -86,13 +90,11 @@ namespace llvm {
class TargetMachine;
class Type;
class Value;
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
class DIFile;
class DIType;
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
class DIDescriptor;
#else // LLVM 3.7+
class DIFile;
class DIType;
class DIScope;
#endif
}
@@ -348,7 +350,11 @@ private:
/** Target-specific LLVM attribute, which has to be attached to every
function to ensure that it is generated for correct target architecture.
This requirement was introduced in LLVM 3.3 */
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
llvm::AttributeSet* m_tf_attributes;
#else // LLVM 5.0+
llvm::AttributeList* m_tf_attributes;
#endif
#endif
/** Native vector width of the vector instruction set. Note that this

View File

@@ -38,7 +38,7 @@
#ifndef ISPC_VERSION_H
#define ISPC_VERSION_H
#define ISPC_VERSION "1.9.1"
#define ISPC_VERSION "1.9.2dev"
#include "llvm/Config/llvm-config.h"
#define ISPC_LLVM_VERSION ( LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 )
@@ -51,9 +51,11 @@
#define ISPC_LLVM_3_7 30700
#define ISPC_LLVM_3_8 30800
#define ISPC_LLVM_3_9 30900
#define ISPC_LLVM_4_0 40000
#define ISPC_LLVM_5_0 50000
#define OLDEST_SUPPORTED_LLVM ISPC_LLVM_3_2
#define LATEST_SUPPORTED_LLVM ISPC_LLVM_3_9
#define LATEST_SUPPORTED_LLVM ISPC_LLVM_5_0
#ifdef __ispc__xstr
#undef __ispc__xstr

View File

@@ -51,7 +51,11 @@
#include <llvm/IR/Constants.h>
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
#define PTYPE(p) (llvm::cast<llvm::SequentialType>((p)->getType()->getScalarType())->getElementType())
#else // LLVM 4.0+
#define PTYPE(p) (llvm::cast<llvm::PointerType>((p)->getType()->getScalarType())->getElementType())
#endif
namespace llvm {
class PHINode;

View File

@@ -124,10 +124,15 @@
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Frontend/Utils.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Lex/PreprocessorOptions.h>
#include <llvm/Support/ToolOutputFile.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Bitcode/ReaderWriter.h>
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
#include <llvm/Bitcode/ReaderWriter.h>
#else
#include <llvm/Bitcode/BitcodeWriter.h>
#endif
/*! list of files encountered by the parser. this allows emitting of
the module file's dependencies via the -MMM option */
@@ -426,9 +431,7 @@ Module::Module(const char *fn) {
sprintf(producerString, "ispc version %s (built on %s)",
ISPC_VERSION, __DATE__);
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
diCompileUnit =
#endif // LLVM_3_4+
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
name, /* filename */
directory, /* directory */
@@ -436,6 +439,25 @@ Module::Module(const char *fn) {
g->opt.level > 0 /* is optimized */,
"-g", /* command line args */
0 /* run time version */);
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.4-3.9
diCompileUnit =
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
name, /* filename */
directory, /* directory */
producerString, /* producer */
g->opt.level > 0 /* is optimized */,
"-g", /* command line args */
0 /* run time version */);
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 // LLVM 4.0+
auto srcFile = diBuilder->createFile(name, directory);
diCompileUnit =
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
srcFile, /* filename */
producerString, /* producer */
g->opt.level > 0 /* is optimized */,
"-g", /* command line args */
0 /* run time version */);
#endif
}
}
else
@@ -734,7 +756,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
sym->type->GetDIType(file),
(sym->storageClass == SC_STATIC),
sym_const_storagePtr);
#else // LLVM 3.7+
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
llvm::DIFile *file = pos.GetDIFile();
//llvm::MDFile *file = pos.GetDIFile();
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
@@ -748,6 +770,20 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
sym->type->GetDIType(file),
(sym->storageClass == SC_STATIC),
sym_const_storagePtr);
#else // LLVM 4.0+
llvm::DIFile *file = pos.GetDIFile();
//llvm::MDFile *file = pos.GetDIFile();
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
Assert(sym_GV_storagePtr);
llvm::DIGlobalVariableExpression *var = diBuilder->createGlobalVariableExpression(
file,
name,
name,
file,
pos.first_line,
sym->type->GetDIType(file),
(sym->storageClass == SC_STATIC));
sym_GV_storagePtr->addDebugInfo(var);
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
Assert(var.Verify());

49
opt.cpp
View File

@@ -503,8 +503,13 @@ DebugPassManager::add(llvm::Pass * P, int stage = -1) {
if (g->debug_stages.find(number) != g->debug_stages.end()) {
// adding dump of LLVM IR after optimization
char buf[100];
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
sprintf(buf, "\n\n*****LLVM IR after phase %d: %s*****\n\n",
number, P->getPassName());
#else // LLVM 4.0+
sprintf(buf, "\n\n*****LLVM IR after phase %d: %s*****\n\n",
number, P->getPassName().data());
#endif
PM.add(CreateDebugPass(buf));
}
@@ -943,7 +948,11 @@ class IntrinsicsOpt : public llvm::BasicBlockPass {
public:
IntrinsicsOpt() : BasicBlockPass(ID) {};
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Intrinsics Cleanup Optimization"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Intrinsics Cleanup Optimization"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
@@ -1257,7 +1266,11 @@ public:
InstructionSimplifyPass()
: BasicBlockPass(ID) { }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Vector Select Optimization"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Vector Select Optimization"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
@@ -1426,7 +1439,11 @@ public:
static char ID;
ImproveMemoryOpsPass() : BasicBlockPass(ID) { }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Improve Memory Ops"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Improve Memory Ops"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
@@ -3278,7 +3295,11 @@ public:
static char ID;
GatherCoalescePass() : BasicBlockPass(ID) { }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Gather Coalescing"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Gather Coalescing"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
@@ -4336,7 +4357,11 @@ public:
static char ID;
ReplacePseudoMemoryOpsPass() : BasicBlockPass(ID) { }
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Replace Pseudo Memory Ops"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Replace Pseudo Memory Ops"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
@@ -4705,7 +4730,11 @@ public:
isLastTry = last;
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Resolve \"is compile time constant\""; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Resolve \"is compile time constant\""; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
bool isLastTry;
@@ -4800,7 +4829,11 @@ public:
sprintf(str_output, "%s", output);
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Dump LLVM IR"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Dump LLVM IR"; }
#endif
bool runOnModule(llvm::Module &m);
private:
@@ -4846,7 +4879,11 @@ public:
AU.setPreservesCFG();
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Make internal funcs \"static\""; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Make internal funcs \"static\""; }
#endif
bool runOnModule(llvm::Module &m);
};
@@ -4953,7 +4990,11 @@ class PeepholePass : public llvm::BasicBlockPass {
public:
PeepholePass();
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Peephole Optimizations"; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Peephole Optimizations"; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
@@ -5354,7 +5395,11 @@ public:
ReplaceStdlibShiftPass() : BasicBlockPass(ID) {
}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Resolve \"replace extract insert chains\""; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Resolve \"replace extract insert chains\""; }
#endif
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
@@ -5453,7 +5498,11 @@ public:
static char ID;
FixBooleanSelectPass() :FunctionPass(ID) {}
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
const char *getPassName() const { return "Resolve \"replace extract insert chains\""; }
#else // LLVM 4.0+
llvm::StringRef getPassName() const { return "Resolve \"replace extract insert chains\""; }
#endif
bool runOnFunction(llvm::Function &F);
private:

189
prepro.py Normal file
View File

@@ -0,0 +1,189 @@
import re
import sys
def _specialize(function, family, idx, concrete):
    """Replace every ``family<idx>`` placeholder in ``function`` with ``concrete``.

    function -- source text (a whole file or just a signature).
    family   -- placeholder family name: 'floating', 'number' or 'integer'.
    idx      -- placeholder index; converted with ``str()``.
    concrete -- concrete type name substituted for the placeholder.

    Returns the new string; text without the placeholder is returned unchanged.
    """
    return function.replace(family + '<' + str(idx) + '>', concrete)


# --- 'floating<idx>' placeholders: float or double --------------------------

def floating2float(function, idx):
    """Return ``function`` with ``floating<idx>`` replaced by ``float``."""
    return _specialize(function, 'floating', idx, 'float')


def floating2double(function, idx):
    """Return ``function`` with ``floating<idx>`` replaced by ``double``."""
    return _specialize(function, 'floating', idx, 'double')


# --- 'number<idx>' placeholders: any floating-point or integer type ---------

def number2float(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``float``."""
    return _specialize(function, 'number', idx, 'float')


def number2double(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``double``."""
    return _specialize(function, 'number', idx, 'double')


def number2int(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``int``."""
    return _specialize(function, 'number', idx, 'int')


def number2long(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``long``."""
    return _specialize(function, 'number', idx, 'long')


def number2short(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``short``."""
    return _specialize(function, 'number', idx, 'short')


def number2char(function, idx):
    """Return ``function`` with ``number<idx>`` replaced by ``char``."""
    return _specialize(function, 'number', idx, 'char')


# --- 'integer<idx>' placeholders: integer types only ------------------------

def integer2int(function, idx):
    """Return ``function`` with ``integer<idx>`` replaced by ``int``."""
    return _specialize(function, 'integer', idx, 'int')


def integer2long(function, idx):
    """Return ``function`` with ``integer<idx>`` replaced by ``long``."""
    return _specialize(function, 'integer', idx, 'long')


def integer2short(function, idx):
    """Return ``function`` with ``integer<idx>`` replaced by ``short``."""
    return _specialize(function, 'integer', idx, 'short')


def integer2char(function, idx):
    """Return ``function`` with ``integer<idx>`` replaced by ``char``."""
    return _specialize(function, 'integer', idx, 'char')
# Qualifiers and type names that are dropped when a signature is turned into a
# call expression. They are matched as whole words only, so identifiers that
# merely contain one of them (e.g. ``points``, ``interval``) are preserved.
_SIGNATURE_KEYWORDS = re.compile(
    r'\b(?:export|void|float|double|char|short|int|long|uniform)\b')


def strip_types(fun, i):
    """Turn a function signature into a call of its ``i``-th generated variant.

    fun -- signature text, e.g. ``export void f(uniform int N, uniform float X[])``.
    i   -- variant number; ``_<i>`` is inserted before every ``(`` so the call
           targets the renamed function (``f_<i>(``).

    The ``export``/``uniform`` qualifiers, the concrete type names, ``[]`` and
    ``*`` are removed; whitespace is left as-is, so the result is the bare
    name followed by the argument names.

    Unlike plain substring removal, keywords are only removed when they stand
    alone as a word, so a parameter such as ``points`` is no longer mangled.
    """
    call = _SIGNATURE_KEYWORDS.sub('', fun)
    call = call.replace('[]', '').replace('*', '')
    return call.replace('(', '_' + str(i) + '(')
def _first_free_index(family, function):
    """Return the smallest index n such that ``family<n>`` is absent from ``function``."""
    idx = 0
    while family + '<' + str(idx) + '>' in function:
        idx += 1
    return idx


def _number_bare_placeholders(family, next_idx, function, fun_def):
    """Give every bare ``family`` keyword its own fresh ``family<n>`` index.

    A keyword is "bare" when it is directly followed by whitespace (space, tab
    or newline). Occurrences are tagged first to last, once per occurrence
    found in ``function``; ``fun_def`` is tagged in lock-step so the signature
    copy receives the same indices.

    Counting and replacing use the same pattern, so the counter can never
    advance for an occurrence that was not actually tagged.

    Returns ``(next_idx, function, fun_def)`` with ``next_idx`` advanced past
    the indices that were handed out.
    """
    bare = re.compile(re.escape(family) + r'(?=\s)')
    for _ in range(len(bare.findall(function))):
        tagged = family + '<' + str(next_idx) + '>'
        function = bare.sub(tagged, function, 1)
        fun_def = bare.sub(tagged, fun_def, 1)
        next_idx += 1
    return next_idx, function, fun_def


def _expand(variants, count, substitutions):
    """Specialize placeholder indices ``0 .. count-1`` in every variant.

    variants      -- list of ``(signature, source)`` pairs.
    substitutions -- functions ``sub(text, idx)``; each index multiplies the
                     number of variants by ``len(substitutions)``.

    For each input pair the outputs appear in ``substitutions`` order, which
    fixes the numbering of the generated functions.
    """
    for idx in range(count):
        variants = [(sub(h, idx), sub(f, idx))
                    for (h, f) in variants
                    for sub in substitutions]
    return variants


# Script entry point: read one .ispc file whose exported function uses
# floating/number/integer type placeholders, then write
#   <file>.pre.ispc -- one concrete copy of the source per type combination,
#                      with the function renamed <name>_<k>;
#   <file>.h        -- overloaded wrappers in namespace ispc forwarding to them.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Same text the original three print statements emitted.
        sys.stdout.write('usage:\n\tpython  ' + sys.argv[0] + ' [file.ispc]\n')
        sys.exit(1)

    path = sys.argv[1]
    with open(path, 'r') as src:
        function = src.read()

    # Signature of the first exported function: from 'export' up to the last
    # ')' that precedes the opening brace of a body.
    headers = re.findall(r'export [^{]*\)', function)
    if not headers:
        sys.stderr.write('error: no exported function found in ' + path + '\n')
        sys.exit(1)
    fun_def = headers[0]
    sys.stdout.write(fun_def + '\n')

    # Explicitly indexed placeholders already in the file reserve 0..n-1 ...
    floating = _first_free_index('floating', function)
    number = _first_free_index('number', function)
    integer = _first_free_index('integer', function)

    # ... and every bare keyword gets the next free index of its family.
    floating, function, fun_def = _number_bare_placeholders(
        'floating', floating, function, fun_def)
    number, function, fun_def = _number_bare_placeholders(
        'number', number, function, fun_def)
    integer, function, fun_def = _number_bare_placeholders(
        'integer', integer, function, fun_def)

    # Cartesian expansion over all placeholder indices, family by family.
    variants = [(fun_def, function)]
    variants = _expand(variants, floating,
                       (floating2float, floating2double))
    variants = _expand(variants, number,
                       (number2float, number2double, number2int,
                        number2long, number2short, number2char))
    variants = _expand(variants, integer,
                       (integer2int, integer2long,
                        integer2short, integer2char))

    # Take the name from the export signature itself, so that another
    # function appearing earlier in the file cannot be picked up by mistake.
    name_match = re.search(r'([a-zA-Z_][a-zA-Z0-9_]*)\(', fun_def)
    if name_match is None:
        sys.stderr.write('error: cannot find the function name in: '
                         + fun_def + '\n')
        sys.exit(1)
    fun_name = name_match.group(1)

    # Characters that are not valid in a macro name (e.g. '/', '-') become
    # '_'; for a plain 'name.ispc' argument the guard is unchanged.
    guard = '_' + re.sub(r'[^A-Z0-9_]', '_', path.upper()) + '_H_'

    with open(path + '.pre.ispc', 'w') as o, open(path + '.h', 'w') as hdr:
        hdr.write('#include "' + '.'.join(path.split('.')[:-1]) + '.h"\n\n')
        hdr.write('#ifndef ' + guard + '\n')
        hdr.write('#define ' + guard + '\n')
        hdr.write('namespace ispc {\n')

        for i, (h, f) in enumerate(variants):
            # Rename only the first occurrence of the name in this copy.
            o.write(f.replace(fun_name, fun_name + '_' + str(i), 1))
            # Wrapper: the signature without ISPC qualifiers, forwarding to
            # variant i.
            hdr.write(h.replace('uniform', '').replace('export', '')
                      + '\n{\n\treturn ' + strip_types(h, i) + ';\n}\n\n')

        hdr.write('}\n')
        hdr.write('#endif\n')

    sys.exit(0)

View File

@@ -5,8 +5,8 @@ struct Foo { float x; float y; };
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
float a = aa[programIndex];
uniform Foo foo[programCount];
for (uniform int i = 0; i < programCount; ++i) {
uniform Foo foo[programCount+1];
for (uniform int i = 0; i < programCount+1; ++i) {
foo[i].x = i;
foo[i].y = -1234 + i;
}

View File

@@ -5,8 +5,8 @@ typedef int<3> int3;
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
float a = aa[programIndex];
uniform int3 array[programCount];
for (uniform int i = 0; i < programCount + 5 - b; ++i) {
uniform int3 array[programCount+1];
for (uniform int i = 0; i < programCount + 6 - b; ++i) {
for (uniform int j = 0; j < 3; ++j)
array[i][j] = i+100*j;
}

23
tests_ispcpp/hello.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include <stdlib.h>
#include <stdio.h>
#include "hello.ispc.h"
// Test driver for the hello.ispc sample: builds two float input arrays,
// calls ispc::saxpy on them and prints each output element on its own line.
int main() {
    const int kCount = 100;
    // Kept as a double so the call passes a double scale argument.
    const double kScale = 3.1415926535;

    float A[kCount];
    float B[kCount];
    double result[kCount];

    // A counts down from 100, B holds the squares of the index.
    for (int idx = 0; idx < kCount; ++idx) {
        A[idx] = static_cast<float>(kCount - idx);
        B[idx] = static_cast<float>(idx * idx);
    }

    // The arrays decay to float* / double*, so the argument types are
    // (int, double, float*, float*, double*).
    ispc::saxpy(kCount, kScale, A, B, result);

    for (const double value : result) {
        printf("%.6f\n", value);
    }
    return 0;
}

11
tests_ispcpp/hello.ispc Normal file
View File

@@ -0,0 +1,11 @@
// Exported kernel: for every i in [0, N) stores scale * X[i] + Y[i]
// into result[i].
// The angle-bracket indices on the parameter types are placeholders for the
// preprocessor; presumably parameters sharing an index get the same concrete
// type in each generated variant -- TODO confirm against the preprocessor.
export void saxpy(uniform int N,
uniform floating<0> scale,
uniform floating<1> X[],
uniform floating<1> Y[],
uniform floating<2> result[])
{
// Iterate i over the range 0 up to N.
foreach (i = 0 ... N) {
// tmp uses the index-2 placeholder, the same one as the result array,
// so the sum is held in the output element type before the store.
floating<2> tmp = scale * X[i] + Y[i];
result[i] = tmp;
}
}

View File

@@ -541,6 +541,8 @@ llvm::DIType *AtomicType::GetDIType(llvm::DIScope *scope) const {
#else //LLVM 3.7++
return NULL;
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
case TYPE_BOOL:
return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
llvm::dwarf::DW_ATE_unsigned);
@@ -585,6 +587,53 @@ llvm::DIType *AtomicType::GetDIType(llvm::DIScope *scope) const {
return m->diBuilder->createBasicType("uint64", 64 /* size */, 64 /* align */,
llvm::dwarf::DW_ATE_unsigned);
break;
#else // LLVM 4.0+
case TYPE_BOOL:
return m->diBuilder->createBasicType("bool", 32 /* size */,
llvm::dwarf::DW_ATE_unsigned);
break;
case TYPE_INT8:
return m->diBuilder->createBasicType("int8", 8 /* size */,
llvm::dwarf::DW_ATE_signed);
break;
case TYPE_UINT8:
return m->diBuilder->createBasicType("uint8", 8 /* size */,
llvm::dwarf::DW_ATE_unsigned);
break;
case TYPE_INT16:
return m->diBuilder->createBasicType("int16", 16 /* size */,
llvm::dwarf::DW_ATE_signed);
break;
case TYPE_UINT16:
return m->diBuilder->createBasicType("uint16", 16 /* size */,
llvm::dwarf::DW_ATE_unsigned);
break;
case TYPE_INT32:
return m->diBuilder->createBasicType("int32", 32 /* size */,
llvm::dwarf::DW_ATE_signed);
break;
case TYPE_UINT32:
return m->diBuilder->createBasicType("uint32", 32 /* size */,
llvm::dwarf::DW_ATE_unsigned);
break;
case TYPE_FLOAT:
return m->diBuilder->createBasicType("float", 32 /* size */,
llvm::dwarf::DW_ATE_float);
break;
case TYPE_DOUBLE:
return m->diBuilder->createBasicType("double", 64 /* size */,
llvm::dwarf::DW_ATE_float);
break;
case TYPE_INT64:
return m->diBuilder->createBasicType("int64", 64 /* size */,
llvm::dwarf::DW_ATE_signed);
break;
case TYPE_UINT64:
return m->diBuilder->createBasicType("uint64", 64 /* size */,
llvm::dwarf::DW_ATE_unsigned);
break;
#endif
default:
FATAL("unhandled basic type in AtomicType::GetDIType()");
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
@@ -2307,9 +2356,15 @@ llvm::DIType *StructType::GetDIType(llvm::DIScope *scope) const {
llvm::DIFile *diFile = elementPositions[i].GetDIFile();
llvm::DIDerivedType *fieldType =
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
line, eltSize, eltAlign,
currentSize, 0, eltType);
#else // LLVM 4.0+
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
line, eltSize, eltAlign,
currentSize, llvm::DINode::FlagZero, eltType);
#endif
elementLLVMTypes.push_back(fieldType);
currentSize += eltSize;
@@ -2334,7 +2389,11 @@ llvm::DIType *StructType::GetDIType(llvm::DIScope *scope) const {
pos.first_line, // Line number
currentSize, // Size in bits
align, // Alignment in bits
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
0, // Flags
#else // LLVM 4.0+
llvm::DINode::FlagZero, // Flags
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
llvm::DIType(), // DerivedFrom
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7++
@@ -2584,7 +2643,11 @@ llvm::DIType *UndefinedStructType::GetDIType(llvm::DIScope *scope) const {
pos.first_line, // Line number
0, // Size
0, // Align
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
0, // Flags
#else // LLVM 4.0+
llvm::DINode::FlagZero, // Flags
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
llvm::DIType(), // DerivedFrom
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+