Compare commits
39 Commits
v1.9.1
...
concept-ch
| Author | SHA1 | Date | |
|---|---|---|---|
| 4182fa2967 | |||
| d6cf38a929 | |||
|
|
8c97883317 | ||
|
|
455a29c491 | ||
|
|
a618ad45bf | ||
|
|
0ff8ae4596 | ||
|
|
a5b689439b | ||
|
|
f9947541a1 | ||
|
|
c2b2b38081 | ||
|
|
7884c7da04 | ||
|
|
b471e97a10 | ||
|
|
611fe0bc42 | ||
|
|
6d649e1dff | ||
|
|
d0bfe7738a | ||
|
|
95d33554db | ||
|
|
4298e3d0cd | ||
|
|
a7fd70fa21 | ||
|
|
60dc47e0a6 | ||
|
|
ff298f21b7 | ||
|
|
f04a04a7e3 | ||
|
|
39e7f0c2d4 | ||
|
|
726b260cd5 | ||
|
|
6a8ce4b412 | ||
|
|
32626ea9e3 | ||
|
|
d4a8afd6e8 | ||
|
|
8acfd92f92 | ||
|
|
7fb4188f51 | ||
|
|
8b525bb8bc | ||
|
|
a86a16600b | ||
|
|
d0341754d6 | ||
|
|
f968bc1b2a | ||
|
|
7af7659ac2 | ||
|
|
a6952fd651 | ||
|
|
4c7fb35f57 | ||
|
|
87efb27dc5 | ||
|
|
45b306480e | ||
|
|
2a68fc6c48 | ||
|
|
30d88e1683 | ||
|
|
a97a69c96e |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -4,11 +4,15 @@ depend
|
||||
ispc
|
||||
ispc_test
|
||||
ispc_ref
|
||||
llvm/
|
||||
objs
|
||||
docs/doxygen
|
||||
docs/*.html
|
||||
tests*/*cpp
|
||||
tests*/*run
|
||||
tests*/*.o
|
||||
tests_ispcpp/*.h
|
||||
tests_ispcpp/*pre*
|
||||
logs/
|
||||
notify_log.log
|
||||
alloy_results_*
|
||||
|
||||
165
alloy.py
165
alloy.py
@@ -33,6 +33,8 @@
|
||||
|
||||
# // Author: Filippov Ilia
|
||||
|
||||
import re
|
||||
|
||||
def tail_and_save(file_in, file_out, tail = 100):
|
||||
with open(file_in, 'r') as f_in:
|
||||
lines = f_in.readlines()[-tail:]
|
||||
@@ -91,6 +93,7 @@ def check_LLVM(which_LLVM):
|
||||
return answer
|
||||
|
||||
def try_do_LLVM(text, command, from_validation):
|
||||
print_debug("Command line: "+command+"\n", True, alloy_build)
|
||||
if from_validation == True:
|
||||
text = text + "\n"
|
||||
print_debug("Trying to " + text, from_validation, alloy_build)
|
||||
@@ -108,7 +111,77 @@ def try_do_LLVM(text, command, from_validation):
|
||||
error("can't " + text, 1)
|
||||
print_debug("DONE.\n", from_validation, alloy_build)
|
||||
|
||||
def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, from_validation, force, make, gcc_toolchain_path):
|
||||
def checkout_LLVM(component, use_git, version_LLVM, revision, target_dir, from_validation):
    """Fetch the sources of one LLVM sub-project into target_dir.

    component       -- "llvm", "clang", "libcxx", "clang-tools-extra" or "compiler-rt".
    use_git         -- when true, clone from the git repository; otherwise check out from svn.
    version_LLVM    -- "trunk" or an underscore-separated release name such as "3_8" or "4_0".
    revision        -- text inserted verbatim after "svn co" (may be empty); not used for git.
    target_dir      -- directory name handed as-is to "git clone" / "svn co".
    from_validation -- forwarded unchanged to try_do_LLVM.

    An unknown component or version is reported through error(..., 1).
    """
    GIT_REPO_BASE = "http://llvm.org/git/"
    # Alternative git mirror: https://github.com/llvm-mirror/
    SVN_REPO_BASE = "http://llvm.org/svn/llvm-project/"

    # component -> directory name inside the svn repository
    # (the git repository is always named "<component>.git").
    svn_project_dirs = {
        "llvm": "llvm",
        "clang": "cfe",
        "libcxx": "libcxx",
        "clang-tools-extra": "clang-tools-extra",
        "compiler-rt": "compiler-rt",
    }

    # version -> (path below the svn project directory, git branch name)
    release_locations = {
        "trunk": ("trunk", "master"),
        "4_0": ("branches/release_40", "release_40"),
        "3_9": ("tags/RELEASE_390/final", "release_39"),
        "3_8": ("tags/RELEASE_381/final", "release_38"),
        "3_7": ("tags/RELEASE_370/final", "release_37"),
        "3_6": ("tags/RELEASE_362/final", "release_36"),
        "3_5": ("tags/RELEASE_351/final", "release_35"),
        "3_4": ("tags/RELEASE_34/dot2-final", "release_34"),
        "3_3": ("tags/RELEASE_33/final", "release_33"),
        "3_2": ("tags/RELEASE_32/final", "release_32"),
    }

    # Identify the component
    if component not in svn_project_dirs:
        error("Trying to checkout unidentified component: " + component, 1)
        # NOTE(review): error(..., 1) presumably aborts the script; the return
        # only guards against continuing with undefined repository locations.
        return
    SVN_REPO = SVN_REPO_BASE + svn_project_dirs[component] + "/"
    GIT_REPO = GIT_REPO_BASE + component + ".git"

    # Identify the version
    if version_LLVM not in release_locations:
        error("Unsupported llvm version: " + version_LLVM, 1)
        return
    SVN_PATH, GIT_BRANCH = release_locations[version_LLVM]

    if not use_git:
        try_do_LLVM("load "+component+" from "+SVN_REPO+SVN_PATH+" ",
                    "svn co "+revision+" "+SVN_REPO+SVN_PATH+" "+target_dir,
                    from_validation)
        return

    try_do_LLVM("clone "+component+" from "+GIT_REPO+" to "+target_dir+" ",
                "git clone "+GIT_REPO+" "+target_dir,
                from_validation)
    if GIT_BRANCH != "master":
        # Remember where we started instead of assuming ".." leads back there:
        # that only holds when target_dir is a single relative path component.
        start_dir = os.getcwd()
        os.chdir(target_dir)
        try:
            try_do_LLVM("switch to "+GIT_BRANCH+" branch ",
                        "git checkout -b "+GIT_BRANCH+" remotes/origin/"+GIT_BRANCH, from_validation)
        finally:
            # Restore the caller's working directory even if the step above raises.
            os.chdir(start_dir)
|
||||
|
||||
def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, from_validation, force, make, gcc_toolchain_path, use_git):
|
||||
print_debug("Building LLVM. Version: " + version_LLVM + ". ", from_validation, alloy_build)
|
||||
if revision != "":
|
||||
print_debug("Revision: " + revision + ".\n", from_validation, alloy_build)
|
||||
@@ -119,32 +192,11 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
llvm_home = os.environ["LLVM_HOME"]
|
||||
|
||||
make_sure_dir_exists(llvm_home)
|
||||
|
||||
FOLDER_NAME=version_LLVM
|
||||
version_LLVM = re.sub('\.', '_', version_LLVM)
|
||||
|
||||
os.chdir(llvm_home)
|
||||
FOLDER_NAME=version_LLVM
|
||||
if version_LLVM == "trunk":
|
||||
SVN_PATH="trunk"
|
||||
if version_LLVM == "3.8":
|
||||
SVN_PATH="tags/RELEASE_380/final"
|
||||
version_LLVM = "3_8"
|
||||
if version_LLVM == "3.7":
|
||||
SVN_PATH="tags/RELEASE_370/final"
|
||||
version_LLVM = "3_7"
|
||||
if version_LLVM == "3.6":
|
||||
SVN_PATH="tags/RELEASE_362/final"
|
||||
version_LLVM = "3_6"
|
||||
if version_LLVM == "3.5":
|
||||
SVN_PATH="tags/RELEASE_351/final"
|
||||
version_LLVM = "3_5"
|
||||
if version_LLVM == "3.4":
|
||||
SVN_PATH="tags/RELEASE_34/dot2-final"
|
||||
version_LLVM = "3_4"
|
||||
if version_LLVM == "3.3":
|
||||
SVN_PATH="tags/RELEASE_33/final"
|
||||
version_LLVM = "3_3"
|
||||
if version_LLVM == "3.2":
|
||||
SVN_PATH="tags/RELEASE_32/final"
|
||||
version_LLVM = "3_2"
|
||||
if revision != "":
|
||||
FOLDER_NAME = FOLDER_NAME + "_" + revision
|
||||
revision = "-" + revision
|
||||
@@ -173,7 +225,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
if os.path.exists(os.path.join(path, "xcrun")):
|
||||
found_xcrun = True
|
||||
if found_xcrun:
|
||||
mac_system_root = " --with-default-sysroot=`xcrun --show-sdk-path`"
|
||||
mac_system_root = "`xcrun --show-sdk-path`"
|
||||
else:
|
||||
error("Can't find XCode (xcrun tool) - it's required on MacOS 10.9 and newer", 1)
|
||||
|
||||
@@ -184,13 +236,9 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
llvm_home + "\n", from_validation, alloy_build)
|
||||
# load llvm
|
||||
if tarball == "":
|
||||
try_do_LLVM("load LLVM from http://llvm.org/svn/llvm-project/llvm/" + SVN_PATH + " ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/llvm/" + SVN_PATH + " " + LLVM_SRC,
|
||||
from_validation)
|
||||
checkout_LLVM("llvm", options.use_git, version_LLVM, revision, LLVM_SRC, from_validation)
|
||||
os.chdir(LLVM_SRC + "/tools")
|
||||
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
|
||||
from_validation)
|
||||
checkout_LLVM("clang", options.use_git, version_LLVM, revision, "clang", from_validation)
|
||||
os.chdir("..")
|
||||
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
|
||||
# Starting with MacOS 10.9 Mavericks, the system doesn't contain headers for standard C++ library and
|
||||
@@ -202,19 +250,13 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
# to the linker explicitly (either through command line or environment variables). So we are not doing it
|
||||
# currently to make the build process easier.
|
||||
os.chdir("projects")
|
||||
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
|
||||
from_validation)
|
||||
checkout_LLVM("libcxx", options.use_git, version_LLVM, revision, "libcxx", from_validation)
|
||||
os.chdir("..")
|
||||
if extra == True:
|
||||
os.chdir("tools/clang/tools")
|
||||
try_do_LLVM("load extra clang extra tools ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
|
||||
from_validation)
|
||||
checkout_LLVM("clang-tools-extra", options.use_git, version_LLVM, revision, "extra", from_validation)
|
||||
os.chdir("../../../projects")
|
||||
try_do_LLVM("load extra clang compiler-rt ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
|
||||
from_validation)
|
||||
checkout_LLVM("compiler-rt", options.use_git, version_LLVM, revision, "compiler-rt", from_validation)
|
||||
os.chdir("..")
|
||||
else:
|
||||
tar = tarball.split(" ")
|
||||
@@ -249,7 +291,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
os.makedirs(LLVM_BIN_selfbuild)
|
||||
os.chdir(LLVM_BUILD_selfbuild)
|
||||
if version_LLVM not in LLVM_configure_capable:
|
||||
# TODO: mac_root
|
||||
try_do_LLVM("configure release version for selfbuild ",
|
||||
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
|
||||
" -DCMAKE_INSTALL_PREFIX=" + llvm_home + "/" + LLVM_BIN_selfbuild +
|
||||
@@ -258,6 +299,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" else "") +
|
||||
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" else "") +
|
||||
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
|
||||
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
|
||||
" ../" + LLVM_SRC,
|
||||
from_validation)
|
||||
@@ -269,7 +311,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
LLVM_BIN_selfbuild + " --enable-optimized" +
|
||||
" --enable-targets=x86,x86_64,nvptx" +
|
||||
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
mac_system_root,
|
||||
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
|
||||
from_validation)
|
||||
selfbuild_compiler = ("CC=" +llvm_home+ "/" + LLVM_BIN_selfbuild + "/bin/clang " +
|
||||
"CXX="+llvm_home+ "/" + LLVM_BIN_selfbuild + "/bin/clang++ ")
|
||||
@@ -285,7 +327,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
if debug == False:
|
||||
if current_OS != "Windows":
|
||||
if version_LLVM not in LLVM_configure_capable:
|
||||
# TODO: mac_root
|
||||
try_do_LLVM("configure release version ",
|
||||
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
|
||||
selfbuild_compiler +
|
||||
@@ -295,6 +336,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
|
||||
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
|
||||
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
|
||||
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
|
||||
" ../" + LLVM_SRC,
|
||||
from_validation)
|
||||
@@ -304,7 +346,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
LLVM_BIN + " --enable-optimized" +
|
||||
" --enable-targets=x86,x86_64,nvptx" +
|
||||
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
mac_system_root,
|
||||
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
|
||||
from_validation)
|
||||
else:
|
||||
try_do_LLVM("configure release version ",
|
||||
@@ -313,7 +355,6 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
from_validation)
|
||||
else:
|
||||
if version_LLVM not in LLVM_configure_capable:
|
||||
# TODO: mac_root
|
||||
try_do_LLVM("configure debug version ",
|
||||
"cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
|
||||
selfbuild_compiler +
|
||||
@@ -323,6 +364,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
((" -DGCC_INSTALL_PREFIX=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
((" -DCMAKE_C_COMPILER=" + gcc_toolchain_path+"/bin/gcc") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
|
||||
((" -DCMAKE_CXX_COMPILER=" + gcc_toolchain_path+"/bin/g++") if gcc_toolchain_path != "" and selfbuild_compiler == "" else "") +
|
||||
((" -DDEFAULT_SYSROOT=" + mac_system_root) if mac_system_root != "" else "") +
|
||||
" -DLLVM_TARGETS_TO_BUILD=NVPTX\;X86" +
|
||||
" ../" + LLVM_SRC,
|
||||
from_validation)
|
||||
@@ -332,7 +374,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
" --enable-debug-runtime --enable-debug-symbols --enable-keep-symbols" +
|
||||
" --enable-targets=x86,x86_64,nvptx" +
|
||||
((" --with-gcc-toolchain=" + gcc_toolchain_path) if gcc_toolchain_path != "" else "") +
|
||||
mac_system_root,
|
||||
((" --with-default-sysroot=" + mac_system_root) if mac_system_root != "" else ""),
|
||||
from_validation)
|
||||
# building llvm
|
||||
if current_OS != "Windows":
|
||||
@@ -352,6 +394,8 @@ def unsupported_llvm_targets(LLVM_VERSION):
|
||||
"3.7":["avx512skx-i32x16"],
|
||||
"3.8":[],
|
||||
"3.9":[],
|
||||
"4.0":[],
|
||||
"5.0":[],
|
||||
"trunk":[]}
|
||||
return prohibited_list[LLVM_VERSION]
|
||||
|
||||
@@ -476,8 +520,12 @@ def build_ispc(version_LLVM, make):
|
||||
temp = "3_7"
|
||||
if version_LLVM == "3.8":
|
||||
temp = "3_8"
|
||||
if version_LLVM == "trunk":
|
||||
if version_LLVM == "3.9":
|
||||
temp = "3_9"
|
||||
if version_LLVM == "4.0":
|
||||
temp = "4_0"
|
||||
if version_LLVM == "trunk":
|
||||
temp = "5_0"
|
||||
os.environ["LLVM_VERSION"] = "LLVM_" + temp
|
||||
try_do_LLVM("clean ISPC for building", "msbuild ispc.vcxproj /t:clean", True)
|
||||
try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", "msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", True)
|
||||
@@ -617,7 +665,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
archs.append("x86-64")
|
||||
if "native" in only:
|
||||
sde_targets_t = []
|
||||
for i in ["3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "trunk"]:
|
||||
for i in ["3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9", "4.0", "trunk"]:
|
||||
if i in only:
|
||||
LLVM.append(i)
|
||||
if "current" in only:
|
||||
@@ -675,7 +723,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
gen_archs = ["x86-64"]
|
||||
need_LLVM = check_LLVM(LLVM)
|
||||
for i in range(0,len(need_LLVM)):
|
||||
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path)
|
||||
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path, False)
|
||||
# begin validation run for stability
|
||||
common.remove_if_exists(stability.in_file)
|
||||
R = [[[],[]],[[],[]],[[],[]],[[],[]]]
|
||||
@@ -789,7 +837,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
# prepare newest LLVM
|
||||
need_LLVM = check_LLVM([newest_LLVM])
|
||||
if len(need_LLVM) != 0:
|
||||
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path)
|
||||
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make, options.gcc_toolchain_path, options.use_git)
|
||||
if perf_llvm == False:
|
||||
# prepare reference point. build both test and reference compilers
|
||||
try_do_LLVM("apply git", "git branch", True)
|
||||
@@ -903,7 +951,7 @@ def Main():
|
||||
if os.environ.get("SMTP_ISPC") == None:
|
||||
error("you have no SMTP_ISPC in your environment for option notify", 1)
|
||||
if options.only != "":
|
||||
test_only_r = " 3.2 3.3 3.4 3.5 3.6 3.7 3.8 trunk current build stability performance x86 x86-64 x86_64 -O0 -O2 native debug nodebug "
|
||||
test_only_r = " 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 trunk current build stability performance x86 x86-64 x86_64 -O0 -O2 native debug nodebug "
|
||||
test_only = options.only.split(" ")
|
||||
for iterator in test_only:
|
||||
if not (" " + iterator + " " in test_only_r):
|
||||
@@ -929,11 +977,14 @@ def Main():
|
||||
if options.perf_llvm == True:
|
||||
if options.branch == "master":
|
||||
options.branch = "trunk"
|
||||
if options.use_git and options.revision != "":
|
||||
error("--revision is not supported with --git", 1)
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
if options.build_llvm:
|
||||
build_LLVM(options.version, options.revision, options.folder, options.tarball,
|
||||
options.debug, options.selfbuild, options.extra, False, options.force, make, options.gcc_toolchain_path)
|
||||
options.debug, options.selfbuild, options.extra, False, options.force, make, options.gcc_toolchain_path, options.use_git)
|
||||
if options.validation_run:
|
||||
validation_run(options.only, options.only_targets, options.branch,
|
||||
options.number_for_performance, options.notify, options.update, int(options.speed),
|
||||
@@ -1013,13 +1064,13 @@ if __name__ == '__main__':
|
||||
llvm_group = OptionGroup(parser, "Options for building LLVM",
|
||||
"These options must be used with -b option.")
|
||||
llvm_group.add_option('--version', dest='version',
|
||||
help='version of llvm to build: 3.2 3.3 3.4 3.5 3.6 3.7 3.8 trunk. Default: trunk', default="trunk")
|
||||
help='version of llvm to build: 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 trunk. Default: trunk', default="trunk")
|
||||
llvm_group.add_option('--with-gcc-toolchain', dest='gcc_toolchain_path',
|
||||
help='GCC install dir to use when building clang. It is important to set when ' +
|
||||
'you have alternative gcc installation. Note that otherwise gcc from standard ' +
|
||||
'location will be used, not from your PATH', default="")
|
||||
llvm_group.add_option('--revision', dest='revision',
|
||||
help='revision of llvm to build in format r172870', default="")
|
||||
help='revision of llvm to build in format r172870 (not supported with --git)', default="")
|
||||
llvm_group.add_option('--debug', dest='debug',
|
||||
help='debug build of LLVM?', default=False, action="store_true")
|
||||
llvm_group.add_option('--folder', dest='folder',
|
||||
@@ -1032,6 +1083,8 @@ if __name__ == '__main__':
|
||||
help='rebuild LLVM', default=False, action='store_true')
|
||||
llvm_group.add_option('--extra', dest='extra',
|
||||
help='load extra clang tools', default=False, action='store_true')
|
||||
llvm_group.add_option('--git', dest='use_git',
|
||||
help='use git llvm repository instead of svn', default=False, action='store_true')
|
||||
parser.add_option_group(llvm_group)
|
||||
# options for activity "validation run"
|
||||
run_group = OptionGroup(parser, "Options for validation run",
|
||||
@@ -1054,7 +1107,7 @@ if __name__ == '__main__':
|
||||
run_group.add_option('--only', dest='only',
|
||||
help='set types of tests. Possible values:\n' +
|
||||
'-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance),\n' +
|
||||
'build (only build with different LLVM), 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, trunk, native (do not use SDE),\n' +
|
||||
'build (only build with different LLVM), 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, trunk, native (do not use SDE),\n' +
|
||||
'current (do not rebuild ISPC), debug (only with debug info), nodebug (only without debug info, default).',
|
||||
default="")
|
||||
run_group.add_option('--perf_LLVM', dest='perf_llvm',
|
||||
|
||||
42
builtins.cpp
42
builtins.cpp
@@ -72,7 +72,11 @@
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/ADT/Triple.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#else
|
||||
#include <llvm/Bitcode/BitcodeReader.h>
|
||||
#endif
|
||||
|
||||
extern int yyparse();
|
||||
struct yy_buffer_state;
|
||||
@@ -800,7 +804,13 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
llvm::MemoryBufferRef bcBuf = llvm::MemoryBuffer::getMemBuffer(sb)->getMemBufferRef();
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 // LLVM 4.0+
|
||||
llvm::Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
|
||||
if (!ModuleOrErr) {
|
||||
Error(SourcePos(), "Error parsing stdlib bitcode: %s", toString(ModuleOrErr.takeError()).c_str());
|
||||
} else {
|
||||
llvm::Module *bcModule = ModuleOrErr.get().release();
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||
llvm::ErrorOr<std::unique_ptr<llvm::Module>> ModuleOrErr = llvm::parseBitcodeFile(bcBuf, *g->ctx);
|
||||
if (std::error_code EC = ModuleOrErr.getError())
|
||||
Error(SourcePos(), "Error parsing stdlib bitcode: %s", EC.message().c_str());
|
||||
@@ -989,7 +999,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
diType,
|
||||
true /* static */,
|
||||
sym_const_storagePtr);
|
||||
#else // LLVM 3.7+
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
|
||||
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
|
||||
Assert(sym_const_storagePtr);
|
||||
m->diBuilder->createGlobalVariable(
|
||||
@@ -1001,6 +1011,17 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
diType,
|
||||
true /* static */,
|
||||
sym_const_storagePtr);
|
||||
#else // LLVM 4.0+
|
||||
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
|
||||
llvm::DIGlobalVariableExpression *var = m->diBuilder->createGlobalVariableExpression(
|
||||
file,
|
||||
name,
|
||||
name,
|
||||
file,
|
||||
0 /* line */,
|
||||
diType,
|
||||
true /* static */);
|
||||
sym_GV_storagePtr->addDebugInfo(var);
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
Assert(var.Verify());
|
||||
@@ -1087,7 +1108,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
diType,
|
||||
false /* static */,
|
||||
sym->storagePtr);
|
||||
#else // LLVM 3.7+
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
|
||||
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
|
||||
Assert(sym_const_storagePtr);
|
||||
m->diBuilder->createGlobalVariable(
|
||||
@@ -1099,7 +1120,18 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
diType,
|
||||
false /* static */,
|
||||
sym_const_storagePtr);
|
||||
#endif
|
||||
#else // LLVM 4.0+
|
||||
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
|
||||
llvm::DIGlobalVariableExpression *var = m->diBuilder->createGlobalVariableExpression(
|
||||
file,
|
||||
sym->name.c_str(),
|
||||
sym->name.c_str(),
|
||||
file,
|
||||
0 /* line */,
|
||||
diType,
|
||||
false /* static */);
|
||||
sym_GV_storagePtr->addDebugInfo(var);
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
Assert(var.Verify());
|
||||
#else // LLVM 3.7+
|
||||
|
||||
@@ -151,6 +151,10 @@ define(`PTR_OP_ARGS',
|
||||
LLVM_VERSION, LLVM_3_8,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
``$1 , $1 *'',
|
||||
``$1 *''
|
||||
)
|
||||
|
||||
@@ -617,16 +617,16 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||
}
|
||||
ctlztz()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
;; TODO: should we use masked versions of SVML functions?
|
||||
;; svml
|
||||
|
||||
include(`svml.m4')
|
||||
svml_stubs(float,f,WIDTH)
|
||||
svml_stubs(double,d,WIDTH)
|
||||
svml_declare(float,f16,16)
|
||||
svml_define(float,f16,16,f)
|
||||
|
||||
;; double precision
|
||||
svml_declare(double,8,8)
|
||||
svml_define_x(double,8,8,d,16)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -36,6 +36,10 @@ ifelse(LLVM_VERSION, LLVM_3_7,
|
||||
LLVM_VERSION, LLVM_3_8,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
`include(`target-avx512-common.ll')'
|
||||
)
|
||||
|
||||
@@ -60,6 +64,10 @@ ifelse(LLVM_VERSION, LLVM_3_7,
|
||||
LLVM_VERSION, LLVM_3_8,
|
||||
rcp_rsqrt_varying_float_knl(),
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
rcp_rsqrt_varying_float_knl(),
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
rcp_rsqrt_varying_float_knl(),
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
rcp_rsqrt_varying_float_knl()
|
||||
)
|
||||
|
||||
|
||||
@@ -42,12 +42,12 @@ include(`target-neon-common.ll')
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
|
||||
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone alwaysinline {
|
||||
unary4to8conv(r, i16, float, @llvm.arm.neon.vcvthf2fp, %v)
|
||||
ret <8 x float> %r
|
||||
}
|
||||
|
||||
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
|
||||
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone alwaysinline {
|
||||
unary4to8conv(r, float, i16, @llvm.arm.neon.vcvtfp2hf, %v)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
@@ -115,13 +115,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, float, @llvm.arm.neon.vmaxs.v4f32, %0, %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
|
||||
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, float, @llvm.arm.neon.vmins.v4f32, %0, %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
@@ -131,22 +131,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, i32, @llvm.arm.neon.vmins.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, i32, @llvm.arm.neon.vmaxs.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, i32, @llvm.arm.neon.vminu.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to8(r, i32, @llvm.arm.neon.vmaxu.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
@@ -156,7 +156,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
unary4to8(x0, float, @llvm.arm.neon.vrecpe.v4f32, %d)
|
||||
binary4to8(x0_nr, float, @llvm.arm.neon.vrecps.v4f32, %d, %x0)
|
||||
%x1 = fmul <WIDTH x float> %x0, %x0_nr
|
||||
@@ -168,7 +168,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
unary4to8(x0, float, @llvm.arm.neon.vrsqrte.v4f32, %d)
|
||||
%x0_2 = fmul <WIDTH x float> %x0, %x0
|
||||
binary4to8(x0_nr, float, @llvm.arm.neon.vrsqrts.v4f32, %d, %x0_2)
|
||||
@@ -179,7 +179,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
|
||||
ret <WIDTH x float> %x2
|
||||
}
|
||||
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
@@ -189,7 +189,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
@@ -201,7 +201,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(result, float, @llvm.sqrt.v4f32, %0)
|
||||
;; this returns nan for v=0, which is undesirable..
|
||||
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
|
||||
@@ -211,7 +211,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
|
||||
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
unary4to8(r, double, @llvm.sqrt.v4f64, %0)
|
||||
ret <WIDTH x double> %r
|
||||
}
|
||||
@@ -219,7 +219,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone {
|
||||
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone alwaysinline {
|
||||
%and_mask = and <WIDTH x i16> %0,
|
||||
<i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128>
|
||||
%v4 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %and_mask)
|
||||
@@ -288,48 +288,48 @@ define(`neon_reduce', `
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @add_f32(float, float) {
|
||||
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
|
||||
%r = fadd float %0, %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) {
|
||||
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone alwaysinline {
|
||||
%r = fadd <WIDTH x float> %0, %1
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
|
||||
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__add_varying_float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @min_f32(float, float) {
|
||||
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp olt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__min_varying_float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @max_f32(float, float) {
|
||||
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ugt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__max_varying_float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
|
||||
declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
|
||||
|
||||
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
|
||||
%a16 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %0)
|
||||
%a32 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a16)
|
||||
%a0 = extractelement <2 x i32> %a32, i32 0
|
||||
@@ -341,7 +341,7 @@ define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<WIDTH x i16>)
|
||||
|
||||
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
|
||||
%a1 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<WIDTH x i16> %0)
|
||||
%a2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a1)
|
||||
%aa = extractelement <2 x i64> %a2, i32 0
|
||||
@@ -352,7 +352,7 @@ define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
|
||||
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
v8tov4(i32, %0, %va, %vb)
|
||||
%pa = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %va)
|
||||
%pb = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %vb)
|
||||
@@ -365,53 +365,53 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_si32(i32, i32) {
|
||||
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp slt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__min_varying_int32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_si32(i32, i32) {
|
||||
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp sgt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__max_varying_int32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_ui32(i32, i32) {
|
||||
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ult i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__min_varying_uint32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_ui32(i32, i32) {
|
||||
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ugt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__max_varying_uint32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
|
||||
}
|
||||
|
||||
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
v8tov2(double, %0, %v0, %v1, %v2, %v3)
|
||||
%v01 = fadd <2 x double> %v0, %v1
|
||||
%v23 = fadd <2 x double> %v2, %v3
|
||||
@@ -422,15 +422,15 @@ define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
|
||||
ret double %m
|
||||
}
|
||||
|
||||
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
||||
}
|
||||
|
||||
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
||||
}
|
||||
|
||||
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
v8tov2(i64, %0, %v0, %v1, %v2, %v3)
|
||||
%v01 = add <2 x i64> %v0, %v1
|
||||
%v23 = add <2 x i64> %v2, %v3
|
||||
@@ -441,19 +441,19 @@ define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
|
||||
ret i64 %m
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||
}
|
||||
|
||||
@@ -462,56 +462,56 @@ define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
|
||||
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
|
||||
ret <8 x i8> %r
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i8> @__avg_up_int8(<8 x i8>, <8 x i8>) nounwind readnone {
|
||||
define <8 x i8> @__avg_up_int8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
|
||||
ret <8 x i8> %r
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i8> @__avg_down_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
|
||||
define <8 x i8> @__avg_down_uint8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
|
||||
ret <8 x i8> %r
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i8> @__avg_down_int8(<8 x i8>, <8 x i8>) nounwind readnone {
|
||||
define <8 x i8> @__avg_down_int8(<8 x i8>, <8 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
|
||||
ret <8 x i8> %r
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
|
||||
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @__avg_up_int16(<8 x i16>, <8 x i16>) nounwind readnone {
|
||||
define <8 x i16> @__avg_up_int16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @__avg_down_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
|
||||
define <8 x i16> @__avg_down_uint16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
|
||||
define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
@@ -43,12 +43,12 @@ include(`target-neon-common.ll')
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
define <4 x float> @__half_to_float_varying(<4 x i16> %v) nounwind readnone {
|
||||
define <4 x float> @__half_to_float_varying(<4 x i16> %v) nounwind readnone alwaysinline {
|
||||
%r = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %v)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x i16> @__float_to_half_varying(<4 x float> %v) nounwind readnone {
|
||||
define <4 x i16> @__float_to_half_varying(<4 x float> %v) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %v)
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
@@ -106,13 +106,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %0, <4 x float> %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
|
||||
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %0, <4 x float> %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
@@ -122,22 +122,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %r
|
||||
}
|
||||
@@ -147,7 +147,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
%x0 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %d)
|
||||
%x0_nr = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %d, <4 x float> %x0)
|
||||
%x1 = fmul <4 x float> %x0, %x0_nr
|
||||
@@ -159,7 +159,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
%x0 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %d)
|
||||
%x0_2 = fmul <4 x float> %x0, %x0
|
||||
%x0_nr = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %d, <4 x float> %x0_2)
|
||||
@@ -170,7 +170,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
|
||||
ret <4 x float> %x2
|
||||
}
|
||||
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
@@ -179,7 +179,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
@@ -190,7 +190,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
%result = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
|
||||
;; this returns nan for v=0, which is undesirable..
|
||||
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
|
||||
@@ -200,7 +200,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
|
||||
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %0)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
@@ -208,7 +208,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
define i64 @__movmsk(<4 x MASK>) nounwind readnone {
|
||||
define i64 @__movmsk(<4 x MASK>) nounwind readnone alwaysinline {
|
||||
%and_mask = and <4 x MASK> %0, <MASK 1, MASK 2, MASK 4, MASK 8>
|
||||
%v01 = shufflevector <4 x i32> %and_mask, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%v23 = shufflevector <4 x i32> %and_mask, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
@@ -264,42 +264,42 @@ define(`neon_reduce', `
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @add_f32(float, float) {
|
||||
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
|
||||
%r = fadd float %0, %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_add_float(<4 x float>) nounwind readnone {
|
||||
define float @__reduce_add_float(<4 x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @min_f32(float, float) {
|
||||
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp olt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_min_float(<4 x float>) nounwind readnone {
|
||||
define float @__reduce_min_float(<4 x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @max_f32(float, float) {
|
||||
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ugt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_max_float(<4 x float>) nounwind readnone {
|
||||
define float @__reduce_max_float(<4 x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
|
||||
|
||||
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
|
||||
%v8 = shufflevector <4 x i8> %0, <4 x i8> zeroinitializer,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
|
||||
%a16 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %v8)
|
||||
@@ -313,7 +313,7 @@ define i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
|
||||
|
||||
define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
|
||||
%a32 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %0)
|
||||
%a0 = extractelement <2 x i32> %a32, i32 0
|
||||
%a1 = extractelement <2 x i32> %a32, i32 1
|
||||
@@ -323,7 +323,7 @@ define i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
|
||||
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %0)
|
||||
%a0 = extractelement <2 x i64> %a64, i32 0
|
||||
%a1 = extractelement <2 x i64> %a64, i32 1
|
||||
@@ -333,53 +333,53 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_si32(i32, i32) {
|
||||
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp slt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_si32(i32, i32) {
|
||||
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp sgt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_ui32(i32, i32) {
|
||||
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ult i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_ui32(i32, i32) {
|
||||
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ugt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
|
||||
}
|
||||
|
||||
define double @__reduce_add_double(<4 x double>) nounwind readnone {
|
||||
define double @__reduce_add_double(<4 x double>) nounwind readnone alwaysinline {
|
||||
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||
<2 x i32> <i32 0, i32 1>
|
||||
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||
@@ -391,15 +391,15 @@ define double @__reduce_add_double(<4 x double>) nounwind readnone {
|
||||
ret double %m
|
||||
}
|
||||
|
||||
define double @__reduce_min_double(<4 x double>) nounwind readnone {
|
||||
define double @__reduce_min_double(<4 x double>) nounwind readnone alwaysinline {
|
||||
reduce4(double, @__min_varying_double, @__min_uniform_double)
|
||||
}
|
||||
|
||||
define double @__reduce_max_double(<4 x double>) nounwind readnone {
|
||||
define double @__reduce_max_double(<4 x double>) nounwind readnone alwaysinline {
|
||||
reduce4(double, @__max_varying_double, @__max_uniform_double)
|
||||
}
|
||||
|
||||
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
||||
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone alwaysinline {
|
||||
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||
<2 x i32> <i32 0, i32 1>
|
||||
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||
@@ -411,19 +411,19 @@ define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
||||
ret i64 %m
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone alwaysinline {
|
||||
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone alwaysinline {
|
||||
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone alwaysinline {
|
||||
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone alwaysinline {
|
||||
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||
}
|
||||
|
||||
@@ -432,56 +432,56 @@ define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
||||
|
||||
declare <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
|
||||
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
|
||||
ret <4 x i8> %r
|
||||
}
|
||||
|
||||
declare <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone {
|
||||
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
|
||||
ret <4 x i8> %r
|
||||
}
|
||||
|
||||
declare <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
|
||||
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
|
||||
ret <4 x i8> %r
|
||||
}
|
||||
|
||||
declare <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
|
||||
|
||||
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone {
|
||||
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
|
||||
ret <4 x i8> %r
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
|
||||
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone {
|
||||
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
|
||||
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
|
||||
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone alwaysinline {
|
||||
%r = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
|
||||
@@ -42,12 +42,12 @@ include(`target-neon-common.ll')
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
|
||||
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone alwaysinline {
|
||||
unary4to16conv(r, i16, float, @llvm.arm.neon.vcvthf2fp, %v)
|
||||
ret <16 x float> %r
|
||||
}
|
||||
|
||||
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
|
||||
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone alwaysinline {
|
||||
unary4to16conv(r, float, i16, @llvm.arm.neon.vcvtfp2hf, %v)
|
||||
ret <16 x i16> %r
|
||||
}
|
||||
@@ -125,13 +125,13 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, float, @llvm.arm.neon.vmaxs.v4f32, %0, %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
|
||||
define <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone {
|
||||
<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, float, @llvm.arm.neon.vmins.v4f32, %0, %1)
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
@@ -141,22 +141,22 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, i32, @llvm.arm.neon.vmins.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, i32, @llvm.arm.neon.vmaxs.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, i32, @llvm.arm.neon.vminu.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone {
|
||||
define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
binary4to16(r, i32, @llvm.arm.neon.vmaxu.v4i32, %0, %1)
|
||||
ret <WIDTH x i32> %r
|
||||
}
|
||||
@@ -166,7 +166,7 @@ define <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwin
|
||||
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
unary4to16(x0, float, @llvm.arm.neon.vrecpe.v4f32, %d)
|
||||
binary4to16(x0_nr, float, @llvm.arm.neon.vrecps.v4f32, %d, %x0)
|
||||
%x1 = fmul <WIDTH x float> %x0, %x0_nr
|
||||
@@ -178,7 +178,7 @@ define <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %d) nounwind readnon
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone {
|
||||
define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readnone alwaysinline {
|
||||
unary4to16(x0, float, @llvm.arm.neon.vrsqrte.v4f32, %d)
|
||||
%x0_2 = fmul <WIDTH x float> %x0, %x0
|
||||
binary4to16(x0_nr, float, @llvm.arm.neon.vrsqrts.v4f32, %d, %x0_2)
|
||||
@@ -189,7 +189,7 @@ define <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %d) nounwind readn
|
||||
ret <WIDTH x float> %x2
|
||||
}
|
||||
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
define float @__rsqrt_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<16 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
@@ -201,7 +201,7 @@ define float @__rsqrt_uniform_float(float) nounwind readnone {
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
define float @__rcp_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %0 to <1 x float>
|
||||
%vs = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<16 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
@@ -215,7 +215,7 @@ define float @__rcp_uniform_float(float) nounwind readnone {
|
||||
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone {
|
||||
define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
unary4to16(result, float, @llvm.sqrt.v4f32, %0)
|
||||
;; this returns nan for v=0, which is undesirable..
|
||||
;; %rsqrt = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %0)
|
||||
@@ -225,7 +225,7 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
|
||||
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone {
|
||||
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
unary4to16(r, double, @llvm.sqrt.v4f64, %0)
|
||||
ret <WIDTH x double> %r
|
||||
}
|
||||
@@ -233,7 +233,7 @@ define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readno
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone {
|
||||
define i64 @__movmsk(<WIDTH x MASK>) nounwind readnone alwaysinline {
|
||||
%and_mask = and <WIDTH x i8> %0,
|
||||
<i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128,
|
||||
i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128>
|
||||
@@ -327,41 +327,41 @@ define(`neon_reduce', `
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @add_f32(float, float) {
|
||||
define internal float @add_f32(float, float) nounwind readnone alwaysinline {
|
||||
%r = fadd float %0, %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) {
|
||||
define internal <WIDTH x float> @__add_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone alwaysinline {
|
||||
%r = fadd <WIDTH x float> %0, %1
|
||||
ret <WIDTH x float> %r
|
||||
}
|
||||
|
||||
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_add_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__add_varying_float, @llvm.arm.neon.vpadd.v2f32, @add_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @min_f32(float, float) {
|
||||
define internal float @min_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp olt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_min_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__min_varying_float, @llvm.arm.neon.vpmins.v2f32, @min_f32)
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
define internal float @max_f32(float, float) {
|
||||
define internal float @max_f32(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ugt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone {
|
||||
define float @__reduce_max_float(<WIDTH x float>) nounwind readnone alwaysinline {
|
||||
neon_reduce(float, @__max_varying_float, @llvm.arm.neon.vpmaxs.v2f32, @max_f32)
|
||||
}
|
||||
|
||||
@@ -369,7 +369,7 @@ declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnon
|
||||
declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
|
||||
declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
|
||||
|
||||
define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone alwaysinline {
|
||||
%a16 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %0)
|
||||
%a32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a16)
|
||||
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a32)
|
||||
@@ -379,7 +379,7 @@ define i64 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone {
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone alwaysinline {
|
||||
v16tov8(i16, %0, %va, %vb)
|
||||
%a32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %va)
|
||||
%b32 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %vb)
|
||||
@@ -392,7 +392,7 @@ define i64 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone {
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
v16tov4(i32, %0, %va, %vb, %vc, %vd)
|
||||
%a64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %va)
|
||||
%b64 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %vb)
|
||||
@@ -409,101 +409,101 @@ define i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone {
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_si32(i32, i32) {
|
||||
define internal i32 @min_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp slt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__min_varying_int32, @llvm.arm.neon.vpmins.v2i32, @min_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_si32(i32, i32) {
|
||||
define internal i32 @max_si32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp sgt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__max_varying_int32, @llvm.arm.neon.vpmaxs.v2i32, @max_si32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @min_ui32(i32, i32) {
|
||||
define internal i32 @min_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ult i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__min_varying_uint32, @llvm.arm.neon.vpmins.v2i32, @min_ui32)
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define internal i32 @max_ui32(i32, i32) {
|
||||
define internal i32 @max_ui32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ugt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone {
|
||||
define i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone alwaysinline {
|
||||
neon_reduce(i32, @__max_varying_uint32, @llvm.arm.neon.vpmaxs.v2i32, @max_ui32)
|
||||
}
|
||||
|
||||
define internal double @__add_uniform_double(double, double) {
|
||||
define internal double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||
%r = fadd double %0, %1
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define internal <WIDTH x double> @__add_varying_double(<WIDTH x double>, <WIDTH x double>) {
|
||||
define internal <WIDTH x double> @__add_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone alwaysinline {
|
||||
%r = fadd <WIDTH x double> %0, %1
|
||||
ret <WIDTH x double> %r
|
||||
}
|
||||
|
||||
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_add_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
reduce16(double, @__add_varying_double, @__add_uniform_double)
|
||||
}
|
||||
|
||||
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_min_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
reduce16(double, @__min_varying_double, @__min_uniform_double)
|
||||
}
|
||||
|
||||
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone {
|
||||
define double @__reduce_max_double(<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
reduce16(double, @__max_varying_double, @__max_uniform_double)
|
||||
}
|
||||
|
||||
define internal i64 @__add_uniform_int64(i64, i64) {
|
||||
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||
%r = add i64 %0, %1
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define internal <WIDTH x i64> @__add_varying_int64(<WIDTH x i64>, <WIDTH x i64>) {
|
||||
define internal <WIDTH x i64> @__add_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
%r = add <WIDTH x i64> %0, %1
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce16(i64, @__add_varying_int64, @__add_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce16(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce16(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||
}
|
||||
|
||||
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||
}
|
||||
|
||||
@@ -512,35 +512,35 @@ define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
|
||||
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
|
||||
ret <16 x i8> %r
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <16 x i8> @__avg_up_int8(<16 x i8>, <16 x i8>) nounwind readnone {
|
||||
define <16 x i8> @__avg_up_int8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
|
||||
ret <16 x i8> %r
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <16 x i8> @__avg_down_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
|
||||
define <16 x i8> @__avg_down_uint8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
|
||||
ret <16 x i8> %r
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <16 x i8> @__avg_down_int8(<16 x i8>, <16 x i8>) nounwind readnone {
|
||||
define <16 x i8> @__avg_down_int8(<16 x i8>, <16 x i8>) nounwind readnone alwaysinline {
|
||||
%r = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
|
||||
ret <16 x i8> %r
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
|
||||
v16tov8(i16, %0, %a0, %b0)
|
||||
v16tov8(i16, %1, %a1, %b1)
|
||||
%r0 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
|
||||
@@ -551,7 +551,7 @@ define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
|
||||
v16tov8(i16, %0, %a0, %b0)
|
||||
v16tov8(i16, %1, %a1, %b1)
|
||||
%r0 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)
|
||||
@@ -562,7 +562,7 @@ define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
|
||||
v16tov8(i16, %0, %a0, %b0)
|
||||
v16tov8(i16, %1, %a1, %b1)
|
||||
%r0 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
|
||||
@@ -573,7 +573,7 @@ define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone alwaysinline {
|
||||
v16tov8(i16, %0, %a0, %b0)
|
||||
v16tov8(i16, %1, %a1, %b1)
|
||||
%r0 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)
|
||||
|
||||
@@ -49,7 +49,7 @@ ctlztz()
|
||||
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
|
||||
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vec = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
||||
@@ -58,7 +58,7 @@ define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone alwaysinline {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vec = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
||||
@@ -70,7 +70,14 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
define void @__fastmath() nounwind {
|
||||
declare i32 @llvm.arm.get.fpscr() nounwind
|
||||
declare void @llvm.arm.set.fpscr(i32) nounwind
|
||||
|
||||
define void @__fastmath() nounwind alwaysinline {
|
||||
%x = call i32 @llvm.arm.get.fpscr()
|
||||
; Turn on FTZ (bit 24) and default NaN (bit 25)
|
||||
%y = or i32 %x, 50331648
|
||||
call void @llvm.arm.set.fpscr(i32 %y)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -120,111 +127,111 @@ declare double @__ceil_uniform_double(double) nounwind readnone
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; min/max
|
||||
|
||||
define float @__max_uniform_float(float, float) nounwind readnone {
|
||||
define float @__max_uniform_float(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ugt float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__min_uniform_float(float, float) nounwind readnone {
|
||||
define float @__min_uniform_float(float, float) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ult float %0, %1
|
||||
%r = select i1 %cmp, float %0, float %1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i32 @__min_uniform_int32(i32, i32) nounwind readnone {
|
||||
define i32 @__min_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp slt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__max_uniform_int32(i32, i32) nounwind readnone {
|
||||
define i32 @__max_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp sgt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__min_uniform_uint32(i32, i32) nounwind readnone {
|
||||
define i32 @__min_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ult i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @__max_uniform_uint32(i32, i32) nounwind readnone {
|
||||
define i32 @__max_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ugt i32 %0, %1
|
||||
%r = select i1 %cmp, i32 %0, i32 %1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i64 @__min_uniform_int64(i64, i64) nounwind readnone {
|
||||
define i64 @__min_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||
%cmp = icmp slt i64 %0, %1
|
||||
%r = select i1 %cmp, i64 %0, i64 %1
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define i64 @__max_uniform_int64(i64, i64) nounwind readnone {
|
||||
define i64 @__max_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||
%cmp = icmp sgt i64 %0, %1
|
||||
%r = select i1 %cmp, i64 %0, i64 %1
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define i64 @__min_uniform_uint64(i64, i64) nounwind readnone {
|
||||
define i64 @__min_uniform_uint64(i64, i64) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ult i64 %0, %1
|
||||
%r = select i1 %cmp, i64 %0, i64 %1
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define i64 @__max_uniform_uint64(i64, i64) nounwind readnone {
|
||||
define i64 @__max_uniform_uint64(i64, i64) nounwind readnone alwaysinline {
|
||||
%cmp = icmp ugt i64 %0, %1
|
||||
%r = select i1 %cmp, i64 %0, i64 %1
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
define double @__min_uniform_double(double, double) nounwind readnone {
|
||||
define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp olt double %0, %1
|
||||
%r = select i1 %cmp, double %0, double %1
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @__max_uniform_double(double, double) nounwind readnone {
|
||||
define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||
%cmp = fcmp ogt double %0, %1
|
||||
%r = select i1 %cmp, double %0, double %1
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
|
||||
define <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
%m = icmp slt <WIDTH x i64> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
|
||||
define <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
%m = icmp sgt <WIDTH x i64> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
|
||||
define <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
%m = icmp ult <WIDTH x i64> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone {
|
||||
define <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone alwaysinline {
|
||||
%m = icmp ugt <WIDTH x i64> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x i64> %0, <WIDTH x i64> %1
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
define <WIDTH x double> @__min_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone {
|
||||
<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
%m = fcmp olt <WIDTH x double> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x double> %0, <WIDTH x double> %1
|
||||
ret <WIDTH x double> %r
|
||||
}
|
||||
|
||||
define <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone {
|
||||
<WIDTH x double>) nounwind readnone alwaysinline {
|
||||
%m = fcmp ogt <WIDTH x double> %0, %1
|
||||
%r = select <WIDTH x i1> %m, <WIDTH x double> %0, <WIDTH x double> %1
|
||||
ret <WIDTH x double> %r
|
||||
@@ -234,14 +241,14 @@ define <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
|
||||
define float @__sqrt_uniform_float(float) nounwind readnone {
|
||||
define float @__sqrt_uniform_float(float) nounwind readnone alwaysinline {
|
||||
%r = call float @llvm.sqrt.f32(float %0)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare double @llvm.sqrt.f64(double)
|
||||
|
||||
define double @__sqrt_uniform_double(double) nounwind readnone {
|
||||
define double @__sqrt_uniform_double(double) nounwind readnone alwaysinline {
|
||||
%r = call double @llvm.sqrt.f64(double %0)
|
||||
ret double %r
|
||||
}
|
||||
@@ -251,12 +258,12 @@ define double @__sqrt_uniform_double(double) nounwind readnone {
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
|
||||
define i32 @__popcnt_int32(i32) nounwind readnone {
|
||||
define i32 @__popcnt_int32(i32) nounwind readnone alwaysinline {
|
||||
%v = call i32 @llvm.ctpop.i32(i32 %0)
|
||||
ret i32 %v
|
||||
}
|
||||
|
||||
define i64 @__popcnt_int64(i64) nounwind readnone {
|
||||
define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
||||
%v = call i64 @llvm.ctpop.i64(i64 %0)
|
||||
ret i64 %v
|
||||
}
|
||||
|
||||
@@ -35,6 +35,10 @@ define(`WIDTH',`16')
|
||||
ifelse(LLVM_VERSION, LLVM_3_8,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
`include(`target-avx512-common.ll')'
|
||||
)
|
||||
|
||||
@@ -80,6 +84,10 @@ define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly al
|
||||
ifelse(LLVM_VERSION, LLVM_3_8,
|
||||
rcp_rsqrt_varying_float_skx(),
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
rcp_rsqrt_varying_float_skx(),
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
rcp_rsqrt_varying_float_skx(),
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
rcp_rsqrt_varying_float_skx()
|
||||
)
|
||||
|
||||
|
||||
@@ -54,9 +54,13 @@ define(`MASK_HIGH_BIT_ON',
|
||||
define(`PTR_OP_ARGS',
|
||||
ifelse(LLVM_VERSION, LLVM_3_7,
|
||||
``$1 , $1 *'',
|
||||
ifelse(LLVM_VERSION, LLVM_3_8,
|
||||
LLVM_VERSION, LLVM_3_8,
|
||||
``$1 , $1 *'',
|
||||
ifelse(LLVM_VERSION, LLVM_3_9,
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
``$1 , $1 *'',
|
||||
``$1 *''
|
||||
)
|
||||
|
||||
@@ -57,6 +57,10 @@ define(`PTR_OP_ARGS',
|
||||
LLVM_VERSION, LLVM_3_8,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
``$1 , $1 *'',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
``$1 , $1 *'',
|
||||
``$1 *''
|
||||
)
|
||||
@@ -69,6 +73,10 @@ define(`MdORi64',
|
||||
``i64'',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
``i64'',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
``i64'',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
``i64'',
|
||||
``double''
|
||||
)
|
||||
)
|
||||
@@ -78,6 +86,10 @@ define(`MfORi32',
|
||||
``i32'',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
``i32'',
|
||||
LLVM_VERSION, LLVM_4_0,
|
||||
``i32'',
|
||||
LLVM_VERSION, LLVM_5_0,
|
||||
``i32'',
|
||||
``float''
|
||||
)
|
||||
)
|
||||
@@ -1586,6 +1598,12 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||
',LLVM_VERSION,LLVM_3_9,`
|
||||
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
|
||||
',LLVM_VERSION,LLVM_4_0,`
|
||||
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
|
||||
',LLVM_VERSION,LLVM_5_0,`
|
||||
%r_LANE_ID_t = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst
|
||||
%r_LANE_ID = extractvalue { $2, i1 } %r_LANE_ID_t, 0
|
||||
',`
|
||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst
|
||||
')
|
||||
@@ -1614,6 +1632,12 @@ define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
||||
',LLVM_VERSION,LLVM_3_9,`
|
||||
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||
%r = extractvalue { $2, i1 } %r_t, 0
|
||||
',LLVM_VERSION,LLVM_4_0,`
|
||||
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||
%r = extractvalue { $2, i1 } %r_t, 0
|
||||
',LLVM_VERSION,LLVM_5_0,`
|
||||
%r_t = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst
|
||||
%r = extractvalue { $2, i1 } %r_t, 0
|
||||
',`
|
||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst
|
||||
')
|
||||
|
||||
189
cbackend.cpp
189
cbackend.cpp
@@ -133,8 +133,8 @@
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// This part of code was in LLVM's ConstantsScanner.h,
|
||||
// but it was removed in revision #232397
|
||||
// This part of code was in LLVM's ConstantsScanner.h,
|
||||
// but it was removed in revision #232397
|
||||
|
||||
namespace constant_scanner {
|
||||
class constant_iterator : public std::iterator<std::forward_iterator_tag,
|
||||
@@ -381,8 +381,8 @@ namespace {
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm::ArrayType*> &t,
|
||||
std::vector<llvm::IntegerType*> &i, std::vector<bool> &IsVolatile,
|
||||
static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm::ArrayType*> &t,
|
||||
std::vector<llvm::IntegerType*> &i, std::vector<bool> &IsVolatile,
|
||||
std::vector<int> &Alignment) {
|
||||
TypeFinder(t, i, IsVolatile, Alignment).run(*m);
|
||||
}
|
||||
@@ -390,7 +390,7 @@ static void findUsedArrayAndLongIntTypes(const llvm::Module *m, std::vector<llvm
|
||||
|
||||
static bool is_vec16_i64_ty(llvm::Type *Ty) {
|
||||
llvm::VectorType *VTy = llvm::dyn_cast<llvm::VectorType>(Ty);
|
||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||
if ((VTy != NULL) && (VTy->getElementType()->isIntegerTy()) &&
|
||||
VTy->getElementType()->getPrimitiveSizeInBits() == 64)
|
||||
return true;
|
||||
return false;
|
||||
@@ -462,7 +462,11 @@ namespace {
|
||||
VectorConstantIndex = 0;
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
virtual const char *getPassName() const { return "C backend"; }
|
||||
#else // LLVM 4.0+
|
||||
virtual llvm::StringRef getPassName() const { return "C backend"; }
|
||||
#endif
|
||||
|
||||
void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 // <= 3.6
|
||||
@@ -525,8 +529,10 @@ namespace {
|
||||
bool IgnoreName = false,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL = llvm::AttrListPtr()
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL = llvm::AttributeSet()
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL = llvm::AttributeList()
|
||||
#endif
|
||||
);
|
||||
llvm::raw_ostream &printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
@@ -536,8 +542,10 @@ namespace {
|
||||
void printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL,
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL,
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL,
|
||||
#endif
|
||||
llvm::PointerType *Ty);
|
||||
|
||||
@@ -782,8 +790,10 @@ std::string CWriter::getArrayName(llvm::ArrayType *AT) {
|
||||
void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL,
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL,
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL,
|
||||
#endif
|
||||
llvm::PointerType *TheTy) {
|
||||
llvm::FunctionType *FTy = llvm::cast<llvm::FunctionType>(TheTy->getElementType());
|
||||
@@ -801,8 +811,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
|
||||
llvm::Type *ArgTy = *I;
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#else // LLVM 5.0+
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#endif
|
||||
assert(ArgTy->isPointerTy());
|
||||
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
|
||||
@@ -810,8 +822,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
|
||||
printType(FunctionInnards, ArgTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
"");
|
||||
PrintedType = true;
|
||||
@@ -827,8 +841,10 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out,
|
||||
printType(Out, RetTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
FunctionInnards.str());
|
||||
}
|
||||
@@ -925,8 +941,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
bool IgnoreName,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL
|
||||
#endif
|
||||
) {
|
||||
|
||||
@@ -947,8 +965,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
llvm::Type *ArgTy = *I;
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#else // LLVM 5.0+
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#endif
|
||||
assert(ArgTy->isPointerTy());
|
||||
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
|
||||
@@ -958,8 +978,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
printType(FunctionInnards, ArgTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
"");
|
||||
++Idx;
|
||||
@@ -975,8 +997,10 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
printType(Out, FTy->getReturnType(),
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
FunctionInnards.str());
|
||||
return Out;
|
||||
@@ -1087,7 +1111,7 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty,
|
||||
|
||||
void CWriter::printConstantArray(llvm::ConstantArray *CPA, bool Static) {
|
||||
// vec16_i64 should be handled separately
|
||||
|
||||
|
||||
if (is_vec16_i64_ty(CPA->getOperand(0)->getType())) {
|
||||
Out << "/* vec16_i64 should be loaded carefully on knc */";
|
||||
Out << "\n#if defined(KNC)\n";
|
||||
@@ -1180,6 +1204,7 @@ void CWriter::printConstantDataSequential(llvm::ConstantDataSequential *CDS,
|
||||
|
||||
static inline std::string ftostr(const llvm::APFloat& V) {
|
||||
std::string Buf;
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
if (&V.getSemantics() == &llvm::APFloat::IEEEdouble) {
|
||||
llvm::raw_string_ostream(Buf) << V.convertToDouble();
|
||||
return Buf;
|
||||
@@ -1187,6 +1212,15 @@ static inline std::string ftostr(const llvm::APFloat& V) {
|
||||
llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
|
||||
return Buf;
|
||||
}
|
||||
#else // LLVM 4.0+
|
||||
if (&V.getSemantics() == &llvm::APFloat::IEEEdouble()) {
|
||||
llvm::raw_string_ostream(Buf) << V.convertToDouble();
|
||||
return Buf;
|
||||
} else if (&V.getSemantics() == &llvm::APFloat::IEEEsingle()) {
|
||||
llvm::raw_string_ostream(Buf) << (double)V.convertToFloat();
|
||||
return Buf;
|
||||
}
|
||||
#endif
|
||||
return "<unknown format in ftostr>"; // error
|
||||
}
|
||||
|
||||
@@ -1206,7 +1240,11 @@ static bool isFPCSafeToPrint(const llvm::ConstantFP *CFP) {
|
||||
return false;
|
||||
llvm::APFloat APF = llvm::APFloat(CFP->getValueAPF()); // copy
|
||||
if (CFP->getType() == llvm::Type::getFloatTy(CFP->getContext()))
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
APF.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmNearestTiesToEven, &ignored);
|
||||
#else // LLVM 4.0+
|
||||
APF.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmNearestTiesToEven, &ignored);
|
||||
#endif
|
||||
#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
|
||||
char Buffer[100];
|
||||
sprintf(Buffer, "%a", APF.convertToDouble());
|
||||
@@ -1637,7 +1675,11 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) {
|
||||
// useful.
|
||||
llvm::APFloat Tmp = FPC->getValueAPF();
|
||||
bool LosesInfo;
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
Tmp.convert(llvm::APFloat::IEEEdouble, llvm::APFloat::rmTowardZero, &LosesInfo);
|
||||
#else // LLVM 4.0+
|
||||
Tmp.convert(llvm::APFloat::IEEEdouble(), llvm::APFloat::rmTowardZero, &LosesInfo);
|
||||
#endif
|
||||
V = Tmp.convertToDouble();
|
||||
}
|
||||
|
||||
@@ -1819,11 +1861,11 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) {
|
||||
// when generating code for knl-generic in multitarget mode.
|
||||
// Short vectors are mapped to "native" vectors and cause AVX-512 code
|
||||
// generation in static block initialization (__vec16_* in ::init function).
|
||||
bool isGenericKNL = g->target->getISA() == Target::GENERIC &&
|
||||
bool isGenericKNL = g->target->getISA() == Target::GENERIC &&
|
||||
!g->target->getTreatGenericAsSmth().empty() &&
|
||||
g->mangleFunctionsWithTarget;
|
||||
if (isGenericKNL && CPV->getOperand(0)->getType()->isVectorTy())
|
||||
llvm::report_fatal_error("knl-generic-* target doesn's support short vectors");
|
||||
if (isGenericKNL && CPV->getOperand(0)->getType()->isVectorTy())
|
||||
llvm::report_fatal_error("knl-generic-* target doesn's support short vectors");
|
||||
Out << ' ';
|
||||
printConstant(llvm::cast<llvm::Constant>(CPV->getOperand(0)), Static);
|
||||
for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
|
||||
@@ -2024,7 +2066,7 @@ void CWriter::writeInstComputationInline(llvm::Instruction &I) {
|
||||
|
||||
if (NeedBoolTrunc)
|
||||
Out << "((";
|
||||
|
||||
|
||||
visit(I);
|
||||
|
||||
if (NeedBoolTrunc)
|
||||
@@ -2798,7 +2840,7 @@ void CWriter::printFloatingPointConstants(llvm::Function &F) {
|
||||
// the precision of the printed form, unless the printed form preserves
|
||||
// precision.
|
||||
//
|
||||
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
|
||||
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
|
||||
E = constant_scanner::constant_end(&F); I != E; ++I)
|
||||
printFloatingPointConstants(*I);
|
||||
|
||||
@@ -2865,7 +2907,7 @@ void CWriter::printFloatingPointConstants(const llvm::Constant *C) {
|
||||
// loads to get their values, rather than tediously inserting the
|
||||
// individual values into the vector.
|
||||
void CWriter::printVectorConstants(llvm::Function &F) {
|
||||
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
|
||||
for (constant_scanner::constant_iterator I = constant_scanner::constant_begin(&F),
|
||||
E = constant_scanner::constant_end(&F); I != E; ++I) {
|
||||
const llvm::ConstantDataVector *CDV = llvm::dyn_cast<llvm::ConstantDataVector>(*I);
|
||||
if (CDV == NULL)
|
||||
@@ -3017,7 +3059,7 @@ void CWriter::printModuleTypes() {
|
||||
Out << " struct " << Name << ";\n";
|
||||
}
|
||||
Out << "};\n";
|
||||
|
||||
|
||||
for (unsigned i = 0, e = IntegerTypes.size(); i != e; ++i) {
|
||||
llvm::IntegerType *IT = IntegerTypes[i];
|
||||
if (IT->getIntegerBitWidth() <= 64 || Alignment[i] == 0)
|
||||
@@ -3142,8 +3184,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
llvm::FunctionType *FT = llvm::cast<llvm::FunctionType>(F->getFunctionType());
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL = F->getAttributes();
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL = F->getAttributes();
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL = F->getAttributes();
|
||||
#endif
|
||||
|
||||
std::string tstr;
|
||||
@@ -3180,8 +3224,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
llvm::Type *ArgTy = I->getType();
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#else // LLVM 5.0+
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#endif
|
||||
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */
|
||||
@@ -3193,8 +3239,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
printType(FunctionInnards, ArgTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt),
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
ArgName);
|
||||
PrintedArg = true;
|
||||
@@ -3219,8 +3267,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
llvm::Type *ArgTy = *I;
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::ByVal)) {
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#else // LLVM 5.0+
|
||||
if (PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ByVal)) {
|
||||
#endif
|
||||
assert(ArgTy->isPointerTy());
|
||||
ArgTy = llvm::cast<llvm::PointerType>(ArgTy)->getElementType();
|
||||
@@ -3228,8 +3278,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
printType(FunctionInnards, ArgTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::Attributes::SExt)
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt)
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(Idx).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt)
|
||||
#endif
|
||||
);
|
||||
PrintedArg = true;
|
||||
@@ -3265,8 +3317,10 @@ void CWriter::printFunctionSignature(const llvm::Function *F, bool Prototype) {
|
||||
printType(Out, RetTy,
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::Attributes::SExt),
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeSet::ReturnIndex, llvm::Attribute::SExt),
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(0).hasAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::SExt),
|
||||
#endif
|
||||
FunctionInnards.str());
|
||||
}
|
||||
@@ -4061,17 +4115,17 @@ void CWriter::printIntrinsicDefinition(const llvm::Function &F, llvm::raw_ostrea
|
||||
|
||||
printType(Out, retT);
|
||||
Out << "r;\n";
|
||||
|
||||
|
||||
unsigned NumBits = llvm::cast<llvm::IntegerType>(elemT)->getBitWidth();
|
||||
std::stringstream str_type;
|
||||
if (NumBits <= 32)
|
||||
if (NumBits <= 32)
|
||||
str_type << "uint" << 2 * NumBits << "_t";
|
||||
else {
|
||||
assert(NumBits <= 64 && "Bit widths > 128 not implemented yet");
|
||||
str_type << "llvmUInt128";
|
||||
}
|
||||
|
||||
Out << " " << str_type.str() << " result = (" << str_type.str() << ") a * (" << str_type.str() << ") b;\n";
|
||||
Out << " " << str_type.str() << " result = (" << str_type.str() << ") a * (" << str_type.str() << ") b;\n";
|
||||
Out << " r.field0 = result;\n";
|
||||
Out << " r.field1 = result >> " << NumBits << ";\n";
|
||||
Out << " return r;\n}\n";
|
||||
@@ -4201,8 +4255,10 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
||||
// parameter instead of passing it to the call.
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
const llvm::AttrListPtr &PAL = I.getAttributes();
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
const llvm::AttributeSet &PAL = I.getAttributes();
|
||||
#else // LLVM 5.0+
|
||||
const llvm::AttributeList &PAL = I.getAttributes();
|
||||
#endif
|
||||
|
||||
bool hasByVal = I.hasByValArgument();
|
||||
@@ -4241,7 +4297,7 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
||||
if (Callee->getName() == "malloc" ||
|
||||
Callee->getName() == "_aligned_malloc")
|
||||
Out << "(uint8_t *)";
|
||||
|
||||
|
||||
// This 'if' will fix 'soa-18.ispc' test (fails with optimizations off)
|
||||
// Yet the way the case is fixed is quite dirty and leads to many other fails
|
||||
|
||||
@@ -4302,7 +4358,7 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
||||
|
||||
for (; AI != AE; ++AI, ++ArgNo) {
|
||||
if (PrintedArg) Out << ", ";
|
||||
if (ArgNo == 0 &&
|
||||
if (ArgNo == 0 &&
|
||||
Callee->getName() == "posix_memalign") {
|
||||
// uint8_t** is incompatible with void** without explicit cast.
|
||||
// Should be do this any other functions?
|
||||
@@ -4314,8 +4370,10 @@ void CWriter::visitCallInst(llvm::CallInst &I) {
|
||||
printType(Out, FTy->getParamType(ArgNo),
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::Attributes::SExt)
|
||||
#else /* LLVM 3.3+ */
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::SExt)
|
||||
#else // LLVM 5.0+
|
||||
PAL.getParamAttributes(ArgNo+1).hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::SExt)
|
||||
#endif
|
||||
);
|
||||
Out << ')';
|
||||
@@ -4377,7 +4435,7 @@ bool CWriter::visitBuiltinCall(llvm::CallInst &I, llvm::Intrinsic::ID ID,
|
||||
if (I.getParent()->getParent()->arg_empty())
|
||||
Out << "vararg_dummy_arg";
|
||||
else
|
||||
writeOperand(&*(--I.getParent()->getParent()->arg_end()));
|
||||
writeOperand(&*(std::prev(I.getParent()->getParent()->arg_end())));
|
||||
Out << ')';
|
||||
return true;
|
||||
case llvm::Intrinsic::vaend:
|
||||
@@ -4552,7 +4610,11 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
|
||||
llvm::VectorType *LastIndexIsVector = 0;
|
||||
{
|
||||
for (llvm::gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(*TmpI);
|
||||
#else // LLVM 4.0+
|
||||
LastIndexIsVector = llvm::dyn_cast<llvm::VectorType>(TmpI.getIndexedType());
|
||||
#endif
|
||||
}
|
||||
|
||||
Out << "(";
|
||||
@@ -4581,7 +4643,11 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
|
||||
// exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
|
||||
if (isAddressExposed(Ptr)) {
|
||||
writeOperandInternal(Ptr, Static);
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
} else if (I != E && (*I)->isStructTy()) {
|
||||
#else // LLVM 4.0+
|
||||
} else if (I != E && I.isStruct()) {
|
||||
#endif
|
||||
// If we didn't already emit the first operand, see if we can print it as
|
||||
// P->f instead of "P[0].f"
|
||||
writeOperand(Ptr);
|
||||
@@ -4596,13 +4662,18 @@ void CWriter::printGEPExpression(llvm::Value *Ptr, llvm::gep_type_iterator I,
|
||||
}
|
||||
|
||||
for (; I != E; ++I) {
|
||||
if ((*I)->isStructTy()) {
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
llvm::Type *type = *I;
|
||||
#else // LLVM 4.0+
|
||||
llvm::Type *type = I.getIndexedType();
|
||||
#endif
|
||||
if (type->isStructTy()) {
|
||||
Out << ".field" << llvm::cast<llvm::ConstantInt>(I.getOperand())->getZExtValue();
|
||||
} else if ((*I)->isArrayTy()) {
|
||||
} else if (type->isArrayTy()) {
|
||||
Out << ".array[";
|
||||
writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
|
||||
Out << ']';
|
||||
} else if (!(*I)->isVectorTy()) {
|
||||
} else if (!type->isVectorTy()) {
|
||||
Out << '[';
|
||||
writeOperandWithCast(I.getOperand(), llvm::Instruction::GetElementPtr);
|
||||
Out << ']';
|
||||
@@ -4633,7 +4704,7 @@ void CWriter::writeMemoryAccess(llvm::Value *Operand, llvm::Type *OperandType,
|
||||
Out << '*';
|
||||
if (IsVolatile || IsUnaligned) {
|
||||
Out << "((";
|
||||
if (IsUnaligned && ITy && (ITy->getBitWidth() > 64))
|
||||
if (IsUnaligned && ITy && (ITy->getBitWidth() > 64))
|
||||
Out << "iN_" << ITy->getBitWidth() << "_align_" << Alignment << " *)";
|
||||
else {
|
||||
if (IsUnaligned)
|
||||
@@ -4798,7 +4869,7 @@ void CWriter::visitShuffleVectorInst(llvm::ShuffleVectorInst &SVI) {
|
||||
printType(Out, llvm::PointerType::getUnqual(EltTy));
|
||||
Out << ")(&" << GetValueName(Op)
|
||||
<< "))[" << SrcVal << "]";
|
||||
Out << " \n#endif \n";
|
||||
Out << " \n#endif \n";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4901,7 +4972,11 @@ public:
|
||||
SmearCleanupPass(llvm::Module *m, int width)
|
||||
: BasicBlockPass(ID) { module = m; vectorWidth = width; }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
const char *getPassName() const { return "Smear Cleanup Pass"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Smear Cleanup Pass"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
@@ -4989,7 +5064,7 @@ SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
|
||||
llvm::dyn_cast<llvm::Constant>(shuffleInst->getOperand(2));
|
||||
|
||||
// Check that the shuffle is a broadcast of the element of the first vector,
|
||||
// i.e. mask vector is vector with equal elements of expected size.
|
||||
// i.e. mask vector is vector with equal elements of expected size.
|
||||
if (!(mask &&
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
(mask->isNullValue() || (shuffleInst->getMask()->getType()->isVectorTy() && llvm::dyn_cast<llvm::ConstantVector>(shuffleInst->getMask())->getSplatValue() != 0 ) ) &&
|
||||
@@ -5014,34 +5089,34 @@ SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
|
||||
if (operandVec && operandVec->getNumElements() == 1)
|
||||
return NULL;
|
||||
|
||||
// Insert ExtractElementInstr to get value for smear
|
||||
// Insert ExtractElementInstr to get value for smear
|
||||
|
||||
llvm::Function *extractFunc = module->getFunction("__extract_element");
|
||||
|
||||
|
||||
if (extractFunc == NULL) {
|
||||
// Declare the __extract_element function if needed; it takes a vector and
|
||||
// Declare the __extract_element function if needed; it takes a vector and
|
||||
// a scalar parameter and returns a scalar of the vector parameter type.
|
||||
llvm::Constant *ef =
|
||||
module->getOrInsertFunction("__extract_element",
|
||||
shuffleInst->getOperand(0)->getType()->getVectorElementType(),
|
||||
module->getOrInsertFunction("__extract_element",
|
||||
shuffleInst->getOperand(0)->getType()->getVectorElementType(),
|
||||
shuffleInst->getOperand(0)->getType(),
|
||||
llvm::IntegerType::get(module->getContext(), 32), NULL);
|
||||
extractFunc = llvm::dyn_cast<llvm::Function>(ef);
|
||||
assert(extractFunc != NULL);
|
||||
extractFunc->setDoesNotThrow();
|
||||
extractFunc->setOnlyReadsMemory();
|
||||
}
|
||||
}
|
||||
|
||||
if (extractFunc == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
llvm::Instruction *extractCall =
|
||||
llvm::ExtractElementInst::Create(shuffleInst->getOperand(0),
|
||||
llvm::Instruction *extractCall =
|
||||
llvm::ExtractElementInst::Create(shuffleInst->getOperand(0),
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2
|
||||
// mask is of VectorType
|
||||
llvm::dyn_cast<llvm::ConstantVector>(mask)->getSplatValue(),
|
||||
llvm::dyn_cast<llvm::ConstantVector>(mask)->getSplatValue(),
|
||||
#else
|
||||
mask->getSplatValue(),
|
||||
mask->getSplatValue(),
|
||||
#endif
|
||||
"__extract_element", inst);
|
||||
return extractCall;
|
||||
@@ -5109,7 +5184,11 @@ public:
|
||||
AndCmpCleanupPass()
|
||||
: BasicBlockPass(ID) { }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
const char *getPassName() const { return "AndCmp Cleanup Pass"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "AndCmp Cleanup Pass"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
@@ -5251,7 +5330,11 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // <= 3.9
|
||||
const char *getPassName() const { return "MaskOps Cleanup Pass"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "MaskOps Cleanup Pass"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
private:
|
||||
|
||||
21
ctx.cpp
21
ctx.cpp
@@ -385,10 +385,14 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
llvm::DISubroutineType *diSubprogramType_n =
|
||||
llvm::cast<llvm::DISubroutineType>(getDICompositeType(diSubprogramType));
|
||||
int flags = llvm::DINode::FlagPrototyped;
|
||||
#else /* LLVM 3.8+ */
|
||||
#elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
|
||||
Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
|
||||
llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
|
||||
int flags = llvm::DINode::FlagPrototyped;
|
||||
#else /* LLVM 4.0+ */
|
||||
Assert(llvm::isa<llvm::DISubroutineType>(diSubprogramType));
|
||||
llvm::DISubroutineType *diSubprogramType_n = llvm::cast<llvm::DISubroutineType>(diSubprogramType);
|
||||
llvm::DINode::DIFlags flags = llvm::DINode::FlagPrototyped;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -417,7 +421,16 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
isStatic, true, /* is defn */
|
||||
firstLine, flags,
|
||||
isOptimized, llvmFunction);
|
||||
#else /* LLVM 3.8+ */
|
||||
#elif ISPC_LLVM_VERSION == ISPC_LLVM_3_8 || ISPC_LLVM_VERSION == ISPC_LLVM_3_9 /* LLVM 3.8, 3.9 */
|
||||
diSubprogram =
|
||||
m->diBuilder->createFunction(diFile /* scope */, funSym->name,
|
||||
mangledName, diFile,
|
||||
firstLine, diSubprogramType_n,
|
||||
isStatic, true, /* is defn */
|
||||
firstLine, flags,
|
||||
isOptimized);
|
||||
llvmFunction->setSubprogram(diSubprogram);
|
||||
#else /* LLVM 4.0+ */
|
||||
diSubprogram =
|
||||
m->diBuilder->createFunction(diFile /* scope */, funSym->name,
|
||||
mangledName, diFile,
|
||||
@@ -1821,7 +1834,11 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum) {
|
||||
if (m->diBuilder == NULL)
|
||||
return;
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
int flags = 0;
|
||||
#else // LLVM 4.0+
|
||||
llvm::DINode::DIFlags flags = llvm::DINode::FlagZero;
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6 /* 3.2, 3.3, 3.4, 3.5, 3.6 */
|
||||
llvm::DIScope scope = diSubprogram;
|
||||
llvm::DIType diType = sym->type->GetDIType(scope);
|
||||
|
||||
@@ -4,7 +4,7 @@ An ISPC update with new native AVX512 target for future Xeon CPUs and
|
||||
improvements for debugging, including new switch --dwarf-version to support
|
||||
debugging on old systems.
|
||||
|
||||
The release is based on patched version LLVM 3.8.
|
||||
The release is based on patched LLVM 3.8.
|
||||
|
||||
=== v1.9.0 === (12 Feb 2016)
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.9.1
|
||||
PROJECT_NUMBER = 1.9.2dev
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
4
func.cpp
4
func.cpp
@@ -420,8 +420,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
(
|
||||
#if ISPC_LLVM_VERSION == ISPC_LLVM_3_2 // 3.2
|
||||
(function->getFnAttributes().hasAttribute(llvm::Attributes::AlwaysInline) == false)
|
||||
#else // LLVM 3.3+
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
(function->getAttributes().getFnAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::AlwaysInline) == false)
|
||||
#else // LLVM 5.0+
|
||||
(function->getAttributes().getFnAttributes().hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline) == false)
|
||||
#endif
|
||||
&&
|
||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||
|
||||
12
ispc.cpp
12
ispc.cpp
@@ -1158,11 +1158,19 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
||||
#endif
|
||||
attrBuilder.addAttribute("target-cpu", this->m_cpu);
|
||||
attrBuilder.addAttribute("target-features", this->m_attributes);
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
this->m_tf_attributes = new llvm::AttributeSet(
|
||||
llvm::AttributeSet::get(
|
||||
*g->ctx,
|
||||
llvm::AttributeSet::FunctionIndex,
|
||||
attrBuilder));
|
||||
#else // LLVM 5.0+
|
||||
this->m_tf_attributes = new llvm::AttributeList(
|
||||
llvm::AttributeList::get(
|
||||
*g->ctx,
|
||||
llvm::AttributeList::FunctionIndex,
|
||||
attrBuilder));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1477,7 +1485,11 @@ Target::StructOffset(llvm::Type *type, int element,
|
||||
void Target::markFuncWithTargetAttr(llvm::Function* func) {
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3
|
||||
if (m_tf_attributes) {
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
|
||||
#else // LLVM 5.0+
|
||||
func->addAttributes(llvm::AttributeList::FunctionIndex, *m_tf_attributes);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
14
ispc.h
14
ispc.h
@@ -41,7 +41,7 @@
|
||||
#include "ispc_version.h"
|
||||
|
||||
#if ISPC_LLVM_VERSION < OLDEST_SUPPORTED_LLVM || ISPC_LLVM_VERSION > LATEST_SUPPORTED_LLVM
|
||||
#error "Only LLVM 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8 and 3.9 development branch are supported"
|
||||
#error "Only LLVM 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 and 5.0 development branch are supported"
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@@ -72,7 +72,11 @@
|
||||
|
||||
// Forward declarations of a number of widely-used LLVM types
|
||||
namespace llvm {
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
class AttributeSet;
|
||||
#else // LLVM 5.0+
|
||||
class AttributeList;
|
||||
#endif
|
||||
class BasicBlock;
|
||||
class Constant;
|
||||
class ConstantValue;
|
||||
@@ -86,13 +90,11 @@ namespace llvm {
|
||||
class TargetMachine;
|
||||
class Type;
|
||||
class Value;
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
class DIFile;
|
||||
class DIType;
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
class DIDescriptor;
|
||||
#else // LLVM 3.7+
|
||||
class DIFile;
|
||||
class DIType;
|
||||
class DIScope;
|
||||
#endif
|
||||
}
|
||||
@@ -348,7 +350,11 @@ private:
|
||||
/** Target-specific LLVM attribute, which has to be attached to every
|
||||
function to ensure that it is generated for correct target architecture.
|
||||
This requirement was introduced in LLVM 3.3 */
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_4_0
|
||||
llvm::AttributeSet* m_tf_attributes;
|
||||
#else // LLVM 5.0+
|
||||
llvm::AttributeList* m_tf_attributes;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** Native vector width of the vector instruction set. Note that this
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
#ifndef ISPC_VERSION_H
|
||||
#define ISPC_VERSION_H
|
||||
|
||||
#define ISPC_VERSION "1.9.1"
|
||||
#define ISPC_VERSION "1.9.2dev"
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
|
||||
#define ISPC_LLVM_VERSION ( LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 )
|
||||
@@ -51,9 +51,11 @@
|
||||
#define ISPC_LLVM_3_7 30700
|
||||
#define ISPC_LLVM_3_8 30800
|
||||
#define ISPC_LLVM_3_9 30900
|
||||
#define ISPC_LLVM_4_0 40000
|
||||
#define ISPC_LLVM_5_0 50000
|
||||
|
||||
#define OLDEST_SUPPORTED_LLVM ISPC_LLVM_3_2
|
||||
#define LATEST_SUPPORTED_LLVM ISPC_LLVM_3_9
|
||||
#define LATEST_SUPPORTED_LLVM ISPC_LLVM_5_0
|
||||
|
||||
#ifdef __ispc__xstr
|
||||
#undef __ispc__xstr
|
||||
|
||||
@@ -51,7 +51,11 @@
|
||||
#include <llvm/IR/Constants.h>
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
#define PTYPE(p) (llvm::cast<llvm::SequentialType>((p)->getType()->getScalarType())->getElementType())
|
||||
#else // LLVM 4.0+
|
||||
#define PTYPE(p) (llvm::cast<llvm::PointerType>((p)->getType()->getScalarType())->getElementType())
|
||||
#endif
|
||||
|
||||
namespace llvm {
|
||||
class PHINode;
|
||||
|
||||
46
module.cpp
46
module.cpp
@@ -124,10 +124,15 @@
|
||||
#include <clang/Frontend/TextDiagnosticPrinter.h>
|
||||
#include <clang/Frontend/Utils.h>
|
||||
#include <clang/Basic/TargetInfo.h>
|
||||
#include <clang/Lex/PreprocessorOptions.h>
|
||||
#include <llvm/Support/ToolOutputFile.h>
|
||||
#include <llvm/Support/Host.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#else
|
||||
#include <llvm/Bitcode/BitcodeWriter.h>
|
||||
#endif
|
||||
|
||||
/*! list of files encountered by the parser. this allows emitting of
|
||||
the module file's dependencies via the -MMM option */
|
||||
@@ -426,9 +431,7 @@ Module::Module(const char *fn) {
|
||||
sprintf(producerString, "ispc version %s (built on %s)",
|
||||
ISPC_VERSION, __DATE__);
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
|
||||
diCompileUnit =
|
||||
#endif // LLVM_3_4+
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_3
|
||||
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
|
||||
name, /* filename */
|
||||
directory, /* directory */
|
||||
@@ -436,6 +439,25 @@ Module::Module(const char *fn) {
|
||||
g->opt.level > 0 /* is optimized */,
|
||||
"-g", /* command line args */
|
||||
0 /* run time version */);
|
||||
#elif ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.4-3.9
|
||||
diCompileUnit =
|
||||
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
|
||||
name, /* filename */
|
||||
directory, /* directory */
|
||||
producerString, /* producer */
|
||||
g->opt.level > 0 /* is optimized */,
|
||||
"-g", /* command line args */
|
||||
0 /* run time version */);
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_4_0 // LLVM 4.0+
|
||||
auto srcFile = diBuilder->createFile(name, directory);
|
||||
diCompileUnit =
|
||||
diBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99, /* lang */
|
||||
srcFile, /* filename */
|
||||
producerString, /* producer */
|
||||
g->opt.level > 0 /* is optimized */,
|
||||
"-g", /* command line args */
|
||||
0 /* run time version */);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -734,7 +756,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
||||
sym->type->GetDIType(file),
|
||||
(sym->storageClass == SC_STATIC),
|
||||
sym_const_storagePtr);
|
||||
#else // LLVM 3.7+
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_9 // LLVM 3.7 - 3.9
|
||||
llvm::DIFile *file = pos.GetDIFile();
|
||||
//llvm::MDFile *file = pos.GetDIFile();
|
||||
llvm::Constant *sym_const_storagePtr = llvm::dyn_cast<llvm::Constant>(sym->storagePtr);
|
||||
@@ -748,6 +770,20 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
||||
sym->type->GetDIType(file),
|
||||
(sym->storageClass == SC_STATIC),
|
||||
sym_const_storagePtr);
|
||||
#else // LLVM 4.0+
|
||||
llvm::DIFile *file = pos.GetDIFile();
|
||||
//llvm::MDFile *file = pos.GetDIFile();
|
||||
llvm::GlobalVariable *sym_GV_storagePtr = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
|
||||
Assert(sym_GV_storagePtr);
|
||||
llvm::DIGlobalVariableExpression *var = diBuilder->createGlobalVariableExpression(
|
||||
file,
|
||||
name,
|
||||
name,
|
||||
file,
|
||||
pos.first_line,
|
||||
sym->type->GetDIType(file),
|
||||
(sym->storageClass == SC_STATIC));
|
||||
sym_GV_storagePtr->addDebugInfo(var);
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
Assert(var.Verify());
|
||||
|
||||
49
opt.cpp
49
opt.cpp
@@ -503,8 +503,13 @@ DebugPassManager::add(llvm::Pass * P, int stage = -1) {
|
||||
if (g->debug_stages.find(number) != g->debug_stages.end()) {
|
||||
// adding dump of LLVM IR after optimization
|
||||
char buf[100];
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
sprintf(buf, "\n\n*****LLVM IR after phase %d: %s*****\n\n",
|
||||
number, P->getPassName());
|
||||
#else // LLVM 4.0+
|
||||
sprintf(buf, "\n\n*****LLVM IR after phase %d: %s*****\n\n",
|
||||
number, P->getPassName().data());
|
||||
#endif
|
||||
PM.add(CreateDebugPass(buf));
|
||||
}
|
||||
|
||||
@@ -943,7 +948,11 @@ class IntrinsicsOpt : public llvm::BasicBlockPass {
|
||||
public:
|
||||
IntrinsicsOpt() : BasicBlockPass(ID) {};
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Intrinsics Cleanup Optimization"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Intrinsics Cleanup Optimization"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
@@ -1257,7 +1266,11 @@ public:
|
||||
InstructionSimplifyPass()
|
||||
: BasicBlockPass(ID) { }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Vector Select Optimization"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Vector Select Optimization"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
@@ -1426,7 +1439,11 @@ public:
|
||||
static char ID;
|
||||
ImproveMemoryOpsPass() : BasicBlockPass(ID) { }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Improve Memory Ops"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Improve Memory Ops"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
};
|
||||
|
||||
@@ -3278,7 +3295,11 @@ public:
|
||||
static char ID;
|
||||
GatherCoalescePass() : BasicBlockPass(ID) { }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Gather Coalescing"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Gather Coalescing"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
};
|
||||
|
||||
@@ -4336,7 +4357,11 @@ public:
|
||||
static char ID;
|
||||
ReplacePseudoMemoryOpsPass() : BasicBlockPass(ID) { }
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Replace Pseudo Memory Ops"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Replace Pseudo Memory Ops"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
};
|
||||
|
||||
@@ -4705,7 +4730,11 @@ public:
|
||||
isLastTry = last;
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Resolve \"is compile time constant\""; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Resolve \"is compile time constant\""; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
bool isLastTry;
|
||||
@@ -4800,7 +4829,11 @@ public:
|
||||
sprintf(str_output, "%s", output);
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Dump LLVM IR"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Dump LLVM IR"; }
|
||||
#endif
|
||||
bool runOnModule(llvm::Module &m);
|
||||
|
||||
private:
|
||||
@@ -4846,7 +4879,11 @@ public:
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Make internal funcs \"static\""; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Make internal funcs \"static\""; }
|
||||
#endif
|
||||
bool runOnModule(llvm::Module &m);
|
||||
};
|
||||
|
||||
@@ -4953,7 +4990,11 @@ class PeepholePass : public llvm::BasicBlockPass {
|
||||
public:
|
||||
PeepholePass();
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Peephole Optimizations"; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Peephole Optimizations"; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
@@ -5354,7 +5395,11 @@ public:
|
||||
ReplaceStdlibShiftPass() : BasicBlockPass(ID) {
|
||||
}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Resolve \"replace extract insert chains\""; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Resolve \"replace extract insert chains\""; }
|
||||
#endif
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
};
|
||||
@@ -5453,7 +5498,11 @@ public:
|
||||
static char ID;
|
||||
FixBooleanSelectPass() :FunctionPass(ID) {}
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
const char *getPassName() const { return "Resolve \"replace extract insert chains\""; }
|
||||
#else // LLVM 4.0+
|
||||
llvm::StringRef getPassName() const { return "Resolve \"replace extract insert chains\""; }
|
||||
#endif
|
||||
bool runOnFunction(llvm::Function &F);
|
||||
|
||||
private:
|
||||
|
||||
189
prepro.py
Normal file
189
prepro.py
Normal file
@@ -0,0 +1,189 @@
|
||||
import re
|
||||
import sys
|
||||
|
||||
def floating2float(function, idx):
    """Specialize the ``floating<idx>`` placeholder in *function* to ``float``.

    Every occurrence of the tag carrying exactly this index is substituted;
    placeholders with any other index are left untouched.
    """
    return function.replace('floating<{0}>'.format(idx), 'float')
|
||||
|
||||
def floating2double(function, idx):
    """Specialize the ``floating<idx>`` placeholder in *function* to ``double``.

    Every occurrence of the tag carrying exactly this index is substituted;
    placeholders with any other index are left untouched.
    """
    return function.replace('floating<{0}>'.format(idx), 'double')
|
||||
|
||||
def number2float(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``float``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'float')
|
||||
|
||||
def number2double(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``double``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'double')
|
||||
|
||||
def number2int(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``int``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'int')
|
||||
|
||||
def number2long(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``long``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'long')
|
||||
|
||||
def number2short(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``short``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'short')
|
||||
|
||||
def number2char(function, idx):
    """Specialize the ``number<idx>`` placeholder in *function* to ``char``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('number<{0}>'.format(idx), 'char')
|
||||
|
||||
def integer2int(function, idx):
    """Specialize the ``integer<idx>`` placeholder in *function* to ``int``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('integer<{0}>'.format(idx), 'int')
|
||||
|
||||
def integer2long(function, idx):
    """Specialize the ``integer<idx>`` placeholder in *function* to ``long``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('integer<{0}>'.format(idx), 'long')
|
||||
|
||||
def integer2short(function, idx):
    """Specialize the ``integer<idx>`` placeholder in *function* to ``short``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('integer<{0}>'.format(idx), 'short')
|
||||
|
||||
def integer2char(function, idx):
    """Specialize the ``integer<idx>`` placeholder in *function* to ``char``.

    Only the tag with exactly this index is rewritten.
    """
    return function.replace('integer<{0}>'.format(idx), 'char')
|
||||
|
||||
def strip_types(fun, i):
    """Turn an exported function header into a call of its ``i``-th variant.

    The storage/type keywords (``export``, ``void``, ``float``, ``double``,
    ``char``, ``short``, ``int``, ``long``, ``uniform``), array brackets
    ``[]`` and pointer stars ``*`` are removed from *fun*, and ``_<i>`` is
    inserted in front of every ``(`` so the function name gains its variant
    suffix.  Whitespace left behind by the removed keywords is kept as is.

    Keywords are matched as whole words only.  A plain substring replacement
    would also eat parts of identifiers that merely contain a keyword
    (``points`` -> ``pos``, ``interval`` -> ``erval``), producing a call
    that references undeclared names.

    fun -- header text, e.g. ``export void f(uniform int N)``
    i   -- variant number appended to the function name
    Returns the call expression text, e.g. ``  f_0(  N)``.
    """
    keywords = r'\b(?:export|void|float|double|char|short|int|long|uniform)\b'
    call = re.sub(keywords, '', fun)
    call = call.replace('[]', '').replace('*', '')
    return call.replace('(', '_' + str(i) + '(')
|
||||
|
||||
# Script entry point: expand one .ispc file whose exported function uses the
# placeholder types floating / number / integer into every concrete type
# combination.  Two files are produced next to the input:
#   <input>.pre.ispc  - one renamed copy of the source text per combination
#   <input>.h         - overloaded wrappers inside namespace ispc, each
#                       forwarding to the matching renamed variant
# NOTE(review): the print statements below are Python 2 syntax.
if __name__ == '__main__':
    # Exactly one argument (the .ispc file) is required.
    if (len(sys.argv) != 2):
        print 'usage:\n\tpython ',
        print sys.argv[0],
        print '[file.ispc]'
        exit(1)

    f = open(sys.argv[1], 'r')
    function = f.read()
    # Header of the first exported function: from 'export ' up to the last
    # ')' that precedes a '{'.  Raises IndexError when nothing matches.
    fun_def = re.findall(r'export [^{]*\)', function)[0]
    print fun_def
    f.close()

    # For each placeholder family, find the smallest index k for which the
    # tag '<family><k>' does not occur in the text yet.
    floating = 0
    number = 0
    integer = 0

    while ('floating<' + str(floating) + '>' in function):
        floating += 1

    while ('number<' + str(number) + '>' in function):
        number += 1

    while ('integer<' + str(integer) + '>' in function):
        integer += 1

    # Give every un-indexed placeholder its own fresh index, in both the
    # full text and the extracted header.
    # NOTE(review): the count comes from a regex that accepts any whitespace
    # or '+' after the keyword, while the replacement only targets the
    # keyword followed by a single space; when they disagree the counter
    # still advances without a substitution - confirm this is acceptable.
    # NOTE(review): if the explicit indices in the input have gaps, a fresh
    # index may collide with a higher one already present - verify inputs.
    for i in range(len(re.findall(r'floating[\s+]', function))):
        function = function.replace('floating ',
                'floating<' + str(floating) + '> ', 1)
        fun_def = fun_def.replace('floating ',
                'floating<' + str(floating) + '> ', 1)
        floating += 1

    for i in range(len(re.findall(r'number[\s+]', function))):
        function = function.replace('number ',
                'number<' + str(number) + '> ', 1)
        fun_def = fun_def.replace('number ',
                'number<' + str(number) + '> ', 1)
        number += 1

    for i in range(len(re.findall(r'integer[\s+]', function))):
        function = function.replace('integer ',
                'integer<' + str(integer) + '> ', 1)
        fun_def = fun_def.replace('integer ',
                'integer<' + str(integer) + '> ', 1)
        integer += 1

    # floats[k] holds (header, full text) pairs in which floating<0..k-1>
    # have been resolved; each level doubles the list (float and double).
    floats = [[(fun_def, function)]]

    for i in range(floating):
        floats.append([])
        # 'f' is reused here as the full-text half of the pair (the file
        # handle of the same name was closed above).
        for (h, f) in floats[i]:
            floats[i+1].append((floating2float(h, i),
                floating2float(f, i)
                ))
            floats[i+1].append((floating2double(h, i),
                floating2double(f, i)
                ))

    # Seed the number<> expansion with the fully resolved floating<> level.
    numbers = [[]]

    for f in floats[floating]:
        numbers[0].append(f)

    # Each number<k> placeholder multiplies the variants by six concrete
    # types: float, double, int, long, short, char.
    for i in range(number):
        numbers.append([])
        for (h, f) in numbers[i]:
            numbers[i+1].append((number2float(h, i),
                number2float(f, i)
                ))
            numbers[i+1].append((number2double(h, i),
                number2double(f, i)
                ))
            numbers[i+1].append((number2int(h, i),
                number2int(f, i)
                ))
            numbers[i+1].append((number2long(h, i),
                number2long(f, i)
                ))
            numbers[i+1].append((number2short(h, i),
                number2short(f, i)
                ))
            numbers[i+1].append((number2char(h, i),
                number2char(f, i)
                ))

    # Seed the integer<> expansion with the fully resolved number<> level.
    integers = [[]]

    for f in numbers[number]:
        integers[0].append(f)

    # Each integer<k> placeholder multiplies the variants by four concrete
    # types: int, long, short, char.
    for i in range(integer):
        integers.append([])
        for (h, f) in integers[i]:
            integers[i+1].append((integer2int(h, i),
                integer2int(f, i)
                ))
            integers[i+1].append((integer2long(h, i),
                integer2long(f, i)
                ))
            integers[i+1].append((integer2short(h, i),
                integer2short(f, i)
                ))
            integers[i+1].append((integer2char(h, i),
                integer2char(f, i)
                ))

    o = open(sys.argv[1] + '.pre.ispc', 'w')
    hdr = open(sys.argv[1] + '.h', 'w')

    # The generated header first includes '<input minus last extension>.h',
    # then opens an include guard derived from the argument (upper-cased,
    # '.' replaced by '_') and the ispc namespace.
    # NOTE(review): only '.' is sanitised for the guard macro; a path with
    # directory separators would yield an invalid identifier - confirm the
    # script is always run from the file's own directory.
    hdr.write('#include "' + '.'.join(sys.argv[1].split('.')[:-1]) + '.h"\n\n')
    hdr.write('#ifndef _' + sys.argv[1].upper().replace('.', '_') + '_H_\n')
    hdr.write('#define _' + sys.argv[1].upper().replace('.', '_') + '_H_\n')
    hdr.write('namespace ispc {\n')

    # First identifier in the whole text that is directly followed by '('.
    fun_name = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*\(', function)[0][:-1]

    i = 0
    for (h, f) in integers[integer]:
        # Variant i: rename the first occurrence of the function name in the
        # text to <name>_<i> and append the whole text to the .pre.ispc file.
        o.write(f.replace(fun_name, fun_name+'_'+str(i), 1))
        # Wrapper i: the header without 'uniform'/'export', with a body that
        # returns the call produced by strip_types.
        hdr.write(h.replace('uniform','').replace('export','') \
                + '\n{\n\treturn ' + strip_types(h, i) + ';\n}\n\n')

        i += 1

    o.close()
    hdr.write('}\n')
    hdr.write('#endif\n')
    hdr.close()

    exit(0)
|
||||
@@ -5,8 +5,8 @@ struct Foo { float x; float y; };
|
||||
|
||||
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
||||
float a = aa[programIndex];
|
||||
uniform Foo foo[programCount];
|
||||
for (uniform int i = 0; i < programCount; ++i) {
|
||||
uniform Foo foo[programCount+1];
|
||||
for (uniform int i = 0; i < programCount+1; ++i) {
|
||||
foo[i].x = i;
|
||||
foo[i].y = -1234 + i;
|
||||
}
|
||||
|
||||
@@ -5,8 +5,8 @@ typedef int<3> int3;
|
||||
|
||||
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
||||
float a = aa[programIndex];
|
||||
uniform int3 array[programCount];
|
||||
for (uniform int i = 0; i < programCount + 5 - b; ++i) {
|
||||
uniform int3 array[programCount+1];
|
||||
for (uniform int i = 0; i < programCount + 6 - b; ++i) {
|
||||
for (uniform int j = 0; j < 3; ++j)
|
||||
array[i][j] = i+100*j;
|
||||
}
|
||||
|
||||
23
tests_ispcpp/hello.cpp
Normal file
23
tests_ispcpp/hello.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "hello.ispc.h"
|
||||
|
||||
// Driver for the generated saxpy wrappers: fills two float inputs, runs the
// kernel into a double output buffer and prints every output element.
int main() {
    const int count = 100;

    float A[count];
    float B[count];
    double result[count];

    // A counts down from 100, B holds the squares of the index.
    int k = 0;
    while (k < count) {
        A[k] = 100 - k;
        B[k] = k*k;
        ++k;
    }

    // The literal is a double, A/B are float and result is double; overload
    // resolution picks the wrapper with that type combination.
    ispc::saxpy(count, 3.1415926535, A, B, result);

    // One value per line, six digits after the decimal point.
    for (const double *p = result; p != result + count; ++p) {
        printf("%.6f\n", *p);
    }

    return 0;
}
|
||||
11
tests_ispcpp/hello.ispc
Normal file
11
tests_ispcpp/hello.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
// Scaled vector add: result[i] = scale * X[i] + Y[i] for each i below N.
// The <k> tags mark placeholder types; equal tags stand for one shared
// concrete type, so X and Y always match while scale and result may differ.
// NOTE: review - prepro.py appears to scan this file as plain text, so keep
// comments here free of placeholder keywords and of call-like text.
export void saxpy(uniform int N,
                  uniform floating<0> scale,
                  uniform floating<1> X[],
                  uniform floating<1> Y[],
                  uniform floating<2> result[])
{
    foreach (i = 0 ... N) {
        // tmp is declared with the tag of the output array.
        floating<2> tmp = scale * X[i] + Y[i];
        result[i] = tmp;
    }
}
|
||||
63
type.cpp
63
type.cpp
@@ -541,6 +541,8 @@ llvm::DIType *AtomicType::GetDIType(llvm::DIScope *scope) const {
|
||||
#else //LLVM 3.7++
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
case TYPE_BOOL:
|
||||
return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
@@ -585,6 +587,53 @@ llvm::DIType *AtomicType::GetDIType(llvm::DIScope *scope) const {
|
||||
return m->diBuilder->createBasicType("uint64", 64 /* size */, 64 /* align */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
#else // LLVM 4.0+
|
||||
case TYPE_BOOL:
|
||||
return m->diBuilder->createBasicType("bool", 32 /* size */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
case TYPE_INT8:
|
||||
return m->diBuilder->createBasicType("int8", 8 /* size */,
|
||||
llvm::dwarf::DW_ATE_signed);
|
||||
break;
|
||||
case TYPE_UINT8:
|
||||
return m->diBuilder->createBasicType("uint8", 8 /* size */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
case TYPE_INT16:
|
||||
return m->diBuilder->createBasicType("int16", 16 /* size */,
|
||||
llvm::dwarf::DW_ATE_signed);
|
||||
break;
|
||||
case TYPE_UINT16:
|
||||
return m->diBuilder->createBasicType("uint16", 16 /* size */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
case TYPE_INT32:
|
||||
return m->diBuilder->createBasicType("int32", 32 /* size */,
|
||||
llvm::dwarf::DW_ATE_signed);
|
||||
break;
|
||||
case TYPE_UINT32:
|
||||
return m->diBuilder->createBasicType("uint32", 32 /* size */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
case TYPE_FLOAT:
|
||||
return m->diBuilder->createBasicType("float", 32 /* size */,
|
||||
llvm::dwarf::DW_ATE_float);
|
||||
break;
|
||||
case TYPE_DOUBLE:
|
||||
return m->diBuilder->createBasicType("double", 64 /* size */,
|
||||
llvm::dwarf::DW_ATE_float);
|
||||
break;
|
||||
case TYPE_INT64:
|
||||
return m->diBuilder->createBasicType("int64", 64 /* size */,
|
||||
llvm::dwarf::DW_ATE_signed);
|
||||
break;
|
||||
case TYPE_UINT64:
|
||||
return m->diBuilder->createBasicType("uint64", 64 /* size */,
|
||||
llvm::dwarf::DW_ATE_unsigned);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
FATAL("unhandled basic type in AtomicType::GetDIType()");
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
@@ -2307,9 +2356,15 @@ llvm::DIType *StructType::GetDIType(llvm::DIScope *scope) const {
|
||||
llvm::DIFile *diFile = elementPositions[i].GetDIFile();
|
||||
llvm::DIDerivedType *fieldType =
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
|
||||
line, eltSize, eltAlign,
|
||||
currentSize, 0, eltType);
|
||||
#else // LLVM 4.0+
|
||||
m->diBuilder->createMemberType(scope, elementNames[i], diFile,
|
||||
line, eltSize, eltAlign,
|
||||
currentSize, llvm::DINode::FlagZero, eltType);
|
||||
#endif
|
||||
elementLLVMTypes.push_back(fieldType);
|
||||
|
||||
currentSize += eltSize;
|
||||
@@ -2334,7 +2389,11 @@ llvm::DIType *StructType::GetDIType(llvm::DIScope *scope) const {
|
||||
pos.first_line, // Line number
|
||||
currentSize, // Size in bits
|
||||
align, // Alignment in bits
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
0, // Flags
|
||||
#else // LLVM 4.0+
|
||||
llvm::DINode::FlagZero, // Flags
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
llvm::DIType(), // DerivedFrom
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7++
|
||||
@@ -2584,7 +2643,11 @@ llvm::DIType *UndefinedStructType::GetDIType(llvm::DIScope *scope) const {
|
||||
pos.first_line, // Line number
|
||||
0, // Size
|
||||
0, // Align
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_9
|
||||
0, // Flags
|
||||
#else // LLVM 4.0+
|
||||
llvm::DINode::FlagZero, // Flags
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_3 && ISPC_LLVM_VERSION <= ISPC_LLVM_3_6
|
||||
llvm::DIType(), // DerivedFrom
|
||||
#elif ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||
|
||||
Reference in New Issue
Block a user