diff --git a/.gitignore b/.gitignore index 429199bb..7cdc4a4e 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,6 @@ examples/*/objs/* examples/*/ref examples/*/test *.swp +check_isa.exe diff --git a/alloy.py b/alloy.py index 0aaf3d8d..51aec82b 100755 --- a/alloy.py +++ b/alloy.py @@ -65,7 +65,12 @@ def try_do_LLVM(text, command, from_validation): if from_validation == True: text = text + "\n" print_debug("Trying to " + text, from_validation, alloy_build) - if os.system(command + " >> " + alloy_build + " 2>> " + alloy_build) != 0: + postfix = "" + if current_OS == "Windows": + postfix = " 1>> " + alloy_build + " 2>&1" + else: + postfix = " >> " + alloy_build + " 2>> " + alloy_build + if os.system(command + postfix) != 0: print_debug("ERROR.\n", from_validation, alloy_build) error("can't " + text, 1) print_debug("DONE.\n", from_validation, alloy_build) @@ -144,10 +149,13 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, "tar -xvzf " + tar[1] + " --strip-components 1", from_validation) os.chdir("../../") # paching llvm - patches = glob.glob(os.environ["ISPC_HOME"] + "/llvm_patches/*.*") + patches = glob.glob(os.environ["ISPC_HOME"] + os.sep + "llvm_patches" + os.sep + "*.*") for patch in patches: if version_LLVM in os.path.basename(patch): - try_do_LLVM("patch LLVM with patch" + patch + " ", "patch -p0 < " + patch, from_validation) + if current_OS != "Windows": + try_do_LLVM("patch LLVM with patch " + patch + " ", "patch -p0 < " + patch, from_validation) + else: + try_do_LLVM("patch LLVM with patch " + patch + " ", "C:\\gnuwin32\\bin\\patch.exe -p0 < " + patch, from_validation) os.chdir("../") # configuring llvm, build first part of selfbuild os.makedirs(LLVM_BUILD) @@ -173,77 +181,65 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, print_debug("Now we have compiler for selfbuild: " + selfbuild_compiler + "\n", from_validation, alloy_build) os.chdir(LLVM_BUILD) if debug == False: - 
try_do_LLVM("configure release version ", + if current_OS != "Windows": + try_do_LLVM("configure release version ", "../" + LLVM_SRC + "/configure --prefix=" + llvm_home + "/" + LLVM_BIN + " --enable-optimized" + selfbuild_compiler, from_validation) + else: + try_do_LLVM("configure release version ", + 'cmake -G "Visual Studio 10" -DCMAKE_INSTALL_PREFIX="..\\'+ LLVM_BIN + + '" -DLLVM_LIT_TOOLS_DIR="C:\\gnuwin32\\bin" ..\\' + LLVM_SRC, + from_validation) else: try_do_LLVM("configure debug version ", "../" + LLVM_SRC + "/configure --prefix=" + llvm_home + "/" + LLVM_BIN + " --enable-debug-runtime --enable-debug-symbols --enable-keep-symbols" + selfbuild_compiler, from_validation) # building llvm - try_do_LLVM("build LLVM ", make, from_validation) - try_do_LLVM("install LLVM ", "make install", from_validation) + if current_OS != "Windows": + try_do_LLVM("build LLVM ", make, from_validation) + try_do_LLVM("install LLVM ", "make install", from_validation) + else: + try_do_LLVM("build LLVM and than install LLVM ", "msbuild INSTALL.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", from_validation) os.chdir(current_path) def check_targets(): answer = [] answer_sde = [] - SSE2 = False; - SSE4 = False; - AVX = False; - AVX11 = False; - AVX2 = False; - if current_OS == "Linux": - cpu = open("/proc/cpuinfo") - f_lines = cpu.readlines() - cpu.close() - # check what native targets do we have - for i in range(0,len(f_lines)): - if SSE2 == False and "sse2" in f_lines[i]: - SSE2 = True; - answer = answer + ["sse2-i32x4", "sse2-i32x8"] - if SSE4 == False and "sse4_1" in f_lines[i]: - SSE4 = True; - answer = answer + ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] - if AVX == False and "avx" in f_lines[i]: - AVX = True; - answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] - if AVX11 == False and "rdrand" in f_lines[i]: - AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] - if AVX2 == False and "avx2" in 
f_lines[i]: - AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] - if current_OS == "MacOS": - f_lines = take_lines("sysctl machdep.cpu.features", "first") - if "SSE2" in f_lines: - SSE2 = True; - answer = answer + ["sse2-i32x4", "sse2-i32x8"] - if "SSE4.1" in f_lines: - SSE4 = True; - answer = answer + ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] - if "AVX1.0" in f_lines: - AVX = True; - answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] - if "RDRAND" in f_lines: - AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] - if "AVX2.0" in f_lines: - AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] - - answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] + # check what native targets do we have + if current_OS != "Windows": + try_do_LLVM("build check_ISA", "clang check_isa.cpp -o check_isa.exe", True) + else: + try_do_LLVM("build check_ISA", "cl check_isa.cpp", True) + SSE2 = ["sse2-i32x4", "sse2-i32x8"] + SSE4 = ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] + AVX = ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] + AVX11 = ["avx1.1-i32x8","avx1.1-i32x16","avx1.1-i64x4"] + AVX2 = ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] + targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False], ["SSE2", SSE2, False]] + f_lines = take_lines("check_isa.exe", "first") + for i in range(0,5): + if targets[i][0] in f_lines: + for j in range(i,5): + answer = targets[j][1] + answer + targets[j][2] = True + break + if current_OS != "Windows": + answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] # now check what targets we have with the help of SDE sde_exists = "" PATH_dir = string.split(os.getenv("PATH"), os.pathsep) + if current_OS == "Windows": + sde_n = "sde.exe" + else: + sde_n = "sde" for counter in PATH_dir: - if 
os.path.exists(counter + os.sep + "sde") and sde_exists == "": - sde_exists = counter + os.sep + "sde" + if os.path.exists(counter + os.sep + sde_n) and sde_exists == "": + sde_exists = counter + os.sep + sde_n if os.environ.get("SDE_HOME") != None: - if os.path.exists(os.environ.get("SDE_HOME") + os.sep + "sde"): - sde_exists = os.environ.get("SDE_HOME") + os.sep + "sde" + if os.path.exists(os.environ.get("SDE_HOME") + os.sep + sde_n): + sde_exists = os.environ.get("SDE_HOME") + os.sep + sde_n if sde_exists == "": error("you haven't got sde neither in SDE_HOME nor in your PATH.\n" + "To test all platforms please set SDE_HOME to path containing SDE.\n" + @@ -252,24 +248,38 @@ def check_targets(): # here we have SDE f_lines = take_lines(sde_exists + " -help", "all") for i in range(0,len(f_lines)): - if SSE4 == False and "wsm" in f_lines[i]: + if targets[3][2] == False and "wsm" in f_lines[i]: answer_sde = answer_sde + [["-wsm", "sse4-i32x4"], ["-wsm", "sse4-i32x8"], ["-wsm", "sse4-i16x8"], ["-wsm", "sse4-i8x16"]] - if AVX == False and "snb" in f_lines[i]: + if targets[2][2] == False and "snb" in f_lines[i]: answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]] - if AVX11 == False and "ivb" in f_lines[i]: + if targets[1][2] == False and "ivb" in f_lines[i]: answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]] - if AVX2 == False and "hsw" in f_lines[i]: + if targets[0][2] == False and "hsw" in f_lines[i]: answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]] return [answer, answer_sde] def build_ispc(version_LLVM, make): current_path = os.getcwd() os.chdir(os.environ["ISPC_HOME"]) - p_temp = os.getenv("PATH") - os.environ["PATH"] = os.environ["LLVM_HOME"] + "/bin-" + version_LLVM + "/bin:" + os.environ["PATH"] - try_do_LLVM("clean ISPC for building", "make clean", True) - try_do_LLVM("build ISPC with LLVM version " + 
version_LLVM + " ", make, True) - os.environ["PATH"] = p_temp + if current_OS != "Windows": + p_temp = os.getenv("PATH") + os.environ["PATH"] = os.environ["LLVM_HOME"] + "/bin-" + version_LLVM + "/bin:" + os.environ["PATH"] + try_do_LLVM("clean ISPC for building", "make clean", True) + try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", make, True) + os.environ["PATH"] = p_temp + else: + p_temp = os.getenv("LLVM_INSTALL_DIR") + v_temp = os.getenv("LLVM_VERSION") + os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM + if version_LLVM == "3.3": + temp = "3_3" + if version_LLVM == "trunk": + temp = "3_4" + os.environ["LLVM_VERSION"] = "LLVM_" + temp + try_do_LLVM("clean ISPC for building", "msbuild ispc.vcxproj /t:clean", True) + try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", "msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", True) + os.environ["LLVM_INSTALL_DIR"] = p_temp + os.environ["LLVM_VERSION"] = v_temp os.chdir(current_path) def execute_stability(stability, R, print_version): @@ -305,9 +315,13 @@ def execute_stability(stability, R, print_version): def run_special_tests(): i = 5 +class options_for_drivers: + pass + def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm, time): os.chdir(os.environ["ISPC_HOME"]) - os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] + if current_OS != "Windows": + os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] if options.notify != "": common.remove_if_exists(os.environ["ISPC_HOME"] + os.sep + "notify_log.log") smtp_server = os.environ["SMTP_ISPC"] @@ -319,8 +333,6 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, print_debug("Folder: " + os.environ["ISPC_HOME"] + "\n", False, "") date = datetime.datetime.now() print_debug("Date: " + date.strftime('%H:%M %d/%m/%Y') + "\n", False, "") - class 
options_for_drivers: - pass # *** *** *** # Stability validation run # *** *** *** @@ -475,12 +487,14 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, performance = options_for_drivers() # performance constant options performance.number = number - performance.config = "./perf.ini" - performance.path = "./" + performance.config = "." + os.sep + "perf.ini" + performance.path = "." + os.sep performance.silent = True performance.output = "" performance.compiler = "" performance.ref = "ispc_ref" + if current_OS == "Windows": + performance.ref = "ispc_ref.exe" performance.in_file = "." + os.sep + f_date + os.sep + "performance.log" # prepare LLVM 3.3 as newest LLVM need_LLVM = check_LLVM(["3.3"]) @@ -502,7 +516,11 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, sys.stdout.write(".\n") build_ispc("3.3", make) sys.stdout.write(".\n") - os.rename("ispc", "ispc_ref") + if current_OS != "Windows": + os.rename("ispc", "ispc_ref") + else: + common.remove_if_exists("Release\\ispc_ref.exe") + os.rename("Release\\ispc.exe", "Release\\ispc_ref.exe") try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True) if stashing: try_do_LLVM("return current branch ", "git stash pop", True) @@ -541,7 +559,6 @@ def Main(): global current_OS if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True: current_OS = "Windows" - error("Windows isn't supported now", 1) else: if (platform.system() == 'Darwin'): current_OS = "MacOS" @@ -566,7 +583,9 @@ def Main(): for iterator in test_only: if not (" " + iterator + " " in test_only_r): error("unknow option for only: " + iterator, 1) - + if current_OS == "Windows": + if options.debug == True or options.selfbuild == True or options.tarball != "": + error("Debug, selfbuild and tarball options are unsupported on windows", 1) global f_date f_date = "logs" common.remove_if_exists(f_date) @@ -626,79 +645,80 @@ import common 
error = common.error take_lines = common.take_lines print_debug = common.print_debug -# parsing options -class MyParser(OptionParser): - def format_epilog(self, formatter): - return self.epilog -examples = ("Examples:\n" + -"Load and build LLVM from trunk\n\talloy.py -b\n" + -"Load and build LLVM 3.3. Rewrite LLVM folders\n\talloy.py -b --version=3.3 --force\n" + -"Untar files llvm.tgz clang.tgz, build LLVM from them in folder bin-from_tar\n\talloy.py -b --tarball='llvm.tgz clang.tgz' --folder=from_tar\n" + -"Load LLVM from trunk, revision r172870. Build it. Do selfbuild\n\talloy.py -b --revision=r172870 --selfbuild\n" + -"Validation run with LLVM 3.3, trunk; x86, x86-64; -O2;\nall supported targets; performance\n\talloy.py -r\n" + -"Validation run with all avx targets and sse4-i8x16 without performance\n\talloy.py -r --only=stability --only-targets='avx sse4-i8x16'\n" + -"Validation run with avx2-i32x8, all sse4 and sse2 targets\nand all targets with i32x16\n\talloy.py -r --only-targets='avx2-i32x8 sse4 i32x16 sse2'\n" + -"Stability validation run with LLVM 3.2, 3.3; -O0; x86,\nupdate fail_db.txt with passes and fails\n\talloy.py -r --only='3.2 -O0 stability 3.3 x86' --update-errors=FP\n" + -"Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + -"Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + -"Validation run. 
Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") -parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) -parser.add_option('-b', '--build-llvm', dest='build_llvm', - help='ask to build LLVM', default=False, action="store_true") -parser.add_option('-r', '--run', dest='validation_run', - help='ask for validation run', default=False, action="store_true") -parser.add_option('-j', dest='speed', - help='set -j for make', default="8") -# options for activity "build LLVM" -llvm_group = OptionGroup(parser, "Options for building LLVM", +if __name__ == '__main__': + # parsing options + class MyParser(OptionParser): + def format_epilog(self, formatter): + return self.epilog + examples = ("Examples:\n" + + "Load and build LLVM from trunk\n\talloy.py -b\n" + + "Load and build LLVM 3.3. Rewrite LLVM folders\n\talloy.py -b --version=3.3 --force\n" + + "Untar files llvm.tgz clang.tgz, build LLVM from them in folder bin-from_tar\n\talloy.py -b --tarball='llvm.tgz clang.tgz' --folder=from_tar\n" + + "Load LLVM from trunk, revision r172870. Build it. 
Do selfbuild\n\talloy.py -b --revision=r172870 --selfbuild\n" + + "Validation run with LLVM 3.3, trunk; x86, x86-64; -O2;\nall supported targets; performance\n\talloy.py -r\n" + + "Validation run with all avx targets and sse4-i8x16 without performance\n\talloy.py -r --only=stability --only-targets='avx sse4-i8x16'\n" + + "Validation run with avx2-i32x8, all sse4 and sse2 targets\nand all targets with i32x16\n\talloy.py -r --only-targets='avx2-i32x8 sse4 i32x16 sse2'\n" + + "Stability validation run with LLVM 3.2, 3.3; -O0; x86,\nupdate fail_db.txt with passes and fails\n\talloy.py -r --only='3.2 -O0 stability 3.3 x86' --update-errors=FP\n" + + "Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + + "Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + + "Validation run. Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") + parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) + parser.add_option('-b', '--build-llvm', dest='build_llvm', + help='ask to build LLVM', default=False, action="store_true") + parser.add_option('-r', '--run', dest='validation_run', + help='ask for validation run', default=False, action="store_true") + parser.add_option('-j', dest='speed', + help='set -j for make', default="8") + # options for activity "build LLVM" + llvm_group = OptionGroup(parser, "Options for building LLVM", "These options must be used with -b option.") -llvm_group.add_option('--version', dest='version', - help='version of llvm to build: 3.1 3.2 3.3 trunk. 
Default: trunk', default="trunk") -llvm_group.add_option('--revision', dest='revision', - help='revision of llvm to build in format r172870', default="") -llvm_group.add_option('--debug', dest='debug', - help='debug build of LLVM?', default=False, action="store_true") -llvm_group.add_option('--folder', dest='folder', - help='folder to build LLVM in', default="") -llvm_group.add_option('--tarball', dest='tarball', - help='"llvm_tarball clang_tarball"', default="") -llvm_group.add_option('--selfbuild', dest='selfbuild', - help='make selfbuild of LLVM and clang', default=False, action="store_true") -llvm_group.add_option('--force', dest='force', - help='rebuild LLVM', default=False, action='store_true') -llvm_group.add_option('--extra', dest='extra', - help='load extra clang tools', default=False, action='store_true') -parser.add_option_group(llvm_group) -# options for activity "validation run" -run_group = OptionGroup(parser, "Options for validation run", + llvm_group.add_option('--version', dest='version', + help='version of llvm to build: 3.1 3.2 3.3 trunk. 
Default: trunk', default="trunk") + llvm_group.add_option('--revision', dest='revision', + help='revision of llvm to build in format r172870', default="") + llvm_group.add_option('--debug', dest='debug', + help='debug build of LLVM?', default=False, action="store_true") + llvm_group.add_option('--folder', dest='folder', + help='folder to build LLVM in', default="") + llvm_group.add_option('--tarball', dest='tarball', + help='"llvm_tarball clang_tarball"', default="") + llvm_group.add_option('--selfbuild', dest='selfbuild', + help='make selfbuild of LLVM and clang', default=False, action="store_true") + llvm_group.add_option('--force', dest='force', + help='rebuild LLVM', default=False, action='store_true') + llvm_group.add_option('--extra', dest='extra', + help='load extra clang tools', default=False, action='store_true') + parser.add_option_group(llvm_group) + # options for activity "validation run" + run_group = OptionGroup(parser, "Options for validation run", "These options must be used with -r option.") -run_group.add_option('--compare-with', dest='branch', - help='set performance reference point. Dafault: master', default="master") -run_group.add_option('--number', dest='number_for_performance', - help='number of performance runs for each test. Default: 5', default=5) -run_group.add_option('--notify', dest='notify', - help='email to sent results to', default="") -run_group.add_option('--update-errors', dest='update', - help='rewrite fail_db.txt file according to received results (F or FP)', default="") -run_group.add_option('--only-targets', dest='only_targets', - help='set list of targets to test. Possible values - all subnames of targets.', - default="") -run_group.add_option('--time', dest='time', - help='display time of testing', default=False, action='store_true') -run_group.add_option('--only', dest='only', - help='set types of tests. 
Possible values:\n' + - '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + - 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', + run_group.add_option('--compare-with', dest='branch', + help='set performance reference point. Dafault: master', default="master") + run_group.add_option('--number', dest='number_for_performance', + help='number of performance runs for each test. Default: 5', default=5) + run_group.add_option('--notify', dest='notify', + help='email to sent results to', default="") + run_group.add_option('--update-errors', dest='update', + help='rewrite fail_db.txt file according to received results (F or FP)', default="") + run_group.add_option('--only-targets', dest='only_targets', + help='set list of targets to test. Possible values - all subnames of targets.', default="") -run_group.add_option('--perf_LLVM', dest='perf_llvm', - help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') -parser.add_option_group(run_group) -# options for activity "setup PATHS" -setup_group = OptionGroup(parser, "Options for setup", + run_group.add_option('--time', dest='time', + help='display time of testing', default=False, action='store_true') + run_group.add_option('--only', dest='only', + help='set types of tests. 
Possible values:\n' + + '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + + 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', + default="") + run_group.add_option('--perf_LLVM', dest='perf_llvm', + help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') + parser.add_option_group(run_group) + # options for activity "setup PATHS" + setup_group = OptionGroup(parser, "Options for setup", "These options must be use with -r or -b to setup environment variables") -setup_group.add_option('--llvm_home', dest='llvm_home',help='path to LLVM',default="") -setup_group.add_option('--ispc_home', dest='ispc_home',help='path to ISPC',default="") -setup_group.add_option('--sde_home', dest='sde_home',help='path to SDE',default="") -parser.add_option_group(setup_group) -(options, args) = parser.parse_args() -Main() + setup_group.add_option('--llvm_home', dest='llvm_home',help='path to LLVM',default="") + setup_group.add_option('--ispc_home', dest='ispc_home',help='path to ISPC',default="") + setup_group.add_option('--sde_home', dest='sde_home',help='path to SDE',default="") + parser.add_option_group(setup_group) + (options, args) = parser.parse_args() + Main() diff --git a/builtins.cpp b/builtins.cpp index af9649b7..730e315c 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -536,6 +536,12 @@ lSetInternalFunctions(llvm::Module *module) { "__set_system_isa", "__sext_uniform_bool", "__sext_varying_bool", + "__shift_double", + "__shift_float", + "__shift_i16", + "__shift_i32", + "__shift_i64", + "__shift_i8", "__shuffle2_double", "__shuffle2_float", "__shuffle2_i16", diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 2a5d1b32..92b7a18e 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -80,6 +80,13 @@ declare @__rotate_i32(, i32) nounwind readnone declare 
@__rotate_double(, i32) nounwind readnone declare @__rotate_i64(, i32) nounwind readnone +declare @__shift_i8(, i32) nounwind readnone +declare @__shift_i16(, i32) nounwind readnone +declare @__shift_float(, i32) nounwind readnone +declare @__shift_i32(, i32) nounwind readnone +declare @__shift_double(, i32) nounwind readnone +declare @__shift_i64(, i32) nounwind readnone + declare @__shuffle_i8(, ) nounwind readnone declare @__shuffle2_i8(, , ) nounwind readnone diff --git a/builtins/util.m4 b/builtins/util.m4 index 11501780..e1c9bf97 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -797,6 +797,24 @@ not_const: ret %result } +define @__shift_$1(, i32) nounwind readnone alwaysinline { + %ptr = alloca , i32 3 + %ptr0 = getelementptr * %ptr, i32 0 + store zeroinitializer, * %ptr0 + %ptr1 = getelementptr * %ptr, i32 1 + store %0, * %ptr1 + %ptr2 = getelementptr * %ptr, i32 2 + store zeroinitializer, * %ptr2 + + %offset = add i32 %1, WIDTH + %ptr_as_elt_array = bitcast * %ptr to [eval(3*WIDTH) x $1] * + %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset + %load_ptr_vec = bitcast $1 * %load_ptr to * + %result = load * %load_ptr_vec, align $2 + ret %result +} + + define @__shuffle_$1(, ) nounwind readnone alwaysinline { forloop(i, 0, eval(WIDTH-1), ` %index_`'i = extractelement %1, i32 i') diff --git a/check_isa.cpp b/check_isa.cpp new file mode 100644 index 00000000..a4d10606 --- /dev/null +++ b/check_isa.cpp @@ -0,0 +1,129 @@ +/* + Copyright (c) 2013, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/////////////////////////////////////////////////////////////////////////////// +// // +// This file is a standalone program, which detects the best supported ISA. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + + + +#include + +#if defined(_WIN32) || defined(_WIN64) +#define ISPC_IS_WINDOWS +#include +#endif + +#if !defined (__arm__) +#if !defined(ISPC_IS_WINDOWS) +static void __cpuid(int info[4], int infoType) { + __asm__ __volatile__ ("cpuid" + : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3]) + : "0" (infoType)); +} + +/* Save %ebx in case it's the PIC register */ +static void __cpuidex(int info[4], int level, int count) { + __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t" + "cpuid\n\t" + "xchg{l}\t{%%}ebx, %1\n\t" + : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3]) + : "0" (level), "2" (count)); +} +#endif // !ISPC_IS_WINDOWS + +static bool __os_has_avx_support() { +#if defined(ISPC_IS_WINDOWS) + // Check if the OS will save the YMM registers + unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + return (xcrFeatureMask & 6) == 6; +#else // !defined(ISPC_IS_WINDOWS) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); + return (rEAX & 6) == 6; +#endif // !defined(ISPC_IS_WINDOWS) +} +#endif // !__arm__ + + +static const char * +lGetSystemISA() { +#ifdef __arm__ + return "ARM NEON"; +#else + int info[4]; + __cpuid(info, 1); + + if ((info[2] & (1 << 28)) != 0 && + __os_has_avx_support()) { // AVX + // AVX1 for sure.... + // Ivy Bridge? + if ((info[2] & (1 << 29)) != 0 && // F16C + (info[2] & (1 << 30)) != 0) { // RDRAND + // So far, so good. AVX2? 
+ // Call cpuid with eax=7, ecx=0 + int info2[4]; + __cpuidex(info2, 7, 0); + if ((info2[1] & (1 << 5)) != 0) { + return "AVX2 (codename Haswell)"; + } + else { + return "AVX1.1 (codename Ivy Bridge)"; + } + } + // Regular AVX + return "AVX (codename Sandy Bridge)"; + } + else if ((info[2] & (1 << 19)) != 0) { + return "SSE4"; + } + else if ((info[3] & (1 << 26)) != 0) { + return "SSE2"; + } + else { + return "Error"; + } +#endif +} + +int main () { + const char* isa = lGetSystemISA(); + printf("ISA: %s\n", isa); + + return 0; +} diff --git a/docs/ispc.rst b/docs/ispc.rst index eac9b24e..93b6ac9b 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -2344,8 +2344,11 @@ based on C++'s ``new`` and ``delete`` operators: In the above code, each program instance allocates its own ``count`` sized array of ``uniform int`` values, uses that memory, and then deallocates that memory. Uses of ``new`` and ``delete`` in ``ispc`` programs are -serviced by corresponding calls the system C library's ``malloc()`` and -``free()`` functions. +implemented as calls to C library's aligned memory allocation routines, +which are platform dependent (``posix_memalign()`` and ``free()`` on Linux +and Mac and ``_aligned_malloc()`` and ``_aligned_free()`` on Windows). So it's +advised to pair ISPC's ``new`` and ``delete`` with each other, but not with +C/C++ memory management functions. Note that the rules for ``uniform`` and ``varying`` for ``new`` are analogous to the corresponding rules for pointers (as described in @@ -3719,6 +3722,22 @@ the size of the gang (it is masked to ensure valid offsets). double rotate(double value, uniform int offset) +The ``shift()`` function allows each program instance to find the value of +the given value that their neighbor ``offset`` steps away has. This is similar +to ``rotate()`` with the exception that values are not circularly shifted. +Instead, zeroes are shifted in where appropriate. 
+ + +:: + + int8 shift(int8 value, uniform int offset) + int16 shift(int16 value, uniform int offset) + int32 shift(int32 value, uniform int offset) + int64 shift(int64 value, uniform int offset) + float shift(float value, uniform int offset) + double shift(double value, uniform int offset) + + Finally, the ``shuffle()`` functions allow two variants of fully general shuffling of values among the program instances. For the first version, each program instance's value of permutation gives the program instance @@ -3751,7 +3770,7 @@ the last element of ``value1``, etc.) double shuffle(double value0, double value1, int permutation) Finally, there are primitive operations that extract and set values in the -SIMD lanes. You can implement all of the broadcast, rotate, and shuffle +SIMD lanes. You can implement all of the broadcast, rotate, shift, and shuffle operations described above in this section from these routines, though in general, not as efficiently. These routines are useful for implementing other reductions and cross-lane communication that isn't included in the diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index 48e26e40..a5b354ce 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -26,15 +26,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h @@ -44,6 +44,7 @@ {F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} Win32Proj aobench + ispc diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj index d54332b6..5247762c 100644 --- a/examples/aobench_instrumented/aobench_instrumented.vcxproj +++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj @@ -26,15 +26,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h 
--instrument --target=sse2 $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h @@ -44,6 +44,7 @@ {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958} Win32Proj aobench_instrumented + ispc @@ -171,4 +172,4 @@ - \ No newline at end of file + diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 9a2a64bf..94e38540 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -22,6 +22,7 @@ {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj mandelbrot + ispc @@ -158,15 +159,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h 
--target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/examples.sln b/examples/examples.sln index e9992f76..2285f6a6 100755 --- a/examples/examples.sln +++ b/examples/examples.sln @@ -25,6 +25,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deferred_shading", "deferre EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perfbench", "perfbench\perfbench.vcxproj", "{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sort", "sort\sort.vcxproj", "{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -129,6 +131,14 @@ Global {D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|Win32.Build.0 = Release|Win32 {D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|x64.ActiveCfg = 
Release|x64 {D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|x64.Build.0 = Release|x64 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Debug|Win32.ActiveCfg = Debug|Win32 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Debug|Win32.Build.0 = Debug|Win32 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Debug|x64.ActiveCfg = Debug|x64 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Debug|x64.Build.0 = Debug|x64 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Release|Win32.ActiveCfg = Release|Win32 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Release|Win32.Build.0 = Release|Win32 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Release|x64.ActiveCfg = Release|x64 + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index d81101f7..fa794276 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -311,6 +311,17 @@ static FORCEINLINE VTYPE __rotate_##NAME(VTYPE v, int index) { \ return ret; \ } \ +#define SHIFT(VTYPE, NAME, STYPE) \ +static FORCEINLINE VTYPE __shift_##NAME(VTYPE v, int index) { \ + VTYPE ret; \ + for (int i = 0; i < 16; ++i) { \ + int modIndex = i+index; \ + STYPE val = ((modIndex >= 0) && (modIndex < 16)) ? 
v.v[modIndex] : 0; \ + ret.v[i] = val; \ + } \ + return ret; \ +} \ + #define SHUFFLES(VTYPE, NAME, STYPE) \ static FORCEINLINE VTYPE __shuffle_##NAME(VTYPE v, __vec16_i32 index) { \ VTYPE ret; \ @@ -492,6 +503,7 @@ SETZERO(__vec16_i8, i8) UNDEF(__vec16_i8, i8) BROADCAST(__vec16_i8, i8, int8_t) ROTATE(__vec16_i8, i8, int8_t) +SHIFT(__vec16_i8, i8, int8_t) SHUFFLES(__vec16_i8, i8, int8_t) LOAD_STORE(__vec16_i8, int8_t) @@ -537,6 +549,7 @@ SETZERO(__vec16_i16, i16) UNDEF(__vec16_i16, i16) BROADCAST(__vec16_i16, i16, int16_t) ROTATE(__vec16_i16, i16, int16_t) +SHIFT(__vec16_i16, i16, int16_t) SHUFFLES(__vec16_i16, i16, int16_t) LOAD_STORE(__vec16_i16, int16_t) @@ -582,6 +595,7 @@ SETZERO(__vec16_i32, i32) UNDEF(__vec16_i32, i32) BROADCAST(__vec16_i32, i32, int32_t) ROTATE(__vec16_i32, i32, int32_t) +SHIFT(__vec16_i32, i32, int32_t) SHUFFLES(__vec16_i32, i32, int32_t) LOAD_STORE(__vec16_i32, int32_t) @@ -627,6 +641,7 @@ SETZERO(__vec16_i64, i64) UNDEF(__vec16_i64, i64) BROADCAST(__vec16_i64, i64, int64_t) ROTATE(__vec16_i64, i64, int64_t) +SHIFT(__vec16_i64, i64, int64_t) SHUFFLES(__vec16_i64, i64, int64_t) LOAD_STORE(__vec16_i64, int64_t) @@ -672,6 +687,7 @@ SETZERO(__vec16_f, float) UNDEF(__vec16_f, float) BROADCAST(__vec16_f, float, float) ROTATE(__vec16_f, float, float) +SHIFT(__vec16_f, float, float) SHUFFLES(__vec16_f, float, float) LOAD_STORE(__vec16_f, float) @@ -832,6 +848,7 @@ SETZERO(__vec16_d, double) UNDEF(__vec16_d, double) BROADCAST(__vec16_d, double, double) ROTATE(__vec16_d, double, double) +SHIFT(__vec16_d, double, double) SHUFFLES(__vec16_d, double, double) LOAD_STORE(__vec16_d, double) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index ff00d920..919716be 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -108,22 +108,21 @@ struct __vec4_i64 { }; struct __vec4_i32 { - __vec4_i32() { } + FORCEINLINE __vec4_i32() { } FORCEINLINE __vec4_i32(__m128i vv) : v(vv) { } - FORCEINLINE __vec4_i32(uint32_t a, 
uint32_t b, uint32_t c, uint32_t d) { + FORCEINLINE __vec4_i32(int32_t a, int32_t b, int32_t c, int32_t d) { v = _mm_set_epi32(d, c, b, a); } - FORCEINLINE __vec4_i32(uint32_t *p) { + FORCEINLINE __vec4_i32(int32_t *p) { v = _mm_loadu_si128((__m128i *)p); } - + FORCEINLINE __vec4_i32(const __vec4_i32 &other) : v(other.v) {} + FORCEINLINE __vec4_i32& operator =(const __vec4_i32 &o) { v=o.v; return *this; } FORCEINLINE operator __m128() const { return _mm_castsi128_ps(v); } - + __m128i v; }; -static inline int32_t __extract_element(__vec4_i32 v, int index); - struct __vec4_i16 { __vec4_i16() { } FORCEINLINE __vec4_i16(__m128i vv) : v(vv) { } @@ -215,6 +214,64 @@ INSERT_EXTRACT(__vec1_i64, int64_t) INSERT_EXTRACT(__vec1_f, float) INSERT_EXTRACT(__vec1_d, double) +static FORCEINLINE bool __extract_element(const __vec4_i1 &v, int index) { + return ((int32_t *)&v)[index] ? true : false; +} + +static FORCEINLINE void __insert_element(__vec4_i1 *v, int index, bool val) { + ((int32_t *)v)[index] = val ? 
-1 : 0; +} + +static FORCEINLINE int8_t __extract_element(const __vec4_i8 &v, int index) { + return ((int8_t *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) { + ((int8_t *)v)[index] = val; +} + +static FORCEINLINE int16_t __extract_element(const __vec4_i16 &v, int index) { + return ((int16_t *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val) { + ((int16_t *)v)[index] = val; +} + +static FORCEINLINE int32_t __extract_element(const __vec4_i32 &v, int index) { + return ((int32_t *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_i32 *v, int index, int32_t val) { + ((int32_t *)v)[index] = val; +} + +static FORCEINLINE int64_t __extract_element(const __vec4_i64 &v, int index) { + return ((int64_t *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_i64 *v, int index, int64_t val) { + ((int64_t *)v)[index] = val; +} + +static FORCEINLINE float __extract_element(const __vec4_f &v, int index) { + return ((float *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_f *v, int index, float val) { + ((float *)v)[index] = val; +} + +static FORCEINLINE double __extract_element(const __vec4_d &v, int index) { + return ((double *)&v)[index]; +} + +static FORCEINLINE void __insert_element(__vec4_d *v, int index, double val) { + ((double *)v)[index] = val; +} + + + #define CAST_BITS_SCALAR(TO, FROM) \ static FORCEINLINE TO __cast_bits(TO, FROM v) { \ union { \ @@ -313,13 +370,6 @@ static FORCEINLINE __vec4_i1 __select(__vec4_i1 mask, __vec4_i1 a, __vec4_i1 b) return _mm_blendv_ps(b.v, a.v, mask.v); } -static FORCEINLINE bool __extract_element(__vec4_i1 v, int index) { - return ((int32_t *)&v)[index] ? true : false; -} - -static FORCEINLINE void __insert_element(__vec4_i1 *v, int index, bool val) { - ((int32_t *)v)[index] = val ? -1 : 0; -} template static FORCEINLINE __vec4_i1 __load(const __vec4_i1 *v) { // FIXME: handle align of 16... 
@@ -564,13 +614,6 @@ static FORCEINLINE __vec4_i8 __select(__vec4_i1 mask, __vec4_i8 a, __vec4_i8 b) _mm_extract_epi8(b.v, 3)); } -static FORCEINLINE int8_t __extract_element(__vec4_i8 v, int index) { - return ((int8_t *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) { - ((int8_t *)v)[index] = val; -} template __vec4_i8 __smear_i8(int8_t v); template <> FORCEINLINE __vec4_i8 __smear_i8<__vec4_i8>(int8_t v) { @@ -598,6 +641,20 @@ static FORCEINLINE __vec4_i8 __rotate_i8(__vec4_i8 v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +static FORCEINLINE __vec4_i8 __shift_i8(__vec4_i8 v, int delta) { + int8_t v1, v2, v3, v4; + int d1, d2, d3, d4; + d1 = delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0; + v4 = ((d4 >= 0) && (d4 < 4)) ? __extract_element(v, d4) : 0; + return __vec4_i8(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_i8 __shuffle_i8(__vec4_i8 v, __vec4_i32 index) { return __vec4_i8(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -836,13 +893,6 @@ static FORCEINLINE __vec4_i16 __select(__vec4_i1 mask, __vec4_i16 a, __vec4_i16 _mm_extract_epi16(b.v, 3)); } -static FORCEINLINE int16_t __extract_element(__vec4_i16 v, int index) { - return ((int16_t *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val) { - ((int16_t *)v)[index] = val; -} template __vec4_i16 __smear_i16(int16_t v); template <> FORCEINLINE __vec4_i16 __smear_i16<__vec4_i16>(int16_t v) { @@ -870,6 +920,20 @@ static FORCEINLINE __vec4_i16 __rotate_i16(__vec4_i16 v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +static FORCEINLINE __vec4_i16 __shift_i16(__vec4_i16 v, int delta) { + int16_t v1, v2, v3, v4; + int d1, d2, d3, d4; + d1 
= delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0; + v4 = ((d4 >= 0) && (d4 < 4)) ? __extract_element(v, d4) : 0; + return __vec4_i16(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_i16 __shuffle_i16(__vec4_i16 v, __vec4_i32 index) { return __vec4_i16(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -1109,13 +1173,6 @@ template <> FORCEINLINE __vec4_i32 __undef_i32<__vec4_i32>() { return __vec4_i32(); } -static FORCEINLINE int32_t __extract_element(__vec4_i32 v, int index) { - return ((int32_t *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_i32 *v, int index, int32_t val) { - ((int32_t *)v)[index] = val; -} static FORCEINLINE __vec4_i32 __broadcast_i32(__vec4_i32 v, int index) { return _mm_set1_epi32(__extract_element(v, index)); @@ -1128,6 +1185,21 @@ static FORCEINLINE __vec4_i32 __rotate_i32(__vec4_i32 v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +#include +static FORCEINLINE __vec4_i32 __shift_i32(const __vec4_i32 &v, int delta) { + int32_t v1, v2, v3, v4; + int32_t d1, d2, d3, d4; + d1 = delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0; + v4 = ((d4 >= 0) && (d4 < 4)) ? 
__extract_element(v, d4) : 0; + return __vec4_i32(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_i32 __shuffle_i32(__vec4_i32 v, __vec4_i32 index) { return __vec4_i32(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -1383,13 +1455,6 @@ template <> FORCEINLINE __vec4_i64 __undef_i64<__vec4_i64>() { return __vec4_i64(); } -static FORCEINLINE int64_t __extract_element(__vec4_i64 v, int index) { - return ((int64_t *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_i64 *v, int index, int64_t val) { - ((int64_t *)v)[index] = val; -} static FORCEINLINE __vec4_i64 __broadcast_i64(__vec4_i64 v, int index) { uint64_t val = __extract_element(v, index); @@ -1403,6 +1468,20 @@ static FORCEINLINE __vec4_i64 __rotate_i64(__vec4_i64 v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +static FORCEINLINE __vec4_i64 __shift_i64(__vec4_i64 v, int delta) { + int64_t v1, v2, v3, v4; + int d1, d2, d3, d4; + d1 = delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0; + v4 = ((d4 >= 0) && (d4 < 4)) ? 
__extract_element(v, d4) : 0; + return __vec4_i64(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_i64 __shuffle_i64(__vec4_i64 v, __vec4_i32 index) { return __vec4_i64(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -1504,13 +1583,6 @@ template <> FORCEINLINE __vec4_f __undef_float<__vec4_f>() { return __vec4_f(); } -static FORCEINLINE float __extract_element(__vec4_f v, int index) { - return ((float *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_f *v, int index, float val) { - ((float *)v)[index] = val; -} static FORCEINLINE __vec4_f __broadcast_float(__vec4_f v, int index) { return _mm_set1_ps(__extract_element(v, index)); @@ -1523,6 +1595,20 @@ static FORCEINLINE __vec4_f __rotate_float(__vec4_f v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +static FORCEINLINE __vec4_f __shift_float(__vec4_f v, int delta) { + float v1, v2, v3, v4; + int d1, d2, d3, d4; + d1 = delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0.f; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0.f; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0.f; + v4 = ((d4 >= 0) && (d4 < 4)) ? 
__extract_element(v, d4) : 0.f; + return __vec4_f(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_f __shuffle_float(__vec4_f v, __vec4_i32 index) { return __vec4_f(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -1656,13 +1742,6 @@ template <> FORCEINLINE __vec4_d __undef_double<__vec4_d>() { return __vec4_d(); } -static FORCEINLINE double __extract_element(__vec4_d v, int index) { - return ((double *)&v)[index]; -} - -static FORCEINLINE void __insert_element(__vec4_d *v, int index, double val) { - ((double *)v)[index] = val; -} static FORCEINLINE __vec4_d __broadcast_double(__vec4_d v, int index) { return __vec4_d(_mm_set1_pd(__extract_element(v, index)), @@ -1676,6 +1755,20 @@ static FORCEINLINE __vec4_d __rotate_double(__vec4_d v, int delta) { __extract_element(v, (delta+3) & 0x3)); } +static FORCEINLINE __vec4_d __shift_double(__vec4_d v, int delta) { + double v1, v2, v3, v4; + int d1, d2, d3, d4; + d1 = delta+0; + d2 = delta+1; + d3 = delta+2; + d4 = delta+3; + v1 = ((d1 >= 0) && (d1 < 4)) ? __extract_element(v, d1) : 0; + v2 = ((d2 >= 0) && (d2 < 4)) ? __extract_element(v, d2) : 0; + v3 = ((d3 >= 0) && (d3 < 4)) ? __extract_element(v, d3) : 0; + v4 = ((d4 >= 0) && (d4 < 4)) ? 
__extract_element(v, d4) : 0; + return __vec4_d(v1, v2, v3, v4); +} + static FORCEINLINE __vec4_d __shuffle_double(__vec4_d v, __vec4_i32 index) { return __vec4_d(__extract_element(v, __extract_element(index, 0) & 0x3), __extract_element(v, __extract_element(index, 1) & 0x3), @@ -1889,7 +1982,7 @@ static FORCEINLINE __vec4_f __cast_sitofp(__vec4_f, __vec4_i16 val) { (float)((int16_t)_mm_extract_epi16(val.v, 3))); } -static FORCEINLINE __vec4_f __cast_sitofp(__vec4_f, __vec4_i32 val) { +static FORCEINLINE __vec4_f __cast_sitofp(__vec4_f, const __vec4_i32 &val) { return _mm_cvtepi32_ps(val.v); } diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 7b78d1dd..1b6f1281 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -22,6 +22,7 @@ {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1} Win32Proj mandelbrot + ispc @@ -155,15 +156,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h 
$(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index 3a8fca79..fbebdc32 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -22,6 +22,7 @@ {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj mandelbrot_tasks + ispc @@ -160,15 +161,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 
--target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index 4e983759..01456625 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -22,6 +22,7 @@ {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} Win32Proj noise + ispc @@ -155,15 +156,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 + 
$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index b029b598..77fb9353 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -22,6 +22,7 @@ {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj options + ispc @@ -160,15 +161,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + 
$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/perfbench/perfbench.vcxproj b/examples/perfbench/perfbench.vcxproj index 31974ac7..d94b753c 100644 --- a/examples/perfbench/perfbench.vcxproj +++ b/examples/perfbench/perfbench.vcxproj @@ -22,6 +22,7 @@ {d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8} Win32Proj perfbench + ispc @@ -155,15 +156,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx + 
$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 4cfefb81..19d40192 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -22,6 +22,7 @@ {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj rt + ispc @@ -152,18 +153,18 @@ Document -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h 
$(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj index 65af97bb..34908223 100644 --- a/examples/simple/simple.vcxproj +++ b/examples/simple/simple.vcxproj @@ -25,18 +25,18 @@ Document -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h @@ -46,6 +46,7 @@ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filena {947C5311-8B78-4D05-BEE4-BCF342D4B367} Win32Proj simple + ispc diff --git a/examples/sort/sort.cpp b/examples/sort/sort.cpp index f5e4264a..20221d90 100644 --- a/examples/sort/sort.cpp +++ b/examples/sort/sort.cpp @@ -78,7 +78,7 @@ int main (int argc, char *argv[]) for (i = 0; i < m; i ++) { - for (j = 0; j < n; j ++) code [j] = random() % l; + for (j = 0; j < n; j ++) code [j] = rand() % l; reset_and_start_timer(); @@ -96,7 +96,7 @@ int main (int argc, char *argv[]) for (i = 0; i < m; i ++) { - for (j = 0; j < n; j ++) code [j] = random() % l; + for (j = 0; j < n; j ++) code [j] = rand() % l; reset_and_start_timer(); @@ -114,7 +114,7 @@ int main (int argc, char *argv[]) for (i = 0; i < m; i ++) { - for (j = 0; j < n; j ++) code [j] = random() % l; + for (j = 0; j < n; j ++) code [j] = rand() % l; reset_and_start_timer(); diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj new file mode 100644 index 00000000..b37eab1c --- /dev/null +++ b/examples/sort/sort.vcxproj @@ -0,0 +1,177 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} + Win32Proj + sort + ispc + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + 
Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + + + + + Document + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index ce5d7979..a96a187d 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -22,6 +22,7 @@ {2ef070a1-f62f-4e6a-944b-88d140945c3c} Win32Proj rt + ispc @@ -152,18 +153,18 @@ Document -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 
%(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx +$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index 908cf734..d3594b98 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -22,6 +22,7 @@ {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj volume + ispc @@ -156,15 +157,15 @@ Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx + $(ISPC_compiler) 
-O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/expr.cpp b/expr.cpp index c92503e0..1cbebad5 100644 --- a/expr.cpp +++ b/expr.cpp @@ -2798,6 +2798,17 @@ BinaryExpr::TypeCheck() { } } +const Type * +BinaryExpr::GetLValueType() const { + const Type *t = GetType(); + if (CastType(t) != NULL) { + // Are we doing something like (basePtr + offset)[...] = ... 
+ return t; + } + else { + return NULL; + } +} int BinaryExpr::EstimateCost() const { @@ -4266,8 +4277,9 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } else { Symbol *baseSym = GetBaseSymbol(); - if (dynamic_cast(baseExpr) == NULL) { - // Only check for non-function calls + if (dynamic_cast(baseExpr) == NULL && + dynamic_cast(baseExpr) == NULL) { + // Don't check if we're doing a function call or pointer arith AssertPos(pos, baseSym != NULL); } mask = lMaskForSymbol(baseSym, ctx); @@ -7065,9 +7077,22 @@ TypeCastExpr::GetLValue(FunctionEmitContext *ctx) const { const Type * TypeCastExpr::GetType() const { - // We have to switch off this assert after supporting of operators. - //AssertPos(pos, type->HasUnboundVariability() == false); - return type; + // Here we try to resolve situation where (base_type) can be treated as + // (uniform base_type) or (varying base_type). This is a part of function + // TypeCastExpr::TypeCheck. After implementation of operators we + // have to have this functionality here. + const Type *toType = type, *fromType = expr->GetType(); + if (toType == NULL || fromType == NULL) + return NULL; + if (toType->HasUnboundVariability()) { + if (fromType->IsUniformType()) { + toType = type->ResolveUnboundVariability(Variability::Uniform); + } else { + toType = type->ResolveUnboundVariability(Variability::Varying); + } + } + AssertPos(pos, toType->HasUnboundVariability() == false); + return toType; } diff --git a/expr.h b/expr.h index f8b96abd..45780414 100644 --- a/expr.h +++ b/expr.h @@ -155,6 +155,7 @@ public: llvm::Value *GetValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; void Print() const; Expr *Optimize(); diff --git a/fail_db.txt b/fail_db.txt index 9c43c7f0..da77cac3 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -1,600 +1,12 @@ % List of known fails. % The list is unordered and contains information about commonly used platforms / configurations.
% Our goas is to maintain this list for Linux, MacOS and Windows with reasonably new compilers. -% Note, that it's important which C++ compiler was used. For example, gcc 4.4 is know to produce -% considerably more fails with generic targets, than gcc 4.7 or later. -% Using old compilers (gcc 4.4 is considered to be relatively old) may cause LLVM bugs. -% To avoid them you can use LLVM selfbuild. +% Note that it's important which C++ compiler was used. The currently supported C++ compilers are +% clang 3.3 on Linux and MacOS and cl (VS2010) on Windows. +% Please also note that it's very important to have a correctly built LLVM. There are a number of +% LLVM bugs in released versions that we have to work around by applying patches (see llvm_patches +% folder). The recommended way to build LLVM on Unix is to use "alloy.py". % -./tests/masked-scatter-vector.ispc runfail x86-64 sse2-i32x4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 *
-./tests/atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-down-uint16.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-down-uint8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-up-uint16.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-up-uint8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/broadcast-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/count-leading-trailing-zeros-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/count-leading-trailing-zeros-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/exclusive-scan-add-10.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/exclusive-scan-add-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/exclusive-scan-add-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/exclusive-scan-and-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/exclusive-scan-or-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-uniform-7.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-uniform-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-uniform-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-varying-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-varying-7.ispc runfail 
x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-varying-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-varying-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/idiv.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/int64-max-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/int64-max.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/int64-min-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/int64-min.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-11.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-12.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/local-atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/new-delete-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/phi-opts-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/phi-opts-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/popcnt-1.ispc runfail x86-64 generic-4 Linux 
LLVM 3.3 g++4.4 -O2 * -./tests/popcnt-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/popcnt-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-add-int16-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-add-int16.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-10.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-12.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-13.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-7.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/reduce-equal-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/rotate.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/short-vec-14.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle-flatten.ispc runfail x86-64 generic-4 Linux LLVM 3.3 
g++4.4 -O2 * -./tests/shuffle.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/soa-27.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/soa-28.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-128.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-57.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/uint64-max-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/uint64-max.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/uint64-min-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/uint64-min.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-141.ispc runfail x86-64 
generic-16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.3 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-gather-ifs.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-gather-multi-unif.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-gather-unif.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-mixed-unif-vary-indexing-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-mixed-unif-vary-indexing-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-mixed-unif-vary-indexing.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-multidim-gather.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-scatter-unif-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/array-scatter-vary.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 
-O2 * -./tests/array-struct-gather.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-down-uint16.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-down-uint8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-up-uint16.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-up-uint8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/broadcast-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-array-gather-ifs.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-array-gather-unif.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-array-multidim-gather.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-array-struct-gather.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-struct-test-114.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/cfor-unif-struct-test-114.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/count-leading-trailing-zeros-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/count-leading-trailing-zeros-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/exclusive-scan-add-10.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/exclusive-scan-add-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/exclusive-scan-add-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/exclusive-scan-and-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 
* -./tests/exclusive-scan-or-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-uniform-7.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-uniform-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-uniform-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-varying-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-varying-7.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-varying-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/funcptr-varying-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/gather-int16.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/gather-to-vload-neg-offset.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/global-array-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/idiv.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/int64-max-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/int64-max.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/int64-min-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/int64-min.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-11.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * 
-./tests/local-atomics-12.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/masked-scatter-struct.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/masked-scatter-vector.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/nested-structs-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/new-delete-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/pass-varying-lvalue-to-ref.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/phi-opts-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/phi-opts-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/popcnt-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/popcnt-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/popcnt-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-add-int16-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-add-int16.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-1.ispc runfail x86-64 generic-4 Linux LLVM 
3.4 g++4.4 -O2 * -./tests/reduce-equal-10.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-12.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-13.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-7.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/rotate.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/scatter-int16-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/scatter-int16.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/scatter-mask-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/scatter-mask-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/short-vec-12.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/short-vec-14.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle-flatten.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 
-O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/soa-28.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/struct-test-114.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-128.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-57.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/uint64-max-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/uint64-max.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/uint64-min-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/uint64-min.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/unif-struct-test-114.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/varying-struct-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/varying-struct-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/varying-struct-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/write-same-loc.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * 
-./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.4 g++4.4 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-141.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-11.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-8.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-swap.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * 
-./tests/local-atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/local-atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/memset-varying.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-1.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.4 g++4.4 -O2 * -./tests/masked-scatter-struct.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.4 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * 
-./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-11.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-12.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/memset-varying.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 
3.3 g++4.7 -O2 * -./tests/reduce-equal-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/rotate-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle-flatten.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-10.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-4.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-8.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.3 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 generic-16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.3 g++4.7 -O2 * 
-./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/rotate.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shift1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/shift1.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-11.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-12.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-2.ispc runfail 
x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/memset-varying.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/rotate-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle-flatten.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-10.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-4.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * 
-./tests/shuffle2-8.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.4 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-11.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-8.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-swap.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 
g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/memset-varying.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-1.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shift1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/shift1.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.4 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-10.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-11.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * 
-./tests/reduce-equal-10.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-11.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-10.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-11.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-10.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-11.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 sse4-i8x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.3 
g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/memset-varying.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-10.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * 
-./tests/shuffle2-4.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-8.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Mac LLVM 3.3 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 generic-16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Mac LLVM 3.3 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86 
avx1.1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-11.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-12.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-13.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-5.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-6.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-7.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-8.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-swap.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/memset-varying.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-1.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-12.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * 
-./tests/reduce-equal-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal-7.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/broadcast-1.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/half-3.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-1.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-13.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-5.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-9.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-swap.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/local-atomics-varyingptr-3.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/memset-varying.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/rotate-2.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle-4.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-1.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-10.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-11.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-2.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-3.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-4.ispc 
runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-5.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-6.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-7.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-8.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2-9.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/shuffle2.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-129.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-130.ispc runfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/short-vec-8.ispc compfail x86-64 generic-4 Mac LLVM 3.4 g++4.7 -O2 * -./tests/ptr-15.ispc runfail x86-64 generic-16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-141.ispc runfail x86-64 generic-16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/test-143.ispc runfail x86-64 generic-16 Mac LLVM 3.4 g++4.7 -O2 * -./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Mac LLVM 3.4 g++4.7 -O2 * .\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * .\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * .\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * @@ -644,11 +56,6 @@ .\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * .\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * .\tests\atomics-13.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-11.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-13.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-5.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-6.ispc compfail x86 sse4-i16x8 
Windows LLVM 3.3 cl -O2 * .\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\funcptr-null-4.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * @@ -667,11 +74,6 @@ .\tests\reduce-max-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\reduce-min-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\atomics-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-11.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-5.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-6.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * @@ -715,8 +117,6 @@ .\tests\uint64-max.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\uint64-min-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\uint64-min.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\avg-down-int8.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\avg-up-int8.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-10.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-11.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-12.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * @@ -765,35 +165,21 @@ .\tests\uint64-max.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\uint64-min.ispc runfail x86 avx1.1-i32x16 
Windows LLVM 3.3 cl -O2 * -.\tests\avg-down-int8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\avg-up-int8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-10.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-11.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-12.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-9.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\atomics-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-11.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-5.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-6.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * .\tests\funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\atomics-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-11.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-5.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-6.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-10.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\switch-11.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\switch-12.ispc compfail x86-64 avx1-i32x8 Windows LLVM 
3.3 cl -O2 * .\tests\switch-8.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\switch-9.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\avg-down-int8.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\avg-up-int8.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-10.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-11.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-12.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * @@ -804,8 +190,6 @@ .\tests\switch-12.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\switch-8.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * .\tests\switch-9.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-10.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-11.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * .\tests\switch-12.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * @@ -906,58 +290,19 @@ ./tests/test-141.ispc runfail x86-64 generic-16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.3 clang++3.3 -O2 * 
-./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * 
-./tests/local-atomics-11.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-8.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-swap.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/memset-varying.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-1.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-12.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-2.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-3.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-7.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * 
-./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Linux LLVM 3.4 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 clang++3.3 -O2 * @@ -983,46 +328,11 @@ ./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86 avx1.1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86 avx1.1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-down-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/avg-up-int8.ispc compfail x86-64 avx1.1-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/short-vec-8.ispc compfail x86-64 generic-4 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 generic-16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-143.ispc runfail x86-64 generic-16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/double-3.ispc runfail x86 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/atomics-varyingptr-4.ispc compfail 
x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-11.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-12.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-13.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-5.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-6.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-7.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-8.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-swap.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/local-atomics-varyingptr-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/memset-varying.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-1.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-12.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-13.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-2.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-3.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-4.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-5.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-6.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * -./tests/reduce-equal-7.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * 
-./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * .\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * @@ -1060,3 +370,95 @@ .\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * .\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * .\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-4.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-5.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-6.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail 
x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\min-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\min-uint-2.ispc runfail 
x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\test-141.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail 
x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\test-141.ispc runfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-10.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-11.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-12.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-8.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\switch-9.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * diff --git a/ispc.cpp b/ispc.cpp index db4c161a..859865a5 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -102,6 +102,22 @@ static void __cpuidex(int info[4], int level, int count) { } #endif // !ISPC_IS_WINDOWS && !__ARM__ +#if !defined(__arm__) +static bool __os_has_avx_support() { +#if defined(ISPC_IS_WINDOWS) + // 
Check if the OS will save the YMM registers + unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + return (xcrFeatureMask & 6) == 6; +#else // !defined(ISPC_IS_WINDOWS) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); + return (rEAX & 6) == 6; +#endif // !defined(ISPC_IS_WINDOWS) +} +#endif // !__arm__ static const char * lGetSystemISA() { @@ -111,7 +127,8 @@ lGetSystemISA() { int info[4]; __cpuid(info, 1); - if ((info[2] & (1 << 28)) != 0) { // AVX + if ((info[2] & (1 << 28)) != 0 && + __os_has_avx_support()) { // AVX // AVX1 for sure.... // Ivy Bridge? if ((info[2] & (1 << 29)) != 0 && // F16C diff --git a/llvm_patches/3_3_0001-Fix-PR16807.patch b/llvm_patches/3_3_0001-Fix-PR16807.patch old mode 100755 new mode 100644 diff --git a/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch b/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch new file mode 100644 index 00000000..b6abb1d3 --- /dev/null +++ b/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch @@ -0,0 +1,69 @@ +From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001 +From: Michael Liao +Date: Mon, 21 Oct 2013 17:47:58 -0700 +Subject: [PATCH] Fix PR17631 + +- Skip instructions added in prolog. For specific targets, prolog may + insert helper function calls (e.g. _chkstk will be called when + there're more than 4K bytes allocated on stack). However, these + helpers don't use/def YMM/XMM registers. 
+--- + lib/Target/X86/X86VZeroUpper.cpp | 11 ++++++++++- + test/CodeGen/X86/pr17631.ll | 22 ++++++++++++++++++++++ + 2 files changed, 32 insertions(+), 1 deletion(-) + create mode 100644 test/CodeGen/X86/pr17631.ll + +diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp +index 477f75a..0d37a7d 100644 +--- lib/Target/X86/X86VZeroUpper.cpp ++++ lib/Target/X86/X86VZeroUpper.cpp +@@ -231,8 +231,17 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, + bool BBHasCall = false; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { +- MachineInstr *MI = I; + DebugLoc dl = I->getDebugLoc(); ++ MachineInstr *MI = I; ++ ++ // Don't need to check instructions added in prolog. ++ // In prolog, special function calls may be added for specific targets ++ // (e.g. on Windows, a prolog helper '_chkstk' is called when the local ++ // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM ++ // registers. ++ if (MI->getFlag(MachineInstr::FrameSetup)) ++ continue; ++ + bool isControlFlow = MI->isCall() || MI->isReturn(); + + // Shortcut: don't need to check regular instructions in dirty state. 
+diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll +new file mode 100644 +index 0000000..a572ff2 +--- /dev/null ++++ test/CodeGen/X86/pr17631.ll +@@ -0,0 +1,22 @@ ++; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s ++ ++%struct_type = type { [64 x <8 x float>], <8 x float> } ++ ++; Function Attrs: nounwind readnone ++declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) ++ ++; Function Attrs: nounwind ++define i32 @equal(<8 x i32> %A) { ++allocas: ++ %first_alloc = alloca [64 x <8 x i32>] ++ %second_alloc = alloca %struct_type ++ ++ %A1 = bitcast <8 x i32> %A to <8 x float> ++ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) ++ ret i32 %A2 ++} ++ ++; CHECK: equal ++; CHECK-NOT: vzeroupper ++; CHECK: _chkstk ++; CHECK: ret +-- +1.8.1.2 + diff --git a/opt.cpp b/opt.cpp index 75eae20c..ce84744a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -124,6 +125,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass(); static llvm::Pass *CreateDebugPass(char * output); +static llvm::Pass *CreateReplaceStdlibShiftPass(); + #define DEBUG_START_PASS(NAME) \ if (g->debugPrint && \ (getenv("FUNC") == NULL || \ @@ -521,6 +524,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createPromoteMemoryToRegisterPass()); optPM.add(llvm::createAggressiveDCEPass()); + if (g->opt.disableGatherScatterOptimizations == false && g->target->getVectorWidth() > 1) { optPM.add(llvm::createInstructionCombiningPass(), 210); @@ -546,7 +550,8 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createGlobalOptimizerPass()); optPM.add(llvm::createReassociatePass()); optPM.add(llvm::createIPConstantPropagationPass()); - optPM.add(llvm::createDeadArgEliminationPass()); + optPM.add(CreateReplaceStdlibShiftPass(),229); + optPM.add(llvm::createDeadArgEliminationPass(),230); optPM.add(llvm::createInstructionCombiningPass()); 
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createPruneEHPass()); @@ -4879,6 +4884,7 @@ lMatchAvgDownInt16(llvm::Value *inst) { } #endif // !LLVM_3_1 && !LLVM_3_2 + bool PeepholePass::runOnBasicBlock(llvm::BasicBlock &bb) { DEBUG_START_PASS("PeepholePass"); @@ -4923,3 +4929,89 @@ static llvm::Pass * CreatePeepholePass() { return new PeepholePass; } + +/** Given an llvm::Value known to be an integer, return its value as + an int64_t. +*/ +static int64_t +lGetIntValue(llvm::Value *offset) { + llvm::ConstantInt *intOffset = llvm::dyn_cast(offset); + Assert(intOffset && (intOffset->getBitWidth() == 32 || + intOffset->getBitWidth() == 64)); + return intOffset->getSExtValue(); +} + +/////////////////////////////////////////////////////////////////////////// +// ReplaceStdlibShiftPass + +class ReplaceStdlibShiftPass : public llvm::BasicBlockPass { +public: + static char ID; + ReplaceStdlibShiftPass() : BasicBlockPass(ID) { + } + + const char *getPassName() const { return "Resolve \"replace extract insert chains\""; } + bool runOnBasicBlock(llvm::BasicBlock &BB); + +}; + +char ReplaceStdlibShiftPass::ID = 0; + +bool +ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { + DEBUG_START_PASS("ReplaceStdlibShiftPass"); + bool modifiedAny = false; + + llvm::Function *shifts[6]; + shifts[0] = m->module->getFunction("__shift_i8"); + shifts[1] = m->module->getFunction("__shift_i16"); + shifts[2] = m->module->getFunction("__shift_i32"); + shifts[3] = m->module->getFunction("__shift_i64"); + shifts[4] = m->module->getFunction("__shift_float"); + shifts[5] = m->module->getFunction("__shift_double"); + + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { + llvm::Instruction *inst = &*iter; + + if (llvm::CallInst *ci = llvm::dyn_cast(inst)) { + llvm::Function *func = ci->getCalledFunction(); + for (int i = 0; i < 6; i++) { + if (shifts[i] && (shifts[i] == func)) { + // we matched a call + llvm::Value *shiftedVec = 
ci->getArgOperand(0); + llvm::Value *shiftAmt = ci->getArgOperand(1); + if (llvm::isa(shiftAmt)) { + int vectorWidth = g->target->getVectorWidth(); + int * shuffleVals = new int[vectorWidth]; + int shiftInt = lGetIntValue(shiftAmt); + for (int i = 0; i < vectorWidth; i++) { + int s = i + shiftInt; + s = (s < 0) ? vectorWidth : s; + s = (s >= vectorWidth) ? vectorWidth : s; + shuffleVals[i] = s; + } + llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleVals); + llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shiftedVec->getType()); + llvm::Value *shuffle = new llvm::ShuffleVectorInst(shiftedVec, zeroVec, + shuffleIdxs, "vecShift", ci); + ci->replaceAllUsesWith(shuffle); + modifiedAny = true; + delete [] shuffleVals; + } else { + PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); + } + } + } + } + } + + DEBUG_END_PASS("ReplaceStdlibShiftPass"); + + return modifiedAny; +} + + +static llvm::Pass * +CreateReplaceStdlibShiftPass() { + return new ReplaceStdlibShiftPass(); +} diff --git a/perf.py b/perf.py index 2b5c6edd..7e8b3cff 100755 --- a/perf.py +++ b/perf.py @@ -42,6 +42,9 @@ def print_file(line): def build_test(commands): os.system(commands[4]) test = os.system(commands[1]) + if is_windows: + common.remove_if_exists(".\\X64\\Release1") + os.rename(".\\X64\\Release", ".\\X64\\Release1") if options.ref: ref = os.system(commands[3]) return (options.ref and ref) or test @@ -156,16 +159,16 @@ def cpu_check(): R = c_line.split(' ') cpu_percent = float(R[1]) * 3 else: - os.system("wmic cpu get loadpercentage /value > cpu_temp") - c = open("cpu_temp", 'r') + os.system("wmic cpu get loadpercentage /value > cpu_temp") + c = open("cpu_temp", 'r') c_lines = c.readlines() - c.close() - os.remove("cpu_temp") - t = "0" - for i in c_lines[2]: + c.close() + os.remove("cpu_temp") + t = "0" + for i in c_lines[2]: if i.isdigit(): t = t + i - cpu_percent = int(t) + cpu_percent = int(t) return cpu_percent #returns geomean of list @@ -345,9 
+348,14 @@ def perf(options1, args): if options.ref != "": options.ref = True if os.environ.get("ISPC_HOME") != None: - if os.path.exists(os.environ["ISPC_HOME"] + os.sep + ispc_test): - ispc_test_exists = True - ispc_test = os.environ["ISPC_HOME"] + os.sep + ispc_test + if is_windows == False: + if os.path.exists(os.environ["ISPC_HOME"] + os.sep + ispc_test): + ispc_test_exists = True + ispc_test = os.environ["ISPC_HOME"] + os.sep + ispc_test + else: + if os.path.exists(os.environ["ISPC_HOME"] + "\\Release\\" + ispc_test): + ispc_test_exists = True + ispc_test = os.environ["ISPC_HOME"] + "\\Release\\" + ispc_test for counter in PATH_dir: if ispc_test_exists == False: if os.path.exists(counter + os.sep + ispc_test): @@ -419,9 +427,9 @@ def perf(options1, args): re_command = "make clean >> "+build_log else: ex_command_ref = "x64\\Release\\ref.exe " + command + " >> " + perf_temp + "_ref" - ex_command = "x64\\Release\\test.exe " + command + " >> " + perf_temp + "_test" - bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /t:rebuild >> " + build_log - bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /t:rebuild >> " + build_log + ex_command = "x64\\Release1\\test.exe " + command + " >> " + perf_temp + "_test" + bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref /t:rebuild >> " + build_log + bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc /t:rebuild >> " + build_log re_command = "msbuild /t:clean >> " + build_log commands = [ex_command, bu_command, ex_command_ref, bu_command_ref, re_command] # parsing config parameters diff --git a/stdlib.ispc b/stdlib.ispc index 9b02d0ba..6768594b 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -170,6 +170,60 @@ static inline int64 rotate(int64 v, uniform int i) { return 
__rotate_i64(v, i); } +__declspec(safe) +static inline float shift(float v, uniform int i) { + varying float result; + unmasked { + result = __shift_float(v, i); + } + return result; +} + +__declspec(safe) +static inline int8 shift(int8 v, uniform int i) { + varying int8 result; + unmasked { + result = __shift_i8(v, i); + } + return result; +} + +__declspec(safe) +static inline int16 shift(int16 v, uniform int i) { + varying int16 result; + unmasked { + result = __shift_i16(v, i); + } + return result; +} + +__declspec(safe) +static inline int32 shift(int32 v, uniform int i) { + varying int32 result; + unmasked { + result = __shift_i32(v, i); + } + return result; +} + +__declspec(safe) +static inline double shift(double v, uniform int i) { + varying double result; + unmasked { + result = __shift_double(v, i); + } + return result; +} + +__declspec(safe) +static inline int64 shift(int64 v, uniform int i) { + varying int64 result; + unmasked { + result = __shift_i64(v, i); + } + return result; +} + __declspec(safe) static inline float shuffle(float v, int i) { return __shuffle_float(v, i); diff --git a/tests/chkstk.ispc b/tests/chkstk.ispc new file mode 100644 index 00000000..bd0a8299 --- /dev/null +++ b/tests/chkstk.ispc @@ -0,0 +1,49 @@ +//test for 17631 bug in LLVM. + +export uniform int width() { return programCount; } + +struct s_temp +{ + float temp[64]; +}; + +int CompressBlockBC7(int A, uniform float b) +{ + // This declaration caused problem because LLVM inserted + // _chkstk after declaration and vzeroupper before it's call. + // A will be in ymm at avx, so we lose a half of it. + s_temp _state; + // These two loops are here to prevent elimination of declaration + for (int i=0; i<64; i++) { + float ii = i; + _state.temp[i] = b + sin(ii); + } + float r = 0; + for (int j=0; j<64; j+=9) { + r += _state.temp[j] + j; + } + + // Here upper bits of A in ymm can be zeros. This will crash the test. 
+ int B; + if (A!=0) { + B = 20; + } + else { + B = 30; + } + if(A == 1) { + B = r; + } + return B; +} + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int A = programIndex; + RET[programIndex] = CompressBlockBC7(A, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 20; + RET[0] = 30; + RET[1] = 292; +} diff --git a/tests/ptr-arith-indexing.ispc b/tests/ptr-arith-indexing.ispc new file mode 100644 index 00000000..9f62a2c9 --- /dev/null +++ b/tests/ptr-arith-indexing.ispc @@ -0,0 +1,16 @@ + +export uniform int width() { return programCount; } + +int foo(uniform float * uniform base, uniform int uOfs, varying int vOfs) { + return (base+uOfs)[vOfs]; +} + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float * uniform ptr = &aFOO[0]; + int val = foo(ptr, programCount, programIndex); + RET[programIndex] = val; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1+programCount+programIndex; +} diff --git a/tests/shift-1.ispc b/tests/shift-1.ispc new file mode 100644 index 00000000..2062e36b --- /dev/null +++ b/tests/shift-1.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int a = aFOO[programIndex]; + int rot = shift(a, -1); + RET[programIndex] = rot; +} + +export void result(uniform float RET[]) { + varying int val = programIndex; + if (val < 0) val = 0; + RET[programIndex] = val; +} diff --git a/tests/shift-2.ispc b/tests/shift-2.ispc new file mode 100644 index 00000000..6cb88e8a --- /dev/null +++ b/tests/shift-2.ispc @@ -0,0 +1,15 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int a = aFOO[programIndex]; + uniform int delta = b - 6; // -1 + int rot = shift(a, delta); + RET[programIndex] = rot; +} + +export void result(uniform float RET[]) { + varying int val = 
programIndex; + if (val < 0) val = 0; + RET[programIndex] = val; +} diff --git a/tests/shift-3.ispc b/tests/shift-3.ispc new file mode 100644 index 00000000..827d076f --- /dev/null +++ b/tests/shift-3.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int a = aFOO[programIndex]; + int rot = shift(a, 1); + RET[programIndex] = rot; +} + +export void result(uniform float RET[]) { + varying int val = 2 + programIndex; + if (val > programCount) val = 0; + RET[programIndex] = val; +}