diff --git a/.gitignore b/.gitignore index 429199bb..7cdc4a4e 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,6 @@ examples/*/objs/* examples/*/ref examples/*/test *.swp +check_isa.exe diff --git a/alloy.py b/alloy.py index 0aaf3d8d..51aec82b 100755 --- a/alloy.py +++ b/alloy.py @@ -65,7 +65,12 @@ def try_do_LLVM(text, command, from_validation): if from_validation == True: text = text + "\n" print_debug("Trying to " + text, from_validation, alloy_build) - if os.system(command + " >> " + alloy_build + " 2>> " + alloy_build) != 0: + postfix = "" + if current_OS == "Windows": + postfix = " 1>> " + alloy_build + " 2>&1" + else: + postfix = " >> " + alloy_build + " 2>> " + alloy_build + if os.system(command + postfix) != 0: print_debug("ERROR.\n", from_validation, alloy_build) error("can't " + text, 1) print_debug("DONE.\n", from_validation, alloy_build) @@ -144,10 +149,13 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, "tar -xvzf " + tar[1] + " --strip-components 1", from_validation) os.chdir("../../") # paching llvm - patches = glob.glob(os.environ["ISPC_HOME"] + "/llvm_patches/*.*") + patches = glob.glob(os.environ["ISPC_HOME"] + os.sep + "llvm_patches" + os.sep + "*.*") for patch in patches: if version_LLVM in os.path.basename(patch): - try_do_LLVM("patch LLVM with patch" + patch + " ", "patch -p0 < " + patch, from_validation) + if current_OS != "Windows": + try_do_LLVM("patch LLVM with patch " + patch + " ", "patch -p0 < " + patch, from_validation) + else: + try_do_LLVM("patch LLVM with patch " + patch + " ", "C:\\gnuwin32\\bin\\patch.exe -p0 < " + patch, from_validation) os.chdir("../") # configuring llvm, build first part of selfbuild os.makedirs(LLVM_BUILD) @@ -173,77 +181,65 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, print_debug("Now we have compiler for selfbuild: " + selfbuild_compiler + "\n", from_validation, alloy_build) os.chdir(LLVM_BUILD) if debug == False: - try_do_LLVM("configure release version ", + if current_OS != "Windows": + try_do_LLVM("configure release version ", "../" + LLVM_SRC + "/configure --prefix=" + llvm_home + "/" + LLVM_BIN + " --enable-optimized" + selfbuild_compiler, from_validation) + else: + try_do_LLVM("configure release version ", + 'cmake -G "Visual Studio 10" -DCMAKE_INSTALL_PREFIX="..\\'+ LLVM_BIN + + '" -DLLVM_LIT_TOOLS_DIR="C:\\gnuwin32\\bin" ..\\' + LLVM_SRC, + from_validation) else: try_do_LLVM("configure debug version ", "../" + LLVM_SRC + "/configure --prefix=" + llvm_home + "/" + LLVM_BIN + " --enable-debug-runtime --enable-debug-symbols --enable-keep-symbols" + selfbuild_compiler, from_validation) # building llvm - try_do_LLVM("build LLVM ", make, from_validation) - try_do_LLVM("install LLVM ", "make install", from_validation) + if current_OS != "Windows": + try_do_LLVM("build LLVM ", make, from_validation) + try_do_LLVM("install LLVM ", "make install", from_validation) + else: + try_do_LLVM("build LLVM and than install LLVM ", "msbuild INSTALL.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", from_validation) os.chdir(current_path) def check_targets(): answer = [] answer_sde = [] - SSE2 = False; - SSE4 = False; - AVX = False; - AVX11 = False; - AVX2 = False; - if current_OS == "Linux": - cpu = open("/proc/cpuinfo") - f_lines = cpu.readlines() - cpu.close() - # check what native targets do we have - for i in range(0,len(f_lines)): - if SSE2 == False and "sse2" in f_lines[i]: - SSE2 = True; - answer = answer + ["sse2-i32x4", "sse2-i32x8"] - if SSE4 == False and "sse4_1" in f_lines[i]: - SSE4 = True; - answer = answer + ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] - if AVX == False and "avx" in f_lines[i]: - AVX = True; - answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] - if AVX11 == False and "rdrand" in f_lines[i]: - AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] - if AVX2 == False and "avx2" in f_lines[i]: - AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] - if current_OS == "MacOS": - f_lines = take_lines("sysctl machdep.cpu.features", "first") - if "SSE2" in f_lines: - SSE2 = True; - answer = answer + ["sse2-i32x4", "sse2-i32x8"] - if "SSE4.1" in f_lines: - SSE4 = True; - answer = answer + ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] - if "AVX1.0" in f_lines: - AVX = True; - answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] - if "RDRAND" in f_lines: - AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] - if "AVX2.0" in f_lines: - AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] - - answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] + # check what native targets do we have + if current_OS != "Windows": + try_do_LLVM("build check_ISA", "clang check_isa.cpp -o check_isa.exe", True) + else: + try_do_LLVM("build check_ISA", "cl check_isa.cpp", True) + SSE2 = ["sse2-i32x4", "sse2-i32x8"] + SSE4 = ["sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16"] + AVX = ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] + AVX11 = ["avx1.1-i32x8","avx1.1-i32x16","avx1.1-i64x4"] + AVX2 = ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] + targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False], ["SSE2", SSE2, False]] + f_lines = take_lines("check_isa.exe", "first") + for i in range(0,5): + if targets[i][0] in f_lines: + for j in range(i,5): + answer = targets[j][1] + answer + targets[j][2] = True + break + if current_OS != "Windows": + answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] # now check what targets we have with the help of SDE sde_exists = "" PATH_dir = string.split(os.getenv("PATH"), os.pathsep) + if current_OS == "Windows": + sde_n = "sde.exe" + else: + sde_n = "sde" for counter in PATH_dir: - if os.path.exists(counter + os.sep + "sde") and sde_exists == "": - sde_exists = counter + os.sep + "sde" + if os.path.exists(counter + os.sep + sde_n) and sde_exists == "": + sde_exists = counter + os.sep + sde_n if os.environ.get("SDE_HOME") != None: - if os.path.exists(os.environ.get("SDE_HOME") + os.sep + "sde"): - sde_exists = os.environ.get("SDE_HOME") + os.sep + "sde" + if os.path.exists(os.environ.get("SDE_HOME") + os.sep + sde_n): + sde_exists = os.environ.get("SDE_HOME") + os.sep + sde_n if sde_exists == "": error("you haven't got sde neither in SDE_HOME nor in your PATH.\n" + "To test all platforms please set SDE_HOME to path containing SDE.\n" + @@ -252,24 +248,38 @@ def check_targets(): # here we have SDE f_lines = take_lines(sde_exists + " -help", "all") for i in range(0,len(f_lines)): - if SSE4 == False and "wsm" in f_lines[i]: + if targets[3][2] == False and "wsm" in f_lines[i]: answer_sde = answer_sde + [["-wsm", "sse4-i32x4"], ["-wsm", "sse4-i32x8"], ["-wsm", "sse4-i16x8"], ["-wsm", "sse4-i8x16"]] - if AVX == False and "snb" in f_lines[i]: + if targets[2][2] == False and "snb" in f_lines[i]: answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]] - if AVX11 == False and "ivb" in f_lines[i]: + if targets[1][2] == False and "ivb" in f_lines[i]: answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]] - if AVX2 == False and "hsw" in f_lines[i]: + if targets[0][2] == False and "hsw" in f_lines[i]: answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]] return [answer, answer_sde] def build_ispc(version_LLVM, make): current_path = os.getcwd() os.chdir(os.environ["ISPC_HOME"]) - p_temp = os.getenv("PATH") - os.environ["PATH"] = os.environ["LLVM_HOME"] + "/bin-" + version_LLVM + "/bin:" + os.environ["PATH"] - try_do_LLVM("clean ISPC for building", "make clean", True) - try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", make, True) - os.environ["PATH"] = p_temp + if current_OS != "Windows": + p_temp = os.getenv("PATH") + os.environ["PATH"] = os.environ["LLVM_HOME"] + "/bin-" + version_LLVM + "/bin:" + os.environ["PATH"] + try_do_LLVM("clean ISPC for building", "make clean", True) + try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", make, True) + os.environ["PATH"] = p_temp + else: + p_temp = os.getenv("LLVM_INSTALL_DIR") + v_temp = os.getenv("LLVM_VERSION") + os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM + if version_LLVM == "3.3": + temp = "3_3" + if version_LLVM == "trunk": + temp = "3_4" + os.environ["LLVM_VERSION"] = "LLVM_" + temp + try_do_LLVM("clean ISPC for building", "msbuild ispc.vcxproj /t:clean", True) + try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", "msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", True) + os.environ["LLVM_INSTALL_DIR"] = p_temp + os.environ["LLVM_VERSION"] = v_temp os.chdir(current_path) def execute_stability(stability, R, print_version): @@ -305,9 +315,13 @@ def execute_stability(stability, R, print_version): def run_special_tests(): i = 5 +class options_for_drivers: + pass + def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm, time): os.chdir(os.environ["ISPC_HOME"]) - os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] + if current_OS != "Windows": + os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] if options.notify != "": common.remove_if_exists(os.environ["ISPC_HOME"] + os.sep + "notify_log.log") smtp_server = os.environ["SMTP_ISPC"] @@ -319,8 +333,6 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, print_debug("Folder: " + os.environ["ISPC_HOME"] + "\n", False, "") date = datetime.datetime.now() print_debug("Date: " + date.strftime('%H:%M %d/%m/%Y') + "\n", False, "") - class options_for_drivers: - pass # *** *** *** # Stability validation run # *** *** *** @@ -475,12 +487,14 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, performance = options_for_drivers() # performance constant options performance.number = number - performance.config = "./perf.ini" - performance.path = "./" + performance.config = "." + os.sep + "perf.ini" + performance.path = "." + os.sep performance.silent = True performance.output = "" performance.compiler = "" performance.ref = "ispc_ref" + if current_OS == "Windows": + performance.ref = "ispc_ref.exe" performance.in_file = "." + os.sep + f_date + os.sep + "performance.log" # prepare LLVM 3.3 as newest LLVM need_LLVM = check_LLVM(["3.3"]) @@ -502,7 +516,11 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, sys.stdout.write(".\n") build_ispc("3.3", make) sys.stdout.write(".\n") - os.rename("ispc", "ispc_ref") + if current_OS != "Windows": + os.rename("ispc", "ispc_ref") + else: + common.remove_if_exists("Release\\ispc_ref.exe") + os.rename("Release\\ispc.exe", "Release\\ispc_ref.exe") try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True) if stashing: try_do_LLVM("return current branch ", "git stash pop", True) @@ -541,7 +559,6 @@ def Main(): global current_OS if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True: current_OS = "Windows" - error("Windows isn't supported now", 1) else: if (platform.system() == 'Darwin'): current_OS = "MacOS" @@ -566,7 +583,9 @@ def Main(): for iterator in test_only: if not (" " + iterator + " " in test_only_r): error("unknow option for only: " + iterator, 1) - + if current_OS == "Windows": + if options.debug == True or options.selfbuild == True or options.tarball != "": + error("Debug, selfbuild and tarball options are unsupported on windows", 1) global f_date f_date = "logs" common.remove_if_exists(f_date) @@ -626,79 +645,80 @@ import common error = common.error take_lines = common.take_lines print_debug = common.print_debug -# parsing options -class MyParser(OptionParser): - def format_epilog(self, formatter): - return self.epilog -examples = ("Examples:\n" + -"Load and build LLVM from trunk\n\talloy.py -b\n" + -"Load and build LLVM 3.3. Rewrite LLVM folders\n\talloy.py -b --version=3.3 --force\n" + -"Untar files llvm.tgz clang.tgz, build LLVM from them in folder bin-from_tar\n\talloy.py -b --tarball='llvm.tgz clang.tgz' --folder=from_tar\n" + -"Load LLVM from trunk, revision r172870. Build it. Do selfbuild\n\talloy.py -b --revision=r172870 --selfbuild\n" + -"Validation run with LLVM 3.3, trunk; x86, x86-64; -O2;\nall supported targets; performance\n\talloy.py -r\n" + -"Validation run with all avx targets and sse4-i8x16 without performance\n\talloy.py -r --only=stability --only-targets='avx sse4-i8x16'\n" + -"Validation run with avx2-i32x8, all sse4 and sse2 targets\nand all targets with i32x16\n\talloy.py -r --only-targets='avx2-i32x8 sse4 i32x16 sse2'\n" + -"Stability validation run with LLVM 3.2, 3.3; -O0; x86,\nupdate fail_db.txt with passes and fails\n\talloy.py -r --only='3.2 -O0 stability 3.3 x86' --update-errors=FP\n" + -"Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + -"Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + -"Validation run. Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") -parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) -parser.add_option('-b', '--build-llvm', dest='build_llvm', - help='ask to build LLVM', default=False, action="store_true") -parser.add_option('-r', '--run', dest='validation_run', - help='ask for validation run', default=False, action="store_true") -parser.add_option('-j', dest='speed', - help='set -j for make', default="8") -# options for activity "build LLVM" -llvm_group = OptionGroup(parser, "Options for building LLVM", +if __name__ == '__main__': + # parsing options + class MyParser(OptionParser): + def format_epilog(self, formatter): + return self.epilog + examples = ("Examples:\n" + + "Load and build LLVM from trunk\n\talloy.py -b\n" + + "Load and build LLVM 3.3. Rewrite LLVM folders\n\talloy.py -b --version=3.3 --force\n" + + "Untar files llvm.tgz clang.tgz, build LLVM from them in folder bin-from_tar\n\talloy.py -b --tarball='llvm.tgz clang.tgz' --folder=from_tar\n" + + "Load LLVM from trunk, revision r172870. Build it. Do selfbuild\n\talloy.py -b --revision=r172870 --selfbuild\n" + + "Validation run with LLVM 3.3, trunk; x86, x86-64; -O2;\nall supported targets; performance\n\talloy.py -r\n" + + "Validation run with all avx targets and sse4-i8x16 without performance\n\talloy.py -r --only=stability --only-targets='avx sse4-i8x16'\n" + + "Validation run with avx2-i32x8, all sse4 and sse2 targets\nand all targets with i32x16\n\talloy.py -r --only-targets='avx2-i32x8 sse4 i32x16 sse2'\n" + + "Stability validation run with LLVM 3.2, 3.3; -O0; x86,\nupdate fail_db.txt with passes and fails\n\talloy.py -r --only='3.2 -O0 stability 3.3 x86' --update-errors=FP\n" + + "Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + + "Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + + "Validation run. Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") + parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) + parser.add_option('-b', '--build-llvm', dest='build_llvm', + help='ask to build LLVM', default=False, action="store_true") + parser.add_option('-r', '--run', dest='validation_run', + help='ask for validation run', default=False, action="store_true") + parser.add_option('-j', dest='speed', + help='set -j for make', default="8") + # options for activity "build LLVM" + llvm_group = OptionGroup(parser, "Options for building LLVM", "These options must be used with -b option.") -llvm_group.add_option('--version', dest='version', - help='version of llvm to build: 3.1 3.2 3.3 trunk. Default: trunk', default="trunk") -llvm_group.add_option('--revision', dest='revision', - help='revision of llvm to build in format r172870', default="") -llvm_group.add_option('--debug', dest='debug', - help='debug build of LLVM?', default=False, action="store_true") -llvm_group.add_option('--folder', dest='folder', - help='folder to build LLVM in', default="") -llvm_group.add_option('--tarball', dest='tarball', - help='"llvm_tarball clang_tarball"', default="") -llvm_group.add_option('--selfbuild', dest='selfbuild', - help='make selfbuild of LLVM and clang', default=False, action="store_true") -llvm_group.add_option('--force', dest='force', - help='rebuild LLVM', default=False, action='store_true') -llvm_group.add_option('--extra', dest='extra', - help='load extra clang tools', default=False, action='store_true') -parser.add_option_group(llvm_group) -# options for activity "validation run" -run_group = OptionGroup(parser, "Options for validation run", + llvm_group.add_option('--version', dest='version', + help='version of llvm to build: 3.1 3.2 3.3 trunk. Default: trunk', default="trunk") + llvm_group.add_option('--revision', dest='revision', + help='revision of llvm to build in format r172870', default="") + llvm_group.add_option('--debug', dest='debug', + help='debug build of LLVM?', default=False, action="store_true") + llvm_group.add_option('--folder', dest='folder', + help='folder to build LLVM in', default="") + llvm_group.add_option('--tarball', dest='tarball', + help='"llvm_tarball clang_tarball"', default="") + llvm_group.add_option('--selfbuild', dest='selfbuild', + help='make selfbuild of LLVM and clang', default=False, action="store_true") + llvm_group.add_option('--force', dest='force', + help='rebuild LLVM', default=False, action='store_true') + llvm_group.add_option('--extra', dest='extra', + help='load extra clang tools', default=False, action='store_true') + parser.add_option_group(llvm_group) + # options for activity "validation run" + run_group = OptionGroup(parser, "Options for validation run", "These options must be used with -r option.") -run_group.add_option('--compare-with', dest='branch', - help='set performance reference point. Dafault: master', default="master") -run_group.add_option('--number', dest='number_for_performance', - help='number of performance runs for each test. Default: 5', default=5) -run_group.add_option('--notify', dest='notify', - help='email to sent results to', default="") -run_group.add_option('--update-errors', dest='update', - help='rewrite fail_db.txt file according to received results (F or FP)', default="") -run_group.add_option('--only-targets', dest='only_targets', - help='set list of targets to test. Possible values - all subnames of targets.', - default="") -run_group.add_option('--time', dest='time', - help='display time of testing', default=False, action='store_true') -run_group.add_option('--only', dest='only', - help='set types of tests. Possible values:\n' + - '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + - 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', + run_group.add_option('--compare-with', dest='branch', + help='set performance reference point. Dafault: master', default="master") + run_group.add_option('--number', dest='number_for_performance', + help='number of performance runs for each test. Default: 5', default=5) + run_group.add_option('--notify', dest='notify', + help='email to sent results to', default="") + run_group.add_option('--update-errors', dest='update', + help='rewrite fail_db.txt file according to received results (F or FP)', default="") + run_group.add_option('--only-targets', dest='only_targets', + help='set list of targets to test. Possible values - all subnames of targets.', default="") -run_group.add_option('--perf_LLVM', dest='perf_llvm', - help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') -parser.add_option_group(run_group) -# options for activity "setup PATHS" -setup_group = OptionGroup(parser, "Options for setup", + run_group.add_option('--time', dest='time', + help='display time of testing', default=False, action='store_true') + run_group.add_option('--only', dest='only', + help='set types of tests. Possible values:\n' + + '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + + 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', + default="") + run_group.add_option('--perf_LLVM', dest='perf_llvm', + help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') + parser.add_option_group(run_group) + # options for activity "setup PATHS" + setup_group = OptionGroup(parser, "Options for setup", "These options must be use with -r or -b to setup environment variables") -setup_group.add_option('--llvm_home', dest='llvm_home',help='path to LLVM',default="") -setup_group.add_option('--ispc_home', dest='ispc_home',help='path to ISPC',default="") -setup_group.add_option('--sde_home', dest='sde_home',help='path to SDE',default="") -parser.add_option_group(setup_group) -(options, args) = parser.parse_args() -Main() + setup_group.add_option('--llvm_home', dest='llvm_home',help='path to LLVM',default="") + setup_group.add_option('--ispc_home', dest='ispc_home',help='path to ISPC',default="") + setup_group.add_option('--sde_home', dest='sde_home',help='path to SDE',default="") + parser.add_option_group(setup_group) + (options, args) = parser.parse_args() + Main() diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index ea15df5d..ef14d26e 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -719,8 +719,9 @@ static FORCEINLINE __vec16_i32 __shuffle2_i32(__vec16_i32 v0, __vec16_i32 v1, __ template static FORCEINLINE __vec16_i32 __load(const __vec16_i32 *p) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __load<64>(p); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // return __load<64>(p); + return _mm512_load_epi32(p); #else __vec16_i32 v; v = _mm512_extloadunpacklo_epi32(v, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE); @@ -731,8 +732,9 @@ template static FORCEINLINE __vec16_i32 __load(const __vec16_i32 *p) template static FORCEINLINE void __store(__vec16_i32 *p, __vec16_i32 v) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - __store<64>(p,v); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // __store<64>(p,v); + _mm512_store_epi32(p, v); #else _mm512_extpackstorelo_epi32( p, v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); _mm512_extpackstorehi_epi32((uint8_t*)p+64, v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); @@ -979,8 +981,11 @@ static FORCEINLINE __vec16_i64 __shuffle2_double(__vec16_i64 _v0, __vec16_i64 _v template static FORCEINLINE __vec16_i64 __load(const __vec16_i64 *p) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __load<128>(p); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // return __load<128>(p); + __m512i v2 = _mm512_load_epi32(p); + __m512i v1 = _mm512_load_epi32(((uint8_t*)p)+64); + return __vec16_i64(v2,v1); #else __vec16_i32 v1; __vec16_i32 v2; @@ -995,8 +1000,12 @@ template static FORCEINLINE __vec16_i64 __load(const __vec16_i64 *p) template static FORCEINLINE void __store(__vec16_i64 *p, __vec16_i64 v) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __store<128>(p,v); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // __store<128>(p,v); + __m512i v1 = v.v2; + __m512i v2 = v.v1; + _mm512_store_epi64(p, v2); + _mm512_store_epi64(((uint8_t*)p)+64, v1); #else __m512i v1 = v.v2; __m512i v2 = v.v1; @@ -1099,8 +1108,9 @@ static FORCEINLINE __vec16_f __shuffle2_float(__vec16_f _v0, __vec16_f _v1, __ve template static FORCEINLINE __vec16_f __load(const __vec16_f *p) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __load<64>(p); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // return __load<64>(p); + return _mm512_load_ps(p); #else __vec16_f v; v = _mm512_extloadunpacklo_ps(v, p, _MM_UPCONV_PS_NONE, _MM_HINT_NONE); @@ -1111,8 +1121,9 @@ template static FORCEINLINE __vec16_f __load(const __vec16_f *p) template static FORCEINLINE void __store(__vec16_f *p, __vec16_f v) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - __store<64>(p,v); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // __store<64>(p,v); + _mm512_store_ps(p, v); #else _mm512_extpackstorelo_ps( p, v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE); _mm512_extpackstorehi_ps((uint8_t*)p+64, v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE); @@ -1372,8 +1383,9 @@ static FORCEINLINE __vec16_d __shuffle2_double(__vec16_d _v0, __vec16_d _v1, con template static FORCEINLINE __vec16_d __load(const __vec16_d *p) \ { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __load<128>(p); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // return __load<128>(p); + return __vec16_d(_mm512_load_pd(p), _mm512_load_pd(((uint8_t*)p)+64)); #else __vec16_d ret; ret.v1 = _mm512_extloadunpacklo_pd(ret.v1, p, _MM_UPCONV_PD_NONE, _MM_HINT_NONE); @@ -1386,8 +1398,10 @@ template static FORCEINLINE __vec16_d __load(const __vec16_d *p) \ template static FORCEINLINE void __store(__vec16_d *p, __vec16_d v) { -#ifdef ISPC_FORCE_ALIGNED_MEMORY__REMOVETHIS_WHEN_FIXED - return __store<128>(p,v); +#ifdef ISPC_FORCE_ALIGNED_MEMORY + // return __store<128>(p,v); + _mm512_store_pd(p, v.v1); + _mm512_store_pd(((uint8_t*)p)+64, v.v2); #else _mm512_extpackstorelo_pd(p, v.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); _mm512_extpackstorehi_pd((uint8_t*)p+64, v.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); diff --git a/expr.cpp b/expr.cpp index 6de78e6c..61d9cc2d 100644 --- a/expr.cpp +++ b/expr.cpp @@ -2798,6 +2798,17 @@ BinaryExpr::TypeCheck() { } } +const Type * +BinaryExpr::GetLValueType() const { + const Type *t = GetType(); + if (CastType(t) != NULL) { + // Are we doing something like (basePtr + offset)[...] = ... + return t; + } + else { + return NULL; + } +} int BinaryExpr::EstimateCost() const { @@ -4275,8 +4286,9 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { } else { Symbol *baseSym = GetBaseSymbol(); - if (dynamic_cast(baseExpr) == NULL) { - // Only check for non-function calls + if (dynamic_cast(baseExpr) == NULL && + dynamic_cast(baseExpr) == NULL) { + // Don't check if we're doing a function call or pointer arith AssertPos(pos, baseSym != NULL); } mask = lMaskForSymbol(baseSym, ctx); diff --git a/expr.h b/expr.h index 0d46191b..c7505763 100644 --- a/expr.h +++ b/expr.h @@ -155,6 +155,7 @@ public: llvm::Value *GetValue(FunctionEmitContext *ctx) const; const Type *GetType() const; + const Type *GetLValueType() const; void Print() const; Expr *Optimize(); diff --git a/opt.cpp b/opt.cpp index bb788a8e..ce84744a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -4930,8 +4930,6 @@ CreatePeepholePass() { return new PeepholePass; } -#include - /** Given an llvm::Value known to be an integer, return its value as an int64_t. */ @@ -4978,13 +4976,13 @@ ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { if (llvm::CallInst *ci = llvm::dyn_cast(inst)) { llvm::Function *func = ci->getCalledFunction(); for (int i = 0; i < 6; i++) { - if (shifts[i] == func) { + if (shifts[i] && (shifts[i] == func)) { // we matched a call llvm::Value *shiftedVec = ci->getArgOperand(0); llvm::Value *shiftAmt = ci->getArgOperand(1); if (llvm::isa(shiftAmt)) { int vectorWidth = g->target->getVectorWidth(); - int shuffleVals[vectorWidth]; + int * shuffleVals = new int[vectorWidth]; int shiftInt = lGetIntValue(shiftAmt); for (int i = 0; i < vectorWidth; i++) { int s = i + shiftInt; @@ -4998,6 +4996,7 @@ ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { shuffleIdxs, "vecShift", ci); ci->replaceAllUsesWith(shuffle); modifiedAny = true; + delete [] shuffleVals; } else { PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); } diff --git a/perf.py b/perf.py index 2b5c6edd..7e8b3cff 100755 --- a/perf.py +++ b/perf.py @@ -42,6 +42,9 @@ def print_file(line): def build_test(commands): os.system(commands[4]) test = os.system(commands[1]) + if is_windows: + common.remove_if_exists(".\\X64\\Release1") + os.rename(".\\X64\\Release", ".\\X64\\Release1") if options.ref: ref = os.system(commands[3]) return (options.ref and ref) or test @@ -156,16 +159,16 @@ def cpu_check(): R = c_line.split(' ') cpu_percent = float(R[1]) * 3 else: - os.system("wmic cpu get loadpercentage /value > cpu_temp") - c = open("cpu_temp", 'r') + os.system("wmic cpu get loadpercentage /value > cpu_temp") + c = open("cpu_temp", 'r') c_lines = c.readlines() - c.close() - os.remove("cpu_temp") - t = "0" - for i in c_lines[2]: + c.close() + os.remove("cpu_temp") + t = "0" + for i in c_lines[2]: if i.isdigit(): t = t + i - cpu_percent = int(t) + cpu_percent = int(t) return cpu_percent #returns geomean of list @@ -345,9 +348,14 @@ def perf(options1, args): if options.ref != "": options.ref = True if os.environ.get("ISPC_HOME") != None: - if os.path.exists(os.environ["ISPC_HOME"] + os.sep + ispc_test): - ispc_test_exists = True - ispc_test = os.environ["ISPC_HOME"] + os.sep + ispc_test + if is_windows == False: + if os.path.exists(os.environ["ISPC_HOME"] + os.sep + ispc_test): + ispc_test_exists = True + ispc_test = os.environ["ISPC_HOME"] + os.sep + ispc_test + else: + if os.path.exists(os.environ["ISPC_HOME"] + "\\Release\\" + ispc_test): + ispc_test_exists = True + ispc_test = os.environ["ISPC_HOME"] + "\\Release\\" + ispc_test for counter in PATH_dir: if ispc_test_exists == False: if os.path.exists(counter + os.sep + ispc_test): @@ -419,9 +427,9 @@ def perf(options1, args): re_command = "make clean >> "+build_log else: ex_command_ref = "x64\\Release\\ref.exe " + command + " >> " + perf_temp + "_ref" - ex_command = "x64\\Release\\test.exe " + command + " >> " + perf_temp + "_test" - bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /t:rebuild >> " + build_log - bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /t:rebuild >> " + build_log + ex_command = "x64\\Release1\\test.exe " + command + " >> " + perf_temp + "_test" + bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref /t:rebuild >> " + build_log + bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc /t:rebuild >> " + build_log re_command = "msbuild /t:clean >> " + build_log commands = [ex_command, bu_command, ex_command_ref, bu_command_ref, re_command] # parsing config parameters diff --git a/tests/ptr-arith-indexing.ispc b/tests/ptr-arith-indexing.ispc new file mode 100644 index 00000000..9f62a2c9 --- /dev/null +++ b/tests/ptr-arith-indexing.ispc @@ -0,0 +1,16 @@ + +export uniform int width() { return programCount; } + +int foo(uniform float * uniform base, uniform int uOfs, varying int vOfs) { + return (base+uOfs)[vOfs]; +} + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float * uniform ptr = &aFOO[0]; + int val = foo(ptr, programCount, programIndex); + RET[programIndex] = val; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1+programCount+programIndex; +}