From 496845df60f0d3fc7e5b8713af223d0c5f55492f Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 14 Oct 2013 12:23:14 +0400 Subject: [PATCH 01/17] new changes in test system --- alloy.py | 70 +++++++++++++++++++++++++++++++--------------------- run_tests.py | 13 ++++++++-- 2 files changed, 53 insertions(+), 30 deletions(-) diff --git a/alloy.py b/alloy.py index 68bdd979..dbdc40aa 100755 --- a/alloy.py +++ b/alloy.py @@ -294,9 +294,7 @@ def execute_stability(stability, R, print_version): def run_special_tests(): i = 5 -def validation_run(only, only_targets, reference_branch, number, notify, update, make): - if os.environ["ISPC_HOME"] != os.getcwd(): - error("you ISPC_HOME and your current pass are different!\n", 2) +def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm): os.chdir(os.environ["ISPC_HOME"]) os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] if options.notify != "": @@ -322,7 +320,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, stability.random = False stability.ispc_flags = "" stability.compiler_exe = None - stability.num_jobs = 1024 + stability.num_jobs = speed_number stability.verbose = False stability.time = False stability.non_interactive = True @@ -476,28 +474,36 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, # prepare LLVM 3.3 as newest LLVM need_LLVM = check_LLVM(["3.3"]) if len(need_LLVM) != 0: - build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make) -# prepare reference point. build both test and reference compilers - try_do_LLVM("apply git", "git branch", True) - temp4 = take_lines("git branch", "all") - for line in temp4: - if "*" in line: - current_branch = line[2:-1] - stashing = True - sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n") - if "No local changes" in take_lines("git stash", "first"): - stashing = False - #try_do_LLVM("stash current branch ", "git stash", True) - try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True) - sys.stdout.write(".\n") - build_ispc("3.3", make) - sys.stdout.write(".\n") - os.rename("ispc", "ispc_ref") - try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True) - if stashing: - try_do_LLVM("return current branch ", "git stash pop", True) - sys.stdout.write("You can interrupt script now.\n") - build_ispc("3.3", make) + build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make) + if perf_llvm == False: + # prepare reference point. build both test and reference compilers + try_do_LLVM("apply git", "git branch", True) + temp4 = take_lines("git branch", "all") + for line in temp4: + if "*" in line: + current_branch = line[2:-1] + stashing = True + sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n") + if "No local changes" in take_lines("git stash", "first"): + stashing = False + #try_do_LLVM("stash current branch ", "git stash", True) + try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True) + sys.stdout.write(".\n") + build_ispc("3.3", make) + sys.stdout.write(".\n") + os.rename("ispc", "ispc_ref") + try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True) + if stashing: + try_do_LLVM("return current branch ", "git stash pop", True) + sys.stdout.write("You can interrupt script now.\n") + build_ispc("3.3", make) + else: + # build compiler with two different LLVM versions + if len(check_LLVM([reference_branch])) != 0: + error("you haven't got llvm called " + reference_branch, 1) + build_ispc("3.3", make) + os.rename("ispc", "ispc_ref") + build_ispc(reference_branch, make) # begin validation run for performance. output is inserted into perf() perf.perf(performance, []) if options.notify != "": @@ -560,16 +566,22 @@ def Main(): stability_log = os.getcwd() + os.sep + f_date + os.sep + "stability.log" current_path = os.getcwd() make = "make -j" + options.speed + if os.environ["ISPC_HOME"] != os.getcwd(): + error("you ISPC_HOME and your current path are different!\n", 2) + if options.perf_llvm == True: + if options.branch == "master": + options.branch = "trunk" try: if options.build_llvm: build_LLVM(options.version, options.revision, options.folder, options.tarball, options.debug, options.selfbuild, options.extra, False, options.force, make) if options.validation_run: validation_run(options.only, options.only_targets, options.branch, - options.number_for_performance, options.notify, options.update, make) + options.number_for_performance, options.notify, options.update, int(options.speed), + make, options.perf_llvm) finally: os.chdir(current_path) - date_name = "alloy_results_" + datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S') + date_name = "alloy_results_" + datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') if os.path.exists(date_name): error("It's forbidden to run alloy two times in a second, logs are in ./logs", 1) os.rename(f_date, date_name) @@ -661,6 +673,8 @@ run_group.add_option('--only', dest='only', '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', default="") +run_group.add_option('--perf_LLVM', dest='perf_llvm', + help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') parser.add_option_group(run_group) # options for activity "setup PATHS" setup_group = OptionGroup(parser, "Options for setup", diff --git a/run_tests.py b/run_tests.py index 12822d2d..bf4f5d8a 100755 --- a/run_tests.py +++ b/run_tests.py @@ -374,7 +374,12 @@ def file_check(compfails, runfails): temp3 = re.search("[0-9]*\.[0-9]*", temp2.group()) compiler_version = options.compiler_exe + temp3.group() else: - compiler_version = "cl" + compiler_version = "cl" + possible_compilers = ["g++4.4", "g++4.7", "clang++3.3", "cl"] + if not compiler_version in possible_compilers: + error("\n**********\nWe don't have history of fails for compiler " + + compiler_version + + "\nAll fails will be new!!!\n**********", 2) new_line = " "+options.arch.rjust(6)+" "+options.target.rjust(14)+" "+OS.rjust(7)+" "+llvm_version+" "+compiler_version.rjust(10)+" "+opt+" *\n" new_compfails = compfails[:] @@ -672,7 +677,11 @@ def run_tests(options1, args, print_version): if len(compile_error_files) == 0 and len(run_error_files) == 0: print_debug("No fails\n", s, run_tests_log) - R = file_check(compile_error_files, run_error_files) + if len(args) == 0: + R = file_check(compile_error_files, run_error_files) + else: + error("don't check new fails for incomplete suite of tests", 2) + R = 0 if options.time: print_debug("Elapsed time: %d s\n" % elapsed_time, s, run_tests_log) From 7e9b4c0924a884d05182d6d6416dd24e697d9a96 Mon Sep 17 00:00:00 2001 From: egaburov Date: Tue, 15 Oct 2013 10:02:10 +0200 Subject: [PATCH 02/17] added avx2-i64x4 and avx1.1-i64x4 targets --- Makefile | 2 +- builtins.cpp | 16 ++ builtins/target-avx11-i64x4.ll | 126 +++++++++++ builtins/target-avx2-i64x4.ll | 369 +++++++++++++++++++++++++++++++++ ispc.cpp | 46 +++- 5 files changed, 556 insertions(+), 3 deletions(-) create mode 100644 builtins/target-avx11-i64x4.ll create mode 100644 builtins/target-avx2-i64x4.ll diff --git a/Makefile b/Makefile index 10d51bd5..9d39baa4 100644 --- a/Makefile +++ b/Makefile @@ -140,7 +140,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ type.cpp util.cpp HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h -TARGETS=avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \ +TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \ sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \ generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 ifneq ($(ARM_ENABLED), 0) diff --git a/builtins.cpp b/builtins.cpp index 43f68833..af9649b7 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -966,6 +966,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod } case Target::AVX11: { switch (g->target->getVectorWidth()) { + case 4: + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx11_i64x4_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx11_i64x4_64bit); + } + break; case 8: if (runtime32) { EXPORT_MODULE(builtins_bitcode_avx11_32bit); @@ -989,6 +997,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod } case Target::AVX2: { switch (g->target->getVectorWidth()) { + case 4: + if (runtime32) { + EXPORT_MODULE(builtins_bitcode_avx2_i64x4_32bit); + } + else { + EXPORT_MODULE(builtins_bitcode_avx2_i64x4_64bit); + } + break; case 8: if (runtime32) { EXPORT_MODULE(builtins_bitcode_avx2_32bit); diff --git a/builtins/target-avx11-i64x4.ll b/builtins/target-avx11-i64x4.ll new file mode 100644 index 00000000..aae612bb --- /dev/null +++ b/builtins/target-avx11-i64x4.ll @@ -0,0 +1,126 @@ +;; Copyright (c) 2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx1-i64x4base.ll') + +ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', + LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', + `rdrand_definition()') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(i8) +gen_gather(i16) +gen_gather(i32) +gen_gather(float) +gen_gather(i64) +gen_gather(double) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + +ifelse(LLVM_VERSION, `LLVM_3_0', ` +;; nothing to define... +', ` + +define(`expand_4to8', ` + %$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> +') +define(`extract_4from8', ` + %$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> +') + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone { + expand_4to8(i16, v4, v) + %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v) + extract_4from8(float, r, ret) + ret <4 x float> %ret +} + +define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone { + expand_4to8(float, v4, v) + %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0) + extract_4from8(i16, r, ret) + ret <4 x i16> %ret +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} +') diff --git a/builtins/target-avx2-i64x4.ll b/builtins/target-avx2-i64x4.ll new file mode 100644 index 00000000..cdd10386 --- /dev/null +++ b/builtins/target-avx2-i64x4.ll @@ -0,0 +1,369 @@ +;; Copyright (c) 2010-2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifelse(LLVM_VERSION, `LLVM_3_0', `', + LLVM_VERSION, `LLVM_3_1', `', + `define(`HAVE_GATHER', `1')') + +include(`target-avx1-i64x4base.ll') + +ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', + LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', + `rdrand_definition()') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +;; declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone +;; declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readonly + +define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +;; declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readonly +;; declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readonly + +define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + +define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline { + %m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %m +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + + + +ifelse(LLVM_VERSION, `LLVM_3_0', ` +;; nothing to define... +', ` + +define(`expand_4to8', ` + %$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> +') +define(`extract_4from8', ` + %$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> +') + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone { + expand_4to8(i16, v4, v) + %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v) + extract_4from8(float, r, ret) + ret <4 x float> %ret +} + +define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone { + expand_4to8(float, v4, v) + %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0) + extract_4from8(i16, r, ret) + ret <4 x i16> %ret +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} +') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +declare void @llvm.trap() noreturn nounwind + + +ifelse(LLVM_VERSION, `LLVM_3_0', ` +gen_gather_factored(i8) +gen_gather_factored(i16) +gen_gather_factored(i32) +gen_gather_factored(float) +gen_gather_factored(i64) +gen_gather_factored(double)', +LLVM_VERSION, `LLVM_3_1', ` +gen_gather_factored(i8) +gen_gather_factored(i16) +gen_gather_factored(i32) +gen_gather_factored(float) +gen_gather_factored(i64) +gen_gather_factored(double)', ` + +gen_gather(i8) +gen_gather(i16) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int32 gathers + +declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %target, i8 * %ptr, + <4 x i32> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %target, i8 * %ptr, + <4 x i64> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind + +define <4 x i32> @__gather_base_offsets32_i32(i8 * %ptr, + i32 %scale, <4 x i32> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + + %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * %ptr, + <4 x i32> %offsets, <4 x i32> %vecmask, i8 %scale8) + ret <4 x i32> %v +} + + +define <4 x i32> @__gather_base_offsets64_i32(i8 * %ptr, + i32 %scale, <4 x i64> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + + %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * %ptr, + <4 x i64> %offsets, <4 x i32> %vecmask, i8 %scale8) + + ret <4 x i32> %v +} + + +define <4 x i32> @__gather32_i32(<4 x i32> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + + %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * null, + <4 x i32> %ptrs, <4 x i32> %vecmask, i8 1) + + ret <4 x i32> %v +} + + +define <4 x i32> @__gather64_i32(<4 x i64> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + + %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * null, + <4 x i64> %ptrs, <4 x i32> %vecmask, i8 1) + + ret <4 x i32> %v +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float gathers + +declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %target, i8 * %ptr, + <4 x i32> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind +declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %target, i8 * %ptr, + <4 x i64> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind + +define <4 x float> @__gather_base_offsets32_float(i8 * %ptr, + i32 %scale, <4 x i32> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + %mask = bitcast <4 x i32> %vecmask to <4 x float> + + %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * %ptr, + <4 x i32> %offsets, <4 x float> %mask, i8 %scale8) + + ret <4 x float> %v +} + + +define <4 x float> @__gather_base_offsets64_float(i8 * %ptr, + i32 %scale, <4 x i64> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + %mask = bitcast <4 x i32> %vecmask to <4 x float> + + %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * %ptr, + <4 x i64> %offsets, <4 x float> %mask, i8 %scale8) + + ret <4 x float> %v +} + + +define <4 x float> @__gather32_float(<4 x i32> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + %mask = bitcast <4 x i32> %vecmask to <4 x float> + + %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * null, + <4 x i32> %ptrs, <4 x float> %mask, i8 1) + + ret <4 x float> %v +} + + +define <4 x float> @__gather64_float(<4 x i64> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %vecmask = trunc <4 x i64> %vecmask64 to <4 x i32> + %mask = bitcast <4 x i32> %vecmask to <4 x float> + + %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * null, + <4 x i64> %ptrs, <4 x float> %mask, i8 1) + + ret <4 x float> %v +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int64 gathers + +declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %target, i8 * %ptr, + <4 x i32> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind +declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %target, i8 * %ptr, + <4 x i64> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind + +define <4 x i64> @__gather_base_offsets32_i64(i8 * %ptr, + i32 %scale, <4 x i32> %offsets, + <4 x i64> %vecmask) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + + %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * %ptr, + <4 x i32> %offsets, <4 x i64> %vecmask, i8 %scale8) + + ret <4 x i64> %v +} + + +define <4 x i64> @__gather_base_offsets64_i64(i8 * %ptr, + i32 %scale, <4 x i64> %offsets, + <4 x i64> %vecmask) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + + %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * %ptr, + <4 x i64> %offsets, <4 x i64> %vecmask, i8 %scale8) + + ret <4 x i64> %v +} + + +define <4 x i64> @__gather32_i64(<4 x i32> %ptrs, + <4 x i64> %vecmask) nounwind readonly alwaysinline { + + %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * null, + <4 x i32> %ptrs, <4 x i64> %vecmask, i8 1) + ret <4 x i64> %v +} + + +define <4 x i64> @__gather64_i64(<4 x i64> %ptrs, + <4 x i64> %vecmask) nounwind readonly alwaysinline { + %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * null, + <4 x i64> %ptrs, <4 x i64> %vecmask, i8 1) + ret <4 x i64> %v +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; double gathers + +declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %target, i8 * %ptr, + <4 x i64> %indices, <4 x double> %mask, i8 %scale) readonly nounwind +declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %target, i8 * %ptr, + <4 x i32> %indices, <4 x double> %mask, i8 %scale) readonly nounwind + +define <4 x double> @__gather_base_offsets32_double(i8 * %ptr, + i32 %scale, <4 x i32> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = bitcast <4 x i64> %vecmask64 to <4 x double> + + %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * %ptr, + <4 x i32> %offsets, <4 x double> %vecmask, i8 %scale8) + ret <4 x double> %v +} + +define <4 x double> @__gather_base_offsets64_double(i8 * %ptr, + i32 %scale, <4 x i64> %offsets, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %scale8 = trunc i32 %scale to i8 + %vecmask = bitcast <4 x i64> %vecmask64 to <4 x double> + + %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * %ptr, + <4 x i64> %offsets, <4 x double> %vecmask, i8 %scale8) + + ret <4 x double> %v +} + +define <4 x double> @__gather32_double(<4 x i32> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %vecmask = bitcast <4 x i64> %vecmask64 to <4 x double> + + %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * null, + <4 x i32> %ptrs, <4 x double> %vecmask, i8 1) + + ret <4 x double> %v +} + +define <4 x double> @__gather64_double(<4 x i64> %ptrs, + <4 x i64> %vecmask64) nounwind readonly alwaysinline { + %vecmask = bitcast <4 x i64> %vecmask64 to <4 x double> + + %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * null, + <4 x i64> %ptrs, <4 x double> %vecmask, i8 1) + + ret <4 x double> %v +} + +') diff --git a/ispc.cpp b/ispc.cpp index 41adffe4..db4c161a 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -507,6 +507,25 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #if !defined(LLVM_3_1) // LLVM 3.2+ only this->m_hasRand = true; +#endif + } + else if (!strcasecmp(isa, "avx1.1-i64x4")) { + this->m_isa = Target::AVX11; + this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_vectorWidth = 4; + this->m_attributes = "+avx,+popcnt,+cmov,+f16c" +#if defined(LLVM_3_4) + ",+rdrnd" +#else + ",+rdrand" +#endif + ; + this->m_maskingIsFree = false; + this->m_maskBitCount = 64; + this->m_hasHalf = true; +#if !defined(LLVM_3_1) + // LLVM 3.2+ only + this->m_hasRand = true; #endif } else if (!strcasecmp(isa, "avx2") || @@ -555,6 +574,29 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : // LLVM 3.2+ only this->m_hasRand = true; this->m_hasGather = true; +#endif + } + else if (!strcasecmp(isa, "avx2-i64x4")) { + this->m_isa = Target::AVX2; + this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_vectorWidth = 4; + this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" +#if defined(LLVM_3_4) + ",+rdrnd" +#else + ",+rdrand" +#endif +#ifndef LLVM_3_1 + ",+fma" +#endif // !LLVM_3_1 + ; + this->m_maskingIsFree = false; + this->m_maskBitCount = 64; + this->m_hasHalf = true; +#if !defined(LLVM_3_1) + // LLVM 3.2+ only + this->m_hasRand = true; + this->m_hasGather = true; #endif } #ifdef ISPC_ARM_ENABLED @@ -715,8 +757,8 @@ Target::SupportedTargets() { "sse2-i32x4, sse2-i32x8, " "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, " "avx1-i32x8, avx1-i32x16, avx1-i64x4, " - "avx1.1-i32x8, avx1.1-i32x16, " - "avx2-i32x8, avx2-i32x16, " + "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 " + "avx2-i32x8, avx2-i32x16, avx2-i64x4, " "generic-x1, generic-x4, generic-x8, generic-x16, " "generic-x32, generic-x64"; } From fb1a2a0a40f1ae3436fa2ed31c2daf8d90087e2b Mon Sep 17 00:00:00 2001 From: evghenii Date: Tue, 15 Oct 2013 17:10:46 +0300 Subject: [PATCH 03/17] __masked_store_* uses vscatter now, and is thread-safe --- examples/intrinsics/knc-i1x16.h | 51 +++++++++++---------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index 78d35ddc..d6bf6fd5 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -2009,43 +2009,37 @@ static FORCEINLINE void __masked_store_i16(void *p, __vec16_i16 val, ptr[i] = val[i]; } -static FORCEINLINE void __masked_store_i32(void *p, __vec16_i32 val, __vec16_i1 mask) +static FORCEINLINE void __masked_store_i32(void *p, const __vec16_i32 val, const __vec16_i1 mask) { #ifdef ISPC_FORCE_ALIGNED_MEMORY _mm512_mask_store_epi32(p, mask, val.v); #else - __vec16_i32 tmp; - tmp.v = _mm512_extloadunpacklo_epi32(tmp.v, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE); - tmp.v = _mm512_extloadunpackhi_epi32(tmp.v, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE); - tmp.v = _mm512_mask_mov_epi32(tmp.v, mask, val.v); - _mm512_extpackstorelo_epi32(p, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); - _mm512_extpackstorehi_epi32((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); + _mm512_mask_i32extscatter_epi32(p, mask, __ispc_stride1, val, _MM_DOWNCONV_EPI32_NONE, _MM_SCALE_4, _MM_HINT_NONE); #endif } -static FORCEINLINE void __masked_store_float(void *p, __vec16_f val, __vec16_i1 mask) +static FORCEINLINE void __masked_store_float(void *p, const __vec16_f val, const __vec16_i1 mask) { #ifdef ISPC_FORCE_ALIGNED_MEMORY _mm512_mask_store_ps(p, mask, val.v); #else - __vec16_f tmp; - tmp.v = _mm512_extloadunpacklo_ps(tmp.v, p, _MM_UPCONV_PS_NONE, _MM_HINT_NONE); - tmp.v = _mm512_extloadunpackhi_ps(tmp.v, (uint8_t*)p+64, _MM_UPCONV_PS_NONE, _MM_HINT_NONE); - tmp.v = _mm512_mask_mov_ps(tmp.v, mask, val.v); - _mm512_extpackstorelo_ps(p, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE); - _mm512_extpackstorehi_ps((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE); + _mm512_mask_i32extscatter_ps(p, mask, __ispc_stride1, val, _MM_DOWNCONV_PS_NONE, _MM_SCALE_4, _MM_HINT_NONE); #endif } -static FORCEINLINE void __masked_store_i64(void *p, __vec16_i64 val, - __vec16_i1 mask) { - int64_t *ptr = (int64_t *)p; - for (int i = 0; i < 16; ++i) - if ((mask.v & (1 << i)) != 0) - ptr[i] = val[i]; +static FORCEINLINE void __masked_store_i64(void *p, const __vec16_i64 val, const __vec16_i1 mask) { +#ifdef ISPC_FORCE_ALIGNED_MEMORY + __vec16_i1 tmp_m = mask; + tmp_m = _mm512_kswapb(tmp_m, tmp_m); + _mm512_mask_store_epi64(p, mask, val.v1); + _mm512_mask_store_epi64((uint8_t*)p+64, tmp_m, val.v2); +#else + _mm512_mask_i32loextscatter_epi64( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE); + _mm512_mask_i32loextscatter_epi64((int64_t*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE); +#endif } -static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1 mask) +static FORCEINLINE void __masked_store_double(void *p, const __vec16_d val, const __vec16_i1 mask) { #ifdef ISPC_FORCE_ALIGNED_MEMORY __vec16_i1 tmp_m = mask; @@ -2053,19 +2047,8 @@ static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1 _mm512_mask_store_pd(p, mask, val.v1); _mm512_mask_store_pd((uint8_t*)p+64, tmp_m, val.v2); #else - __vec16_d tmp; - __vec16_i1 tmp_m = mask; - tmp_m = _mm512_kswapb(tmp_m, tmp_m); - tmp.v1 = _mm512_extloadunpacklo_pd(tmp.v1, p, _MM_UPCONV_PD_NONE, _MM_HINT_NONE); - tmp.v1 = _mm512_extloadunpackhi_pd(tmp.v1, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE); - tmp.v2 = _mm512_extloadunpacklo_pd(tmp.v2, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE); - tmp.v2 = _mm512_extloadunpackhi_pd(tmp.v2, (uint8_t*)p+128, _MM_UPCONV_PD_NONE, _MM_HINT_NONE); - tmp.v1 = _mm512_mask_mov_pd(tmp.v1, mask, val.v1); - tmp.v2 = _mm512_mask_mov_pd(tmp.v2, tmp_m, val.v2); - _mm512_extpackstorelo_pd(p, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); - _mm512_extpackstorehi_pd((uint8_t*)p+64, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); - _mm512_extpackstorelo_pd((uint8_t*)p+64, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); - _mm512_extpackstorehi_pd((uint8_t*)p+128, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE); + _mm512_mask_i32loextscatter_pd( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE); + _mm512_mask_i32loextscatter_pd((double*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE); #endif } From 1710b9171fa54f40165a0917ec764e70951b9841 Mon Sep 17 00:00:00 2001 From: egaburov Date: Fri, 18 Oct 2013 08:53:01 +0200 Subject: [PATCH 04/17] removed LLVM_3_0 legacy part and changed copyright to 2013 --- builtins/target-avx11-i64x4.ll | 10 ++-------- builtins/target-avx2-i64x4.ll | 22 ++++------------------ 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/builtins/target-avx11-i64x4.ll b/builtins/target-avx11-i64x4.ll index aae612bb..8fe75266 100644 --- a/builtins/target-avx11-i64x4.ll +++ b/builtins/target-avx11-i64x4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2012, Intel Corporation +;; Copyright (c) 2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -31,8 +31,7 @@ include(`target-avx1-i64x4base.ll') -ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', - LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', +ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -74,10 +73,6 @@ gen_gather(double) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float/half conversions -ifelse(LLVM_VERSION, `LLVM_3_0', ` -;; nothing to define... -', ` - define(`expand_4to8', ` %$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> ') @@ -123,4 +118,3 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone { %r = extractelement <8 x i16> %rv, i32 0 ret i16 %r } -') diff --git a/builtins/target-avx2-i64x4.ll b/builtins/target-avx2-i64x4.ll index cdd10386..d74f32dc 100644 --- a/builtins/target-avx2-i64x4.ll +++ b/builtins/target-avx2-i64x4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -29,14 +29,12 @@ ;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -ifelse(LLVM_VERSION, `LLVM_3_0', `', - LLVM_VERSION, `LLVM_3_1', `', +ifelse(LLVM_VERSION, `LLVM_3_1', `', `define(`HAVE_GATHER', `1')') include(`target-avx1-i64x4base.ll') -ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', - LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', +ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -77,10 +75,6 @@ define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly a -ifelse(LLVM_VERSION, `LLVM_3_0', ` -;; nothing to define... -', ` - define(`expand_4to8', ` %$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> ') @@ -126,7 +120,6 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone { %r = extractelement <8 x i16> %rv, i32 0 ret i16 %r } -') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather @@ -134,14 +127,7 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone { declare void @llvm.trap() noreturn nounwind -ifelse(LLVM_VERSION, `LLVM_3_0', ` -gen_gather_factored(i8) -gen_gather_factored(i16) -gen_gather_factored(i32) -gen_gather_factored(float) -gen_gather_factored(i64) -gen_gather_factored(double)', -LLVM_VERSION, `LLVM_3_1', ` +ifelse(LLVM_VERSION, `LLVM_3_1', ` gen_gather_factored(i8) gen_gather_factored(i16) gen_gather_factored(i32) From 2e724b095e030b2d548758b965529735f184ca43 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 7 Oct 2013 15:43:31 +0400 Subject: [PATCH 05/17] support of operators --- expr.cpp | 135 ++++++++++++++++++++++++++++++------------ expr.h | 2 + lex.ll | 8 +++ parse.yy | 14 ++--- tests/operators.ispc | 70 ++++++++++++++++++++++ tests/operators1.ispc | 64 ++++++++++++++++++++ tests/operators2.ispc | 51 ++++++++++++++++ 7 files changed, 299 insertions(+), 45 deletions(-) create mode 100644 tests/operators.ispc create mode 100644 tests/operators1.ispc create mode 100644 tests/operators2.ispc diff --git a/expr.cpp b/expr.cpp index 614cb5e5..c92503e0 100644 --- a/expr.cpp +++ b/expr.cpp @@ -1660,6 +1660,64 @@ BinaryExpr::BinaryExpr(Op o, Expr *a, Expr *b, SourcePos p) arg1 = b; } +Expr *lCreateBinaryOperatorCall(const BinaryExpr::Op bop, + Expr *a0, Expr *a1, + const SourcePos &sp) +{ + if ((a0 == NULL) || (a1 == NULL)) { + return NULL; + } + Expr *arg0 = a0->TypeCheck(); + Expr *arg1 = a1->TypeCheck(); + if ((arg0 == NULL) || (arg1 == NULL)) { + return NULL; + } + const Type *type0 = arg0->GetType(); + const Type *type1 = arg1->GetType(); + + // If either operand is a reference, dereference it before we move + // forward + if (CastType(type0) != NULL) { + arg0 = new RefDerefExpr(arg0, arg0->pos); + type0 = arg0->GetType(); + } + if (CastType(type1) != NULL) { + arg1 = new RefDerefExpr(arg1, arg1->pos); + type1 = arg1->GetType(); + } + if ((type0 == NULL) || (type1 == NULL)) { + return NULL; + } + if (CastType(type0) != NULL || + CastType(type1) != NULL) { + std::string opName = std::string("operator") + lOpString(bop); + std::vector funs; + m->symbolTable->LookupFunction(opName.c_str(), &funs); + if (funs.size() == 0) { + Error(sp, "operator %s(%s, %s) is not defined.", + opName.c_str(), (type0->GetString()).c_str(), (type1->GetString()).c_str()); + return NULL; + } + Expr *func = new FunctionSymbolExpr(opName.c_str(), funs, sp); + ExprList *args = new ExprList(sp); + args->exprs.push_back(arg0); + args->exprs.push_back(arg1); + Expr *opCallExpr = new FunctionCallExpr(func, args, sp); + return opCallExpr; + } + return NULL; +} + + +Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p) { + Expr * op = lCreateBinaryOperatorCall(o, a, b, p); + if (op != NULL) { + return op; + } + op = new BinaryExpr(o, a, b, p); + return op; +} + /** Emit code for a && or || logical operator. In particular, the code here handles "short-circuit" evaluation, where the second expression @@ -2985,29 +3043,10 @@ AssignExpr::TypeCheck() { if (lvalueIsReference) lvalue = new RefDerefExpr(lvalue, lvalue->pos); - FunctionSymbolExpr *fse; - if ((fse = dynamic_cast(rvalue)) != NULL) { - // Special case to use the type of the LHS to resolve function - // overloads when we're assigning a function pointer where the - // function is overloaded. - const Type *lvalueType = lvalue->GetType(); - const FunctionType *ftype; - if (CastType(lvalueType) == NULL || - (ftype = CastType(lvalueType->GetBaseType())) == NULL) { - Error(lvalue->pos, "Can't assign function pointer to type \"%s\".", - lvalueType ? lvalueType->GetString().c_str() : ""); - return NULL; - } - - std::vector paramTypes; - for (int i = 0; i < ftype->GetNumParameters(); ++i) - paramTypes.push_back(ftype->GetParameterType(i)); - - if (!fse->ResolveOverloads(rvalue->pos, paramTypes)) { - Error(pos, "Unable to find overloaded function for function " - "pointer assignment."); - return NULL; - } + if (PossiblyResolveFunctionOverloads(rvalue, lvalue->GetType()) == false) { + Error(pos, "Unable to find overloaded function for function " + "pointer assignment."); + return NULL; } const Type *lhsType = lvalue->GetType(); @@ -3650,10 +3689,37 @@ FunctionCallExpr::GetLValue(FunctionEmitContext *ctx) const { return NULL; } } - + + +bool FullResolveOverloads(Expr * func, ExprList * args, + std::vector *argTypes, + std::vector *argCouldBeNULL, + std::vector *argIsConstant) { + for (unsigned int i = 0; i < args->exprs.size(); ++i) { + Expr *expr = args->exprs[i]; + if (expr == NULL) + return false; + const Type *t = expr->GetType(); + if (t == NULL) + return false; + argTypes->push_back(t); + argCouldBeNULL->push_back(lIsAllIntZeros(expr) || dynamic_cast(expr)); + argIsConstant->push_back(dynamic_cast(expr) || dynamic_cast(expr)); + } + return true; +} + const Type * FunctionCallExpr::GetType() const { + std::vector argTypes; + std::vector argCouldBeNULL, argIsConstant; + if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == true) { + FunctionSymbolExpr *fse = dynamic_cast(func); + if (fse != NULL) { + fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL, &argIsConstant); + } + } const FunctionType *ftype = lGetFunctionType(func); return ftype ? ftype->GetReturnType() : NULL; } @@ -3689,20 +3755,9 @@ FunctionCallExpr::TypeCheck() { std::vector argTypes; std::vector argCouldBeNULL, argIsConstant; - for (unsigned int i = 0; i < args->exprs.size(); ++i) { - Expr *expr = args->exprs[i]; - if (expr == NULL) - return NULL; - const Type *t = expr->GetType(); - if (t == NULL) - return NULL; - - argTypes.push_back(t); - argCouldBeNULL.push_back(lIsAllIntZeros(expr) || - dynamic_cast(expr)); - argIsConstant.push_back(dynamic_cast(expr) || - dynamic_cast(expr)); + if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == false) { + return NULL; } FunctionSymbolExpr *fse = dynamic_cast(func); @@ -7010,7 +7065,8 @@ TypeCastExpr::GetLValue(FunctionEmitContext *ctx) const { const Type * TypeCastExpr::GetType() const { - AssertPos(pos, type->HasUnboundVariability() == false); + // We have to switch off this assert after supporting of operators. + //AssertPos(pos, type->HasUnboundVariability() == false); return type; } @@ -8190,6 +8246,9 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos, const std::vector *argCouldBeNULL, const std::vector *argIsConstant) { const char *funName = candidateFunctions.front()->name.c_str(); + if (triedToResolve == true) { + return true; + } triedToResolve = true; diff --git a/expr.h b/expr.h index 42fdff45..f8b96abd 100644 --- a/expr.h +++ b/expr.h @@ -730,6 +730,8 @@ bool CanConvertTypes(const Type *fromType, const Type *toType, */ Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase); +Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p); + /** Utility routine that emits code to initialize a symbol given an initializer expression. diff --git a/lex.ll b/lex.ll index 3655220f..87a80145 100644 --- a/lex.ll +++ b/lex.ll @@ -419,6 +419,14 @@ while { RT; return TOKEN_WHILE; } \"C\" { RT; return TOKEN_STRING_C_LITERAL; } \.\.\. { RT; return TOKEN_DOTDOTDOT; } +"operator*" { return TOKEN_IDENTIFIER; } +"operator+" { return TOKEN_IDENTIFIER; } +"operator-" { return TOKEN_IDENTIFIER; } +"operator<<" { return TOKEN_IDENTIFIER; } +"operator>>" { return TOKEN_IDENTIFIER; } +"operator/" { return TOKEN_IDENTIFIER; } +"operator%" { return TOKEN_IDENTIFIER; } + L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; } {IDENT} { diff --git a/parse.yy b/parse.yy index 933a3455..38c5ba77 100644 --- a/parse.yy +++ b/parse.yy @@ -468,27 +468,27 @@ cast_expression multiplicative_expression : cast_expression | multiplicative_expression '*' cast_expression - { $$ = new BinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); } | multiplicative_expression '/' cast_expression - { $$ = new BinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); } | multiplicative_expression '%' cast_expression - { $$ = new BinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); } ; additive_expression : multiplicative_expression | additive_expression '+' multiplicative_expression - { $$ = new BinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); } | additive_expression '-' multiplicative_expression - { $$ = new BinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); } ; shift_expression : additive_expression | shift_expression TOKEN_LEFT_OP additive_expression - { $$ = new BinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1,@3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1, @3)); } | shift_expression TOKEN_RIGHT_OP additive_expression - { $$ = new BinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1,@3)); } + { $$ = MakeBinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1, @3)); } ; relational_expression diff --git a/tests/operators.ispc b/tests/operators.ispc new file mode 100644 index 00000000..95502bdd --- /dev/null +++ b/tests/operators.ispc @@ -0,0 +1,70 @@ + +export uniform int width() { return programCount; } + +struct S { + float a; +}; + +// References "struct&" were put in random order to test them. +struct S operator*(struct S& rr, struct S rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator/(struct S& rr, struct S& rv) { + struct S c; + c.a = rr.a - rr.a + 2; + return c; +} + +struct S operator%(struct S rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator+(struct S rr, struct S rv) { + struct S c; + c.a = rr.a / rv.a + 3; + return c; +} + +struct S operator-(struct S rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator>>(struct S& rr, struct S rv) { + struct S c; + c.a = rr.a / rv.a + 3; + return c; +} + +struct S operator<<(struct S& rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S a, a1; +struct S b, b1; +struct S d1, d2, d3, d4, d5, d6, d7; + +export void f_f(uniform float RET[], uniform float aFOO[]) { + a.a = aFOO[programIndex]; + b.a = -aFOO[programIndex]; + d1 = a * b; + d2 = a / b; + d3 = a % b; + d4 = a + b; + d5 = a - b; + d6 = a >> b; + d7 = a << b; + RET[programIndex] = d1.a + d2.a + d3.a + d4.a + d5.a + d6.a + d7.a; +} + +export void result(uniform float RET[4]) { + RET[programIndex] = 14; +} diff --git a/tests/operators1.ispc b/tests/operators1.ispc new file mode 100644 index 00000000..f52c4c35 --- /dev/null +++ b/tests/operators1.ispc @@ -0,0 +1,64 @@ + +export uniform int width() { return programCount; } + +struct S { + float a; +}; + +// References "struct&" were put in random order to test them. +struct S operator*(struct S& rr, struct S rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator/(struct S& rr, struct S& rv) { + struct S c; + c.a = rr.a - rr.a + 2; + return c; +} + +struct S operator%(struct S rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator+(struct S rr, struct S rv) { + struct S c; + c.a = rr.a / rv.a + 3; + return c; +} + +struct S operator-(struct S rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S operator>>(struct S& rr, struct S rv) { + struct S c; + c.a = rr.a / rv.a + 3; + return c; +} + +struct S operator<<(struct S& rr, struct S& rv) { + struct S c; + c.a = rr.a + rv.a + 2; + return c; +} + +struct S a; +struct S b; +struct S d; + +export void f_f(uniform float RET[], uniform float aFOO[]) { + a.a = 5; + b.a = -5; + d = a * b + b / a - a << (b - b) - a; + RET[programIndex] = d.a; +} + +export void result(uniform float RET[4]) { + RET[programIndex] = 12; +} diff --git a/tests/operators2.ispc b/tests/operators2.ispc new file mode 100644 index 00000000..b732b24a --- /dev/null +++ b/tests/operators2.ispc @@ -0,0 +1,51 @@ +int off; + +export uniform int width() { return programCount; } + +struct S { + float a; +}; + +struct S operator+(struct S rr, struct S rv) { + struct S c; + c.a = rr.a / rv.a + 3; + if (off == 1) + c.a = 22; + return c; +} + +struct S operator/(struct S rr, struct S rv) { + struct S c; + c.a = rr.a + rv.a + 10; + if (off == 1) + c.a = 33; + return c; +} + +struct S a; +struct S b; +struct S d; + +export void f_f(uniform float RET[], uniform float aFOO[]) { + int T = programIndex; + a.a = aFOO[programIndex]; + b.a = -aFOO[programIndex]; + if (programIndex == 3) + off = 1; + else + off = 0; + if (T % 2) + d = a + b; + else + d = a / b; + + RET[programIndex] = d.a; +} + +export void result(uniform float RET[4]) { + if (programIndex % 2) + RET[programIndex] = 2; + else + RET[programIndex] = 10; + RET[3] = 22; +} From 2117002c0129fe54a77e0138bfb715a246fd1121 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 17 Oct 2013 23:35:21 +0400 Subject: [PATCH 06/17] Adding testing support for avx1.1-i64x4 and avx2-i64x4 targets --- alloy.py | 12 ++++++------ run_tests.py | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/alloy.py b/alloy.py index 19497b35..cdfc9127 100755 --- a/alloy.py +++ b/alloy.py @@ -212,10 +212,10 @@ def check_targets(): answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] if AVX11 == False and "rdrand" in f_lines[i]: AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"] + answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] if AVX2 == False and "avx2" in f_lines[i]: AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16"] + answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] if current_OS == "MacOS": f_lines = take_lines("sysctl machdep.cpu.features", "first") if "SSE2" in f_lines: @@ -229,10 +229,10 @@ def check_targets(): answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] if "RDRAND" in f_lines: AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"] + answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] if "AVX2.0" in f_lines: AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16"] + answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] # now check what targets we have with the help of SDE @@ -257,9 +257,9 @@ def check_targets(): if AVX == False and "snb" in f_lines[i]: answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]] if AVX11 == False and "ivb" in f_lines[i]: - answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]] + answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]] if AVX2 == False and "hsw" in f_lines[i]: - answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]] + answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]] return [answer, answer_sde] def build_ispc(version_LLVM, make): diff --git a/run_tests.py b/run_tests.py index b5391e1f..40851a40 100755 --- a/run_tests.py +++ b/run_tests.py @@ -454,8 +454,9 @@ def verify(): check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"], ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"], ["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8", - "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16", - "avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8", + "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", + "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4", + "generic-1", "generic-4", "generic-8", "generic-16", "generic-32", "generic-64"]] for i in range (0,len(f_lines)): if f_lines[i][0] == "%": From 1bd5b704c639595f9081e3d75da13a76c9621291 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 18 Oct 2013 01:15:35 +0400 Subject: [PATCH 07/17] Adding support for build on Windows for avx1.1-i64x4 and avx2-i64x4 --- ispc.vcxproj | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/ispc.vcxproj b/ispc.vcxproj index 58fa5b08..b9a3b6c5 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -28,10 +28,14 @@ + + + + @@ -323,6 +327,24 @@ Building gen-bitcode-avx11-x2-64bit.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx11-i64x4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx11-i64x4-64bit.cpp + + Document @@ -359,6 +381,24 @@ Building gen-bitcode-avx2-x2-64bit.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx2-i64x4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx2-i64x4-64bit.cpp + + Document From 6244902931ddc9c2e1a272954d5339cfbc328414 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 18 Oct 2013 01:16:25 +0400 Subject: [PATCH 08/17] Updating fail_db with new Windows fails --- fail_db.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/fail_db.txt b/fail_db.txt index f1aaaab2..9c43c7f0 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -1025,3 +1025,38 @@ ./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * From c378429ffba43c51e60337e05cb45eecfafbb144 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Fri, 18 Oct 2013 19:45:39 +0400 Subject: [PATCH 09/17] time in alloy --- alloy.py | 31 ++++++++++++++++++++++++------- run_tests.py | 7 ++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/alloy.py b/alloy.py index dbdc40aa..f18acf71 100755 --- a/alloy.py +++ b/alloy.py @@ -274,27 +274,38 @@ def build_ispc(version_LLVM, make): def execute_stability(stability, R, print_version): stability1 = copy.deepcopy(stability) - temp = run_tests.run_tests(stability1, [], print_version) + b_temp = run_tests.run_tests(stability1, [], print_version) + temp = b_temp[0] + time = b_temp[1] for j in range(0,4): R[j][0] = R[j][0] + temp[j] for i in range(0,len(temp[j])): R[j][1].append(temp[4]) number_of_fails = temp[5] number_of_new_fails = len(temp[0]) + len(temp[1]) + number_of_passes = len(temp[2]) + len(temp[3]) if number_of_fails == 0: str_fails = ". No fails" else: str_fails = ". Fails: " + str(number_of_fails) if number_of_new_fails == 0: - str_new_fails = ", No new fails.\n" + str_new_fails = ", No new fails" else: - str_new_fails = ", New fails: " + str(number_of_new_fails) + ".\n" - print_debug(temp[4][1:-3] + str_fails + str_new_fails, False, stability_log) + str_new_fails = ", New fails: " + str(number_of_new_fails) + if number_of_passes == 0: + str_new_passes = "." + else: + str_new_passes = ", " + str(number_of_passes) + " new passes." + if stability.time: + str_time = " " + time + "\n" + else: + str_time = "\n" + print_debug(temp[4][1:-3] + str_fails + str_new_fails + str_new_passes + str_time, False, stability_log) def run_special_tests(): i = 5 -def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm): +def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm, time): os.chdir(os.environ["ISPC_HOME"]) os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"] if options.notify != "": @@ -322,7 +333,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, stability.compiler_exe = None stability.num_jobs = speed_number stability.verbose = False - stability.time = False + stability.time = time stability.non_interactive = True stability.update = update stability.include_file = None @@ -572,13 +583,17 @@ def Main(): if options.branch == "master": options.branch = "trunk" try: + start_time = time.time() if options.build_llvm: build_LLVM(options.version, options.revision, options.folder, options.tarball, options.debug, options.selfbuild, options.extra, False, options.force, make) if options.validation_run: validation_run(options.only, options.only_targets, options.branch, options.number_for_performance, options.notify, options.update, int(options.speed), - make, options.perf_llvm) + make, options.perf_llvm, options.time) + elapsed_time = time.time() - start_time + if options.time: + print_debug("Elapsed time: " + time.strftime('%Hh%Mm%Ssec.', time.gmtime(elapsed_time)) + "\n", False, "") finally: os.chdir(current_path) date_name = "alloy_results_" + datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') @@ -668,6 +683,8 @@ run_group.add_option('--update-errors', dest='update', run_group.add_option('--only-targets', dest='only_targets', help='set list of targets to test. Possible values - all subnames of targets.', default="") +run_group.add_option('--time', dest='time', + help='display time of testing', default=False, action='store_true') run_group.add_option('--only', dest='only', help='set types of tests. Possible values:\n' + '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + diff --git a/run_tests.py b/run_tests.py index bf4f5d8a..e53f6419 100755 --- a/run_tests.py +++ b/run_tests.py @@ -649,7 +649,8 @@ def run_tests(options1, args, print_version): if options.non_interactive == False: print_debug("\n", s, run_tests_log) - elapsed_time = time.time() - start_time + temp_time = (time.time() - start_time) + elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time)) while not qret.empty(): (c, r, skip) = qret.get() @@ -684,9 +685,9 @@ def run_tests(options1, args, print_version): R = 0 if options.time: - print_debug("Elapsed time: %d s\n" % elapsed_time, s, run_tests_log) + print_debug("Elapsed time: " + elapsed_time + "\n", s, run_tests_log) - return R + return [R, elapsed_time] from optparse import OptionParser From d72590ede6c18cb06141f0f5c640eefa9ffd6701 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 21 Oct 2013 12:35:53 +0400 Subject: [PATCH 10/17] correction errors in generic targets after operators support --- cbackend.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 7d4b4cfc..814a4016 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -558,8 +558,15 @@ char CWriter::ID = 0; static std::string CBEMangle(const std::string &S) { std::string Result; - for (unsigned i = 0, e = S.size(); i != e; ++i) - if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') { + for (unsigned i = 0, e = S.size(); i != e; ++i) { + if (i+1 != e && ((S[i] == '>' && S[i+1] == '>') || + (S[i] == '<' && S[i+1] == '<'))) { + Result += '_'; + Result += 'A'+(S[i]&15); + Result += 'A'+((S[i]>>4)&15); + Result += '_'; + i++; + } else if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') { Result += S[i]; } else { Result += '_'; @@ -567,6 +574,7 @@ static std::string CBEMangle(const std::string &S) { Result += 'A'+((S[i]>>4)&15); Result += '_'; } + } return Result; } From 78a05777bc6a6d41bfb553cdc66cdf16fc4fc507 Mon Sep 17 00:00:00 2001 From: egaburov Date: Tue, 22 Oct 2013 16:18:40 +0200 Subject: [PATCH 11/17] added taskIndex_x,y,z and taskCount_x,y,z --- func.cpp | 39 +++++++++++++++++++++++++++++++++++++++ func.h | 5 ++++- parse.yy | 17 ++++++++++++++++- type.cpp | 6 ++++++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/func.cpp b/func.cpp index b975049b..dea45afc 100644 --- a/func.cpp +++ b/func.cpp @@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) { Assert(taskIndexSym); taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); + + taskIndexSym_x = m->symbolTable->LookupVariable("taskIndex_x"); + Assert(taskIndexSym_x); + taskIndexSym_y = m->symbolTable->LookupVariable("taskIndex_y"); + Assert(taskIndexSym_y); + taskIndexSym_z = m->symbolTable->LookupVariable("taskIndex_z"); + Assert(taskIndexSym_z); + + + taskCountSym_x = m->symbolTable->LookupVariable("taskCount_x"); + Assert(taskCountSym_x); + taskCountSym_y = m->symbolTable->LookupVariable("taskCount_y"); + Assert(taskCountSym_y); + taskCountSym_z = m->symbolTable->LookupVariable("taskCount_z"); + Assert(taskCountSym_z); } else + { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; + taskIndexSym_x = taskIndexSym_y = taskIndexSym_z = NULL; + taskCountSym_x = taskCountSym_y = taskCountSym_z = NULL; + } } @@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; + llvm::Value *taskIndex_x = argIter++; + llvm::Value *taskIndex_y = argIter++; + llvm::Value *taskIndex_z = argIter++; + llvm::Value *taskCount_x = argIter++; + llvm::Value *taskCount_y = argIter++; + llvm::Value *taskCount_z = argIter++; // Copy the function parameter values from the structure into local // storage @@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); + + taskIndexSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_x"); + ctx->StoreInst(taskIndex_x, taskIndexSym_x->storagePtr); + taskIndexSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_y"); + ctx->StoreInst(taskIndex_y, taskIndexSym_y->storagePtr); + taskIndexSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_z"); + ctx->StoreInst(taskIndex_z, taskIndexSym_z->storagePtr); + + taskCountSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_x"); + ctx->StoreInst(taskCount_x, taskCountSym_x->storagePtr); + taskCountSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_y"); + ctx->StoreInst(taskCount_y, taskCountSym_y->storagePtr); + taskCountSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_z"); + ctx->StoreInst(taskCount_z, taskCountSym_z->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index ac3e1447..ee44a6c5 100644 --- a/func.h +++ b/func.h @@ -60,7 +60,10 @@ private: Stmt *code; Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; - Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym_x, *taskCountSym_x; + Symbol *taskIndexSym_y, *taskCountSym_y; + Symbol *taskIndexSym_z, *taskCountSym_z; }; #endif // ISPC_FUNC_H diff --git a/parse.yy b/parse.yy index 38c5ba77..1de4644f 100644 --- a/parse.yy +++ b/parse.yy @@ -2214,9 +2214,24 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskIndexSym = new Symbol("taskIndex", pos, type); m->symbolTable->AddVariable(taskIndexSym); - + Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); + + Symbol *taskIndexSym_x = new Symbol("taskIndex_x", pos, type); + m->symbolTable->AddVariable(taskIndexSym_x); + Symbol *taskIndexSym_y = new Symbol("taskIndex_y", pos, type); + m->symbolTable->AddVariable(taskIndexSym_y); + Symbol *taskIndexSym_z = new Symbol("taskIndex_z", pos, type); + m->symbolTable->AddVariable(taskIndexSym_z); + + + Symbol *taskCountSym_x = new Symbol("taskCount_x", pos, type); + m->symbolTable->AddVariable(taskCountSym_x); + Symbol *taskCountSym_y = new Symbol("taskCount_y", pos, type); + m->symbolTable->AddVariable(taskCountSym_y); + Symbol *taskCountSym_z = new Symbol("taskCount_z", pos, type); + m->symbolTable->AddVariable(taskCountSym_z); } diff --git a/type.cpp b/type.cpp index 5fa1845b..d36c63c2 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_x + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_y + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_z + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_x + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_y + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_z } else // Otherwise we already have the types of the arguments From ade8751442d1dbd427b13eec3a59016cdd01807d Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 08:39:17 +0200 Subject: [PATCH 12/17] taskIndex_x,y,z are passed to the task --- examples/common.mk | 8 ++++---- examples/tasksys.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/common.mk b/examples/common.mk index db7b8eee..252c1196 100644 --- a/examples/common.mk +++ b/examples/common.mk @@ -1,11 +1,11 @@ -TASK_CXX=../tasksys.cpp +TASK_CXX=../tasksys3d.cpp TASK_LIB=-lpthread -TASK_OBJ=objs/tasksys.o +TASK_OBJ=objs/tasksys3d.o -CXX=clang++ +CXX=g++ -fopenmp CXXFLAGS+=-Iobjs/ -O2 -CC=clang +CC=gcc -fopenmp CCFLAGS+=-Iobjs/ -O2 LIBS=-lm $(TASK_LIB) -lstdc++ diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index c9c2fa7b..d7b524a8 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -59,7 +59,9 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK +*/ #define ISPC_USE_OMP +/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR From f89bad1e945bfeb3f19c3190d87dafcd2e75e405 Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 12:51:06 +0200 Subject: [PATCH 13/17] launch now passes the right info into tasking --- ast.cpp | 3 ++- builtins/util.m4 | 2 +- ctx.cpp | 6 ++++-- ctx.h | 2 +- expr.cpp | 31 +++++++++++++++++---------- expr.h | 5 +++-- lex.ll | 7 ++++++ parse.yy | 56 ++++++++++++++++++++++++++++++++++++++++++++---- 8 files changed, 90 insertions(+), 22 deletions(-) diff --git a/ast.cpp b/ast.cpp index 83ee207d..60b20a80 100644 --- a/ast.cpp +++ b/ast.cpp @@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, else if ((fce = dynamic_cast(node)) != NULL) { fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data); fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data); - fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc, + for (int k = 0; k < 3; k++) + fce->launchCountExpr[0] = (Expr *)WalkAST(fce->launchCountExpr[0], preFunc, postFunc, data); } else if ((ie = dynamic_cast(node)) != NULL) { diff --git a/builtins/util.m4 b/builtins/util.m4 index 11501780..c90e8adc 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1813,7 +1813,7 @@ define(`stdlib_core', ` declare i32 @__fast_masked_vload() declare i8* @ISPCAlloc(i8**, i64, i32) nounwind -declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind +declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind diff --git a/ctx.cpp b/ctx.cpp index c50d22f9..3aee776a 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3502,7 +3502,7 @@ FunctionEmitContext::ReturnInst() { llvm::Value * FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount) { + llvm::Value *launchCount[3]){ if (callee == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; @@ -3563,7 +3563,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, args.push_back(launchGroupHandlePtr); args.push_back(fptr); args.push_back(voidmem); - args.push_back(launchCount); + args.push_back(launchCount[0]); + args.push_back(launchCount[1]); + args.push_back(launchCount[2]); return CallInst(flaunch, NULL, args, ""); } diff --git a/ctx.h b/ctx.h index 58f9aae3..4dd30053 100644 --- a/ctx.h +++ b/ctx.h @@ -542,7 +542,7 @@ public: he given argument values. */ llvm::Value *LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount); + llvm::Value *launchCount[3]); void SyncInst(); diff --git a/expr.cpp b/expr.cpp index c92503e0..60d9ce66 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3540,11 +3540,13 @@ SelectExpr::Print() const { // FunctionCallExpr FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, - bool il, Expr *lce) + bool il, Expr *lce[3]) : Expr(p), isLaunch(il) { func = f; args = a; - launchCountExpr = lce; + launchCountExpr[0] = lce[0]; + launchCountExpr[1] = lce[1]; + launchCountExpr[2] = lce[2]; } @@ -3662,9 +3664,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *retVal = NULL; ctx->SetDebugPos(pos); if (ft->isTask) { - AssertPos(pos, launchCountExpr != NULL); - llvm::Value *launchCount = launchCountExpr->GetValue(ctx); - if (launchCount != NULL) + AssertPos(pos, launchCountExpr[0] != NULL); + llvm::Value *launchCount[3] = + { launchCountExpr[0]->GetValue(ctx), + launchCountExpr[1]->GetValue(ctx), + launchCountExpr[2]->GetValue(ctx) }; + + if (launchCount[0] != NULL) ctx->LaunchInst(callee, argVals, launchCount); } else @@ -3787,14 +3793,17 @@ FunctionCallExpr::TypeCheck() { if (!isLaunch) Error(pos, "\"launch\" expression needed to call function " "with \"task\" qualifier."); - if (!launchCountExpr) + for (int k = 0; k < 3; k++) + { + if (!launchCountExpr[k]) return NULL; - launchCountExpr = - TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32, - "task launch count"); - if (launchCountExpr == NULL) + launchCountExpr[k] = + TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32, + "task launch count"); + if (launchCountExpr[k] == NULL) return NULL; + } } else { if (isLaunch) { @@ -3802,7 +3811,7 @@ FunctionCallExpr::TypeCheck() { "qualified function."); return NULL; } - AssertPos(pos, launchCountExpr == NULL); + AssertPos(pos, launchCountExpr[0] == NULL); } } else { diff --git a/expr.h b/expr.h index f8b96abd..0d46191b 100644 --- a/expr.h +++ b/expr.h @@ -246,7 +246,8 @@ public: class FunctionCallExpr : public Expr { public: FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, - bool isLaunch = false, Expr *launchCountExpr = NULL); + bool isLaunch = false, + Expr *launchCountExpr[3] = (Expr*[3]){NULL, NULL, NULL}); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; @@ -261,7 +262,7 @@ public: Expr *func; ExprList *args; bool isLaunch; - Expr *launchCountExpr; + Expr *launchCountExpr[3]; }; diff --git a/lex.ll b/lex.ll index 87a80145..b5db747d 100644 --- a/lex.ll +++ b/lex.ll @@ -76,6 +76,7 @@ static int allTokens[] = { TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNMASKED, TOKEN_UNSIGNED, TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL, TOKEN_DOTDOTDOT, + TOKEN_TRIPLECHEVRON_OPEN, TOKEN_TRIPLECHEVRON_CLOSE, TOKEN_FLOAT_CONSTANT, TOKEN_DOUBLE_CONSTANT, TOKEN_INT8_CONSTANT, TOKEN_UINT8_CONSTANT, TOKEN_INT16_CONSTANT, TOKEN_UINT16_CONSTANT, @@ -151,6 +152,8 @@ void ParserInit() { tokenToName[TOKEN_WHILE] = "while"; tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\""; tokenToName[TOKEN_DOTDOTDOT] = "..."; + tokenToName[TOKEN_TRIPLECHEVRON_OPEN] = "<<<"; + tokenToName[TOKEN_TRIPLECHEVRON_CLOSE] = ">>>"; tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT"; tokenToName[TOKEN_DOUBLE_CONSTANT] = "TOKEN_DOUBLE_CONSTANT"; tokenToName[TOKEN_INT8_CONSTANT] = "TOKEN_INT8_CONSTANT"; @@ -266,6 +269,8 @@ void ParserInit() { tokenNameRemap["TOKEN_WHILE"] = "\'while\'"; tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\""; tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'"; + tokenNameRemap["TOKEN_TRIPLECHEVRON_OPEN"] = "\'<<<\'"; + tokenNameRemap["TOKEN_TRIPLECHEVRON_CLOSE"] = "\'>>>\'"; tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant"; tokenNameRemap["TOKEN_DOUBLE_CONSTANT"] = "double constant"; tokenNameRemap["TOKEN_INT8_CONSTANT"] = "int8 constant"; @@ -418,6 +423,8 @@ void { RT; return TOKEN_VOID; } while { RT; return TOKEN_WHILE; } \"C\" { RT; return TOKEN_STRING_C_LITERAL; } \.\.\. { RT; return TOKEN_DOTDOTDOT; } +\<\<\< { RT; return TOKEN_TRIPLECHEVRON_OPEN; } +\>\>\> { RT; return TOKEN_TRIPLECHEVRON_CLOSE; } "operator*" { return TOKEN_IDENTIFIER; } "operator+" { return TOKEN_IDENTIFIER; } diff --git a/parse.yy b/parse.yy index 1de4644f..dfb50134 100644 --- a/parse.yy +++ b/parse.yy @@ -204,6 +204,7 @@ struct ForeachDimension { %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH %token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED %token TOKEN_FOREACH_UNIQUE TOKEN_FOREACH_ACTIVE TOKEN_DOTDOTDOT +%token TOKEN_TRIPLECHEVRON_OPEN TOKEN_TRIPLECHEVRON_CLOSE %token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE %token TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT @@ -353,17 +354,64 @@ launch_expression : TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount); } | TOKEN_LAUNCH postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount); } | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')' - { $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); } + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); + } | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' - { $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); } + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); + } + + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount); + } + + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount); + } + | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' { From e6c8765891af519b3b10607d90dde27ae16a53f4 Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 13:18:22 +0200 Subject: [PATCH 14/17] fixed tasksys.cpp for 3d tasking --- examples/common.mk | 8 +- examples/mandelbrot_tasks3d/.gitignore | 2 + examples/mandelbrot_tasks3d/Makefile | 8 + .../mandelbrot_tasks.vcxproj | 180 ++++++++++++++++++ .../mandelbrot_tasks3d/mandelbrot_tasks3d.cpp | 146 ++++++++++++++ .../mandelbrot_tasks3d.ispc | 100 ++++++++++ .../mandelbrot_tasks_serial.cpp | 68 +++++++ examples/tasksys.cpp | 62 ++++-- 8 files changed, 557 insertions(+), 17 deletions(-) create mode 100644 examples/mandelbrot_tasks3d/.gitignore create mode 100644 examples/mandelbrot_tasks3d/Makefile create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp diff --git a/examples/common.mk b/examples/common.mk index 252c1196..db7b8eee 100644 --- a/examples/common.mk +++ b/examples/common.mk @@ -1,11 +1,11 @@ -TASK_CXX=../tasksys3d.cpp +TASK_CXX=../tasksys.cpp TASK_LIB=-lpthread -TASK_OBJ=objs/tasksys3d.o +TASK_OBJ=objs/tasksys.o -CXX=g++ -fopenmp +CXX=clang++ CXXFLAGS+=-Iobjs/ -O2 -CC=gcc -fopenmp +CC=clang CCFLAGS+=-Iobjs/ -O2 LIBS=-lm $(TASK_LIB) -lstdc++ diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore new file mode 100644 index 00000000..c2471c27 --- /dev/null +++ b/examples/mandelbrot_tasks3d/.gitignore @@ -0,0 +1,2 @@ +mandelbrot +*.ppm diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile new file mode 100644 index 00000000..3dd44d65 --- /dev/null +++ b/examples/mandelbrot_tasks3d/Makefile @@ -0,0 +1,8 @@ + +EXAMPLE=mandelbrot_tasks3d +CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp +ISPC_SRC=mandelbrot_tasks3d.ispc +ISPC_IA_TARGETS=avx,sse2,sse4 +ISPC_ARM_TARGETS=neon + +include ../common.mk diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj new file mode 100644 index 00000000..3a8fca79 --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj @@ -0,0 +1,180 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {E80DA7D4-AB22-4648-A068-327307156BE6} + Win32Proj + mandelbrot_tasks + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + + + + + Document + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp new file mode 100644 index 00000000..9cbb966a --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp @@ -0,0 +1,146 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#define NOMINMAX +#pragma warning (disable: 4244) +#pragma warning (disable: 4305) +#endif + +#include +#include +#include +#include "../timing.h" +#include "mandelbrot_tasks3d_ispc.h" +using namespace ispc; + +extern void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]); + +/* Write a PPM image file with the image of the Mandelbrot set */ +static void +writePPM(int *buf, int width, int height, const char *fn) { + FILE *fp = fopen(fn, "wb"); + fprintf(fp, "P6\n"); + fprintf(fp, "%d %d\n", width, height); + fprintf(fp, "255\n"); + for (int i = 0; i < width*height; ++i) { + // Map the iteration count to colors by just alternating between + // two greys. + char c = (buf[i] & 0x1) ? 240 : 20; + for (int j = 0; j < 3; ++j) + fputc(c, fp); + } + fclose(fp); + printf("Wrote image file %s\n", fn); +} + + +static void usage() { + fprintf(stderr, "usage: mandelbrot [--scale=]\n"); + exit(1); +} + +int main(int argc, char *argv[]) { + unsigned int width = 1536; + unsigned int height = 1024; + float x0 = -2; + float x1 = 1; + float y0 = -1; + float y1 = 1; + + if (argc == 1) + ; + else if (argc == 2) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + if (scale == 0.f) + usage(); + width *= scale; + height *= scale; + // round up to multiples of 16 + width = (width + 0xf) & ~0xf; + height = (height + 0xf) & ~0xf; + } + else + usage(); + } + else + usage(); + + int maxIterations = 512; + int *buf = new int[width*height]; + + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } + + printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); + writePPM(buf, width, height, "mandelbrot-ispc.ppm"); + + + // + // And run the serial implementation 3 times, again reporting the + // minimum time. + // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } + + printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); + writePPM(buf, width, height, "mandelbrot-serial.ppm"); + + printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); + + return 0; +} diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc new file mode 100644 index 00000000..60473a7f --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -0,0 +1,100 @@ +/* + Copyright (c) 2010-2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +static inline int +mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + unmasked { + z_re = c_re + new_re; + z_im = c_im + new_im; + } + } + + return i; +} + + +/* Task to compute the Mandelbrot iterations for a single scanline. + */ +task void +mandelbrot_scanline(uniform float x0, uniform float dx, + uniform float y0, uniform float dy, + uniform int width, uniform int height, + uniform int xspan, uniform int yspan, + uniform int maxIterations, uniform int output[]) { +#if 0 + print("taskIndex = % : % \n", taskIndex); + print("taskIndex_x= % : % \n", taskIndex_x); + print("taskIndex_y= % : % \n", taskIndex_y); + print(" --- \n"); +#endif + const uniform int xstart = taskIndex_x * xspan; + const uniform int xend = min(xstart + xspan, width); + + const uniform int ystart = taskIndex_y * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... xend) { + float x = x0 + xi * dx; + float y = y0 + yi * dy; + + int index = yi * width + xi; + output[index] = mandel(x, y, maxIterations); + } + +} + +#if 1 +export void +mandelbrot_ispc(uniform float x0, uniform float y0, + uniform float x1, uniform float y1, + uniform int width, uniform int height, + uniform int maxIterations, uniform int output[]) { + uniform float dx = (x1 - x0) / width; + uniform float dy = (y1 - y0) / height; + const uniform int xspan = 16; + const uniform int yspan = 16; + + launch <<>> + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); +} +#endif diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp new file mode 100644 index 00000000..a76fb5ca --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp @@ -0,0 +1,68 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +static int mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.f) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + z_re = c_re + new_re; + z_im = c_im + new_im; + } + + return i; +} + +void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]) +{ + float dx = (x1 - x0) / width; + float dy = (y1 - y0) / height; + + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; ++i) { + float x = x0 + i * dx; + float y = y0 + j * dy; + + int index = (j * width + i); + output[index] = mandel(x, y, maxIterations); + } + } +} + diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index d7b524a8..fed368dc 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -59,9 +59,7 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK -*/ #define ISPC_USE_OMP -/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR @@ -172,21 +170,39 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount); + int taskIndex, int taskCount, + int taskIndex_x, int taskIndex_y, int taskIndex_z, + int taskCount_x, int taskCount_y, int taskCount_z); // Small structure used to hold the data for each task struct TaskInfo { TaskFuncType func; void *data; int taskIndex, taskCount; + int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif + int taskIndex_x() const + { + return taskIndex % taskCount3d[0]; + } + int taskIndex_y() const + { + return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; + } + int taskIndex_z() const + { + return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); + } + int taskCount_x() const { return taskCount3d[0]; } + int taskCount_y() const { return taskCount3d[1]; } + int taskCount_z() const { return taskCount3d[2]; } }; // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } @@ -520,7 +536,9 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount); + taskInfo->taskIndex, taskInfo->taskCount, + taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(), + taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z()); } @@ -561,7 +579,9 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); // Signal the event that this task is done ti->taskEvent.set(); @@ -662,7 +682,9 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount); + myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the "number of unfinished tasks" counter in the task @@ -863,7 +885,9 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. - myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the number of unfinished tasks counter @@ -893,7 +917,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -922,7 +948,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -953,7 +981,9 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } @@ -980,7 +1010,9 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } } @@ -1033,7 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) { + const int count = countx*county*countz; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1050,6 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ti->data = data; ti->taskIndex = i; ti->taskCount = count; + ti->taskCount3d[0] = countx; + ti->taskCount3d[1] = county; + ti->taskCount3d[2] = countz; } taskGroup->Launch(baseIndex, count); } From 43761173ec8f653531d73b346ff2d190f9206dba Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 24 Oct 2013 13:16:23 +0200 Subject: [PATCH 15/17] changed notation, task[Index,Count]_[x,y,z] -> task[Index,Count][1,2,3]. Change launch <<< nx,ny,nz >>> into launch [nx,ny,nz] or equivalent launch [nz][ny][nx]. Programmer can pick the one the is liked the most --- .../mandelbrot_tasks3d.ispc | 19 +++-- examples/tasksys.cpp | 58 +++++++------- func.cpp | 64 ++++++++-------- func.h | 6 +- lex.ll | 7 -- parse.yy | 76 +++++++++++-------- type.cpp | 12 +-- 7 files changed, 122 insertions(+), 120 deletions(-) diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc index 60473a7f..8bdf6f7a 100644 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -59,16 +59,10 @@ mandelbrot_scanline(uniform float x0, uniform float dx, uniform int width, uniform int height, uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { -#if 0 - print("taskIndex = % : % \n", taskIndex); - print("taskIndex_x= % : % \n", taskIndex_x); - print("taskIndex_y= % : % \n", taskIndex_y); - print(" --- \n"); -#endif - const uniform int xstart = taskIndex_x * xspan; + const uniform int xstart = taskIndex1 * xspan; const uniform int xend = min(xstart + xspan, width); - const uniform int ystart = taskIndex_y * yspan; + const uniform int ystart = taskIndex2 * yspan; const uniform int yend = min(ystart + yspan, height); @@ -90,10 +84,15 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; - const uniform int xspan = 16; + const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */ const uniform int yspan = 16; - launch <<>> + +#if 1 + launch [width/xspan, height/yspan] +#else + launch [height/yspan][width/xspan] +#endif mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, maxIterations, output); } diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index fed368dc..5ef72ed9 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -171,8 +171,8 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, int taskIndex, int taskCount, - int taskIndex_x, int taskIndex_y, int taskIndex_z, - int taskCount_x, int taskCount_y, int taskCount_z); + int taskIndex1, int taskIndex2, int taskIndex3, + int taskCount1, int taskCount2, int taskCount3); // Small structure used to hold the data for each task struct TaskInfo { @@ -183,21 +183,21 @@ struct TaskInfo { #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif - int taskIndex_x() const + int taskIndex1() const { return taskIndex % taskCount3d[0]; } - int taskIndex_y() const + int taskIndex2() const { return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; } - int taskIndex_z() const + int taskIndex3() const { return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); } - int taskCount_x() const { return taskCount3d[0]; } - int taskCount_y() const { return taskCount3d[1]; } - int taskCount_z() const { return taskCount3d[2]; } + int taskCount1() const { return taskCount3d[0]; } + int taskCount2() const { return taskCount3d[1]; } + int taskCount3() const { return taskCount3d[2]; } }; // ispc expects these functions to have C linkage / not be mangled @@ -537,8 +537,8 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->taskIndex, taskInfo->taskCount, - taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(), - taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z()); + taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), + taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); } @@ -580,8 +580,8 @@ lRunTask(LPVOID param) { int threadIndex = 0; int threadCount = 1; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); // Signal the event that this task is done ti->taskEvent.set(); @@ -683,8 +683,8 @@ lTaskEntry(void *arg) { TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->taskCount, - myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), - myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); + myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), + myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); // // Decrement the "number of unfinished tasks" counter in the task @@ -886,8 +886,8 @@ TaskGroup::Sync() { // // FIXME: bogus values for thread index/thread count here as well.. myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, - myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), - myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); + myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), + myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); // // Decrement the number of unfinished tasks counter @@ -918,8 +918,8 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } } @@ -949,8 +949,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } } @@ -982,8 +982,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); } @@ -1011,8 +1011,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); } } @@ -1065,8 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) { - const int count = countx*county*countz; +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, int count3) { + const int count = count1*count2*count3; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1083,9 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, ti->data = data; ti->taskIndex = i; ti->taskCount = count; - ti->taskCount3d[0] = countx; - ti->taskCount3d[1] = county; - ti->taskCount3d[2] = countz; + ti->taskCount3d[0] = count1; + ti->taskCount3d[1] = count2; + ti->taskCount3d[2] = count3; } taskGroup->Launch(baseIndex, count); } diff --git a/func.cpp b/func.cpp index dea45afc..086be6fe 100644 --- a/func.cpp +++ b/func.cpp @@ -133,26 +133,26 @@ Function::Function(Symbol *s, Stmt *c) { taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); - taskIndexSym_x = m->symbolTable->LookupVariable("taskIndex_x"); - Assert(taskIndexSym_x); - taskIndexSym_y = m->symbolTable->LookupVariable("taskIndex_y"); - Assert(taskIndexSym_y); - taskIndexSym_z = m->symbolTable->LookupVariable("taskIndex_z"); - Assert(taskIndexSym_z); + taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1"); + Assert(taskIndexSym1); + taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2"); + Assert(taskIndexSym2); + taskIndexSym3 = m->symbolTable->LookupVariable("taskIndex3"); + Assert(taskIndexSym3); - taskCountSym_x = m->symbolTable->LookupVariable("taskCount_x"); - Assert(taskCountSym_x); - taskCountSym_y = m->symbolTable->LookupVariable("taskCount_y"); - Assert(taskCountSym_y); - taskCountSym_z = m->symbolTable->LookupVariable("taskCount_z"); - Assert(taskCountSym_z); + taskCountSym1 = m->symbolTable->LookupVariable("taskCount1"); + Assert(taskCountSym1); + taskCountSym2 = m->symbolTable->LookupVariable("taskCount2"); + Assert(taskCountSym2); + taskCountSym3 = m->symbolTable->LookupVariable("taskCount3"); + Assert(taskCountSym3); } else { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; - taskIndexSym_x = taskIndexSym_y = taskIndexSym_z = NULL; - taskCountSym_x = taskCountSym_y = taskCountSym_z = NULL; + taskIndexSym1 = taskIndexSym2 = taskIndexSym3 = NULL; + taskCountSym1 = taskCountSym2 = taskCountSym3 = NULL; } } @@ -244,12 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; - llvm::Value *taskIndex_x = argIter++; - llvm::Value *taskIndex_y = argIter++; - llvm::Value *taskIndex_z = argIter++; - llvm::Value *taskCount_x = argIter++; - llvm::Value *taskCount_y = argIter++; - llvm::Value *taskCount_z = argIter++; + llvm::Value *taskIndex1 = argIter++; + llvm::Value *taskIndex2 = argIter++; + llvm::Value *taskIndex3 = argIter++; + llvm::Value *taskCount1 = argIter++; + llvm::Value *taskCount2 = argIter++; + llvm::Value *taskCount3 = argIter++; // Copy the function parameter values from the structure into local // storage @@ -282,19 +282,19 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); - taskIndexSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_x"); - ctx->StoreInst(taskIndex_x, taskIndexSym_x->storagePtr); - taskIndexSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_y"); - ctx->StoreInst(taskIndex_y, taskIndexSym_y->storagePtr); - taskIndexSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_z"); - ctx->StoreInst(taskIndex_z, taskIndexSym_z->storagePtr); + taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); + ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); + taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); + ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); + taskIndexSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex3"); + ctx->StoreInst(taskIndex3, taskIndexSym3->storagePtr); - taskCountSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_x"); - ctx->StoreInst(taskCount_x, taskCountSym_x->storagePtr); - taskCountSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_y"); - ctx->StoreInst(taskCount_y, taskCountSym_y->storagePtr); - taskCountSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_z"); - ctx->StoreInst(taskCount_z, taskCountSym_z->storagePtr); + taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); + ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); + taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); + ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); + taskCountSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount3"); + ctx->StoreInst(taskCount3, taskCountSym3->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index ee44a6c5..4181bba5 100644 --- a/func.h +++ b/func.h @@ -61,9 +61,9 @@ private: Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; Symbol *taskIndexSym, *taskCountSym; - Symbol *taskIndexSym_x, *taskCountSym_x; - Symbol *taskIndexSym_y, *taskCountSym_y; - Symbol *taskIndexSym_z, *taskCountSym_z; + Symbol *taskIndexSym1, *taskCountSym1; + Symbol *taskIndexSym2, *taskCountSym2; + Symbol *taskIndexSym3, *taskCountSym3; }; #endif // ISPC_FUNC_H diff --git a/lex.ll b/lex.ll index b5db747d..87a80145 100644 --- a/lex.ll +++ b/lex.ll @@ -76,7 +76,6 @@ static int allTokens[] = { TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNMASKED, TOKEN_UNSIGNED, TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL, TOKEN_DOTDOTDOT, - TOKEN_TRIPLECHEVRON_OPEN, TOKEN_TRIPLECHEVRON_CLOSE, TOKEN_FLOAT_CONSTANT, TOKEN_DOUBLE_CONSTANT, TOKEN_INT8_CONSTANT, TOKEN_UINT8_CONSTANT, TOKEN_INT16_CONSTANT, TOKEN_UINT16_CONSTANT, @@ -152,8 +151,6 @@ void ParserInit() { tokenToName[TOKEN_WHILE] = "while"; tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\""; tokenToName[TOKEN_DOTDOTDOT] = "..."; - tokenToName[TOKEN_TRIPLECHEVRON_OPEN] = "<<<"; - tokenToName[TOKEN_TRIPLECHEVRON_CLOSE] = ">>>"; tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT"; tokenToName[TOKEN_DOUBLE_CONSTANT] = "TOKEN_DOUBLE_CONSTANT"; tokenToName[TOKEN_INT8_CONSTANT] = "TOKEN_INT8_CONSTANT"; @@ -269,8 +266,6 @@ void ParserInit() { tokenNameRemap["TOKEN_WHILE"] = "\'while\'"; tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\""; tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'"; - tokenNameRemap["TOKEN_TRIPLECHEVRON_OPEN"] = "\'<<<\'"; - tokenNameRemap["TOKEN_TRIPLECHEVRON_CLOSE"] = "\'>>>\'"; tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant"; tokenNameRemap["TOKEN_DOUBLE_CONSTANT"] = "double constant"; tokenNameRemap["TOKEN_INT8_CONSTANT"] = "int8 constant"; @@ -423,8 +418,6 @@ void { RT; return TOKEN_VOID; } while { RT; return TOKEN_WHILE; } \"C\" { RT; return TOKEN_STRING_C_LITERAL; } \.\.\. { RT; return TOKEN_DOTDOTDOT; } -\<\<\< { RT; return TOKEN_TRIPLECHEVRON_OPEN; } -\>\>\> { RT; return TOKEN_TRIPLECHEVRON_CLOSE; } "operator*" { return TOKEN_IDENTIFIER; } "operator+" { return TOKEN_IDENTIFIER; } diff --git a/parse.yy b/parse.yy index dfb50134..653bba62 100644 --- a/parse.yy +++ b/parse.yy @@ -204,7 +204,6 @@ struct ForeachDimension { %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH %token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED %token TOKEN_FOREACH_UNIQUE TOKEN_FOREACH_ACTIVE TOKEN_DOTDOTDOT -%token TOKEN_TRIPLECHEVRON_OPEN TOKEN_TRIPLECHEVRON_CLOSE %token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE %token TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT @@ -363,54 +362,65 @@ launch_expression Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount); } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')' + + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); Expr *launchCount[3] = {$3, oneExpr, oneExpr}; $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' - { - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); - Expr *launchCount[3] = {$3, oneExpr, oneExpr}; - $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); - } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' - { - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); - Expr *launchCount[3] = {$3, oneExpr, oneExpr}; - $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); - } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); Expr *launchCount[3] = {$3, oneExpr, oneExpr}; $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); Expr *launchCount[3] = {$3, $5, oneExpr}; $$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); Expr *launchCount[3] = {$3, $5, oneExpr}; $$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount); } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount); + } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { Expr *launchCount[3] = {$3, $5, $7}; $$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' { Expr *launchCount[3] = {$3, $5, $7}; $$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount); } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount); + } | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' @@ -425,13 +435,13 @@ launch_expression "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' { Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>' { Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); @@ -2266,20 +2276,20 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); - Symbol *taskIndexSym_x = new Symbol("taskIndex_x", pos, type); - m->symbolTable->AddVariable(taskIndexSym_x); - Symbol *taskIndexSym_y = new Symbol("taskIndex_y", pos, type); - m->symbolTable->AddVariable(taskIndexSym_y); - Symbol *taskIndexSym_z = new Symbol("taskIndex_z", pos, type); - m->symbolTable->AddVariable(taskIndexSym_z); + Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type); + m->symbolTable->AddVariable(taskIndexSym1); + Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type); + m->symbolTable->AddVariable(taskIndexSym2); + Symbol *taskIndexSym3 = new Symbol("taskIndex3", pos, type); + m->symbolTable->AddVariable(taskIndexSym3); - Symbol *taskCountSym_x = new Symbol("taskCount_x", pos, type); - m->symbolTable->AddVariable(taskCountSym_x); - Symbol *taskCountSym_y = new Symbol("taskCount_y", pos, type); - m->symbolTable->AddVariable(taskCountSym_y); - Symbol *taskCountSym_z = new Symbol("taskCount_z", pos, type); - m->symbolTable->AddVariable(taskCountSym_z); + Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type); + m->symbolTable->AddVariable(taskCountSym1); + Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type); + m->symbolTable->AddVariable(taskCountSym2); + Symbol *taskCountSym3 = new Symbol("taskCount3", pos, type); + m->symbolTable->AddVariable(taskCountSym3); } diff --git a/type.cpp b/type.cpp index d36c63c2..3ae0cab4 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,12 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_x - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_y - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_z - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_x - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_y - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_z + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex3 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount3 } else // Otherwise we already have the types of the arguments From c5fc47cc1959cf8fb4b170a6b378614c811591a7 Mon Sep 17 00:00:00 2001 From: egaburov Date: Thu, 24 Oct 2013 14:09:46 +0200 Subject: [PATCH 16/17] tasksys cleaning --- examples/tasksys.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 5ef72ed9..4c85e119 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -178,11 +178,12 @@ typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, struct TaskInfo { TaskFuncType func; void *data; - int taskIndex, taskCount; + int taskIndex; int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif + int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } int taskIndex1() const { return taskIndex % taskCount3d[0]; @@ -198,7 +199,8 @@ struct TaskInfo { int taskCount1() const { return taskCount3d[0]; } int taskCount2() const { return taskCount3d[1]; } int taskCount3() const { return taskCount3d[2]; } -}; + TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); } +} __attribute__((aligned(32))); // ispc expects these functions to have C linkage / not be mangled extern "C" { @@ -536,7 +538,7 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount, + taskInfo->taskIndex, taskInfo->taskCount(), taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); } @@ -579,7 +581,7 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); @@ -682,7 +684,7 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount, + myTask->taskCount(), myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); @@ -885,7 +887,7 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. - myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(), myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); @@ -917,7 +919,7 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } @@ -948,7 +950,7 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } @@ -981,7 +983,7 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); @@ -1010,7 +1012,7 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); @@ -1082,7 +1084,6 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, ti->func = (TaskFuncType)func; ti->data = data; ti->taskIndex = i; - ti->taskCount = count; ti->taskCount3d[0] = count1; ti->taskCount3d[1] = count2; ti->taskCount3d[2] = count3; From 383e804ec1e591370899babf1c1aa549995e8bae Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 24 Oct 2013 17:20:56 +0200 Subject: [PATCH 17/17] changed notation form taskIndex1,2,3 -> taskIndex0,1,2 --- .../mandelbrot_tasks3d.ispc | 4 +- examples/tasksys.cpp | 58 +++++++++---------- func.cpp | 24 ++++---- func.h | 2 +- parse.yy | 8 +-- type.cpp | 4 +- 6 files changed, 50 insertions(+), 50 deletions(-) diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc index 8bdf6f7a..395bdca4 100644 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -59,10 +59,10 @@ mandelbrot_scanline(uniform float x0, uniform float dx, uniform int width, uniform int height, uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { - const uniform int xstart = taskIndex1 * xspan; + const uniform int xstart = taskIndex0 * xspan; const uniform int xend = min(xstart + xspan, width); - const uniform int ystart = taskIndex2 * yspan; + const uniform int ystart = taskIndex1 * yspan; const uniform int yend = min(ystart + yspan, height); diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 4c85e119..6bc60129 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -171,8 +171,8 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, int taskIndex, int taskCount, - int taskIndex1, int taskIndex2, int taskIndex3, - int taskCount1, int taskCount2, int taskCount3); + int taskIndex0, int taskIndex1, int taskIndex2, + int taskCount0, int taskCount1, int taskCount2); // Small structure used to hold the data for each task struct TaskInfo { @@ -184,21 +184,21 @@ struct TaskInfo { event taskEvent; #endif int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } - int taskIndex1() const + int taskIndex0() const { return taskIndex % taskCount3d[0]; } - int taskIndex2() const + int taskIndex1() const { return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; } - int taskIndex3() const + int taskIndex2() const { return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); } - int taskCount1() const { return taskCount3d[0]; } - int taskCount2() const { return taskCount3d[1]; } - int taskCount3() const { return taskCount3d[2]; } + int taskCount0() const { return taskCount3d[0]; } + int taskCount1() const { return taskCount3d[1]; } + int taskCount2() const { return taskCount3d[2]; } TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); } } __attribute__((aligned(32))); @@ -539,8 +539,8 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->taskIndex, taskInfo->taskCount(), - taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), - taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); + taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(), + taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2()); } @@ -582,8 +582,8 @@ lRunTask(LPVOID param) { int threadIndex = 0; int threadCount = 1; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); // Signal the event that this task is done ti->taskEvent.set(); @@ -685,8 +685,8 @@ lTaskEntry(void *arg) { TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->taskCount(), - myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), - myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the "number of unfinished tasks" counter in the task @@ -888,8 +888,8 @@ TaskGroup::Sync() { // // FIXME: bogus values for thread index/thread count here as well.. myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(), - myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), - myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the number of unfinished tasks counter @@ -920,8 +920,8 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 ti->func(ti->data, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -951,8 +951,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -984,8 +984,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } @@ -1013,8 +1013,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } } @@ -1067,8 +1067,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, int count3) { - const int count = count1*count2*count3; +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) { + const int count = count0*count1*count2; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1084,9 +1084,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, ti->func = (TaskFuncType)func; ti->data = data; ti->taskIndex = i; - ti->taskCount3d[0] = count1; - ti->taskCount3d[1] = count2; - ti->taskCount3d[2] = count3; + ti->taskCount3d[0] = count0; + ti->taskCount3d[1] = count1; + ti->taskCount3d[2] = count2; } taskGroup->Launch(baseIndex, count); } diff --git a/func.cpp b/func.cpp index 086be6fe..af2cc05a 100644 --- a/func.cpp +++ b/func.cpp @@ -133,26 +133,26 @@ Function::Function(Symbol *s, Stmt *c) { taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); + taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0"); + Assert(taskIndexSym0); taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1"); Assert(taskIndexSym1); taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2"); Assert(taskIndexSym2); - taskIndexSym3 = m->symbolTable->LookupVariable("taskIndex3"); - Assert(taskIndexSym3); + taskCountSym0 = m->symbolTable->LookupVariable("taskCount0"); + Assert(taskCountSym0); taskCountSym1 = m->symbolTable->LookupVariable("taskCount1"); Assert(taskCountSym1); taskCountSym2 = m->symbolTable->LookupVariable("taskCount2"); Assert(taskCountSym2); - taskCountSym3 = m->symbolTable->LookupVariable("taskCount3"); - Assert(taskCountSym3); } else { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; - taskIndexSym1 = taskIndexSym2 = taskIndexSym3 = NULL; - taskCountSym1 = taskCountSym2 = taskCountSym3 = NULL; + taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL; + taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL; } } @@ -244,12 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; + llvm::Value *taskIndex0 = argIter++; llvm::Value *taskIndex1 = argIter++; llvm::Value *taskIndex2 = argIter++; - llvm::Value *taskIndex3 = argIter++; + llvm::Value *taskCount0 = argIter++; llvm::Value *taskCount1 = argIter++; llvm::Value *taskCount2 = argIter++; - llvm::Value *taskCount3 = argIter++; // Copy the function parameter values from the structure into local // storage @@ -282,19 +282,19 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); + taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0"); + ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr); taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); - taskIndexSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex3"); - ctx->StoreInst(taskIndex3, taskIndexSym3->storagePtr); + taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0"); + ctx->StoreInst(taskCount0, taskCountSym0->storagePtr); taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); - taskCountSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount3"); - ctx->StoreInst(taskCount3, taskCountSym3->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index 4181bba5..88a96dbc 100644 --- a/func.h +++ b/func.h @@ -61,9 +61,9 @@ private: Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym0, *taskCountSym0; Symbol *taskIndexSym1, *taskCountSym1; Symbol *taskIndexSym2, *taskCountSym2; - Symbol *taskIndexSym3, *taskCountSym3; }; #endif // ISPC_FUNC_H diff --git a/parse.yy b/parse.yy index 653bba62..9a0377c5 100644 --- a/parse.yy +++ b/parse.yy @@ -2276,20 +2276,20 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); + Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type); + m->symbolTable->AddVariable(taskIndexSym0); Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type); m->symbolTable->AddVariable(taskIndexSym1); Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type); m->symbolTable->AddVariable(taskIndexSym2); - Symbol *taskIndexSym3 = new Symbol("taskIndex3", pos, type); - m->symbolTable->AddVariable(taskIndexSym3); + Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type); + m->symbolTable->AddVariable(taskCountSym0); Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type); m->symbolTable->AddVariable(taskCountSym1); Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type); m->symbolTable->AddVariable(taskCountSym2); - Symbol *taskCountSym3 = new Symbol("taskCount3", pos, type); - m->symbolTable->AddVariable(taskCountSym3); } diff --git a/type.cpp b/type.cpp index 3ae0cab4..516276f0 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,12 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex3 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount0 callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 - callTypes.push_back(LLVMTypes::Int32Type); // taskCount3 } else // Otherwise we already have the types of the arguments