working on nvptx

This commit is contained in:
Evghenii
2013-10-26 16:12:33 +02:00
32 changed files with 1743 additions and 172 deletions

View File

@@ -140,7 +140,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=nvptx64 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
TARGETS=nvptx64 avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
ifneq ($(ARM_ENABLED), 0)

109
alloy.py
View File

@@ -212,10 +212,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if AVX11 == False and "rdrand" in f_lines[i]:
AVX11 = True;
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if AVX2 == False and "avx2" in f_lines[i]:
AVX2 = True;
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
if current_OS == "MacOS":
f_lines = take_lines("sysctl machdep.cpu.features", "first")
if "SSE2" in f_lines:
@@ -229,10 +229,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if "RDRAND" in f_lines:
AVX11 = True;
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if "AVX2.0" in f_lines:
AVX2 = True;
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
# now check what targets we have with the help of SDE
@@ -257,9 +257,9 @@ def check_targets():
if AVX == False and "snb" in f_lines[i]:
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
if AVX11 == False and "ivb" in f_lines[i]:
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
if AVX2 == False and "hsw" in f_lines[i]:
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
return [answer, answer_sde]
def build_ispc(version_LLVM, make):
@@ -274,29 +274,38 @@ def build_ispc(version_LLVM, make):
def execute_stability(stability, R, print_version):
stability1 = copy.deepcopy(stability)
temp = run_tests.run_tests(stability1, [], print_version)
b_temp = run_tests.run_tests(stability1, [], print_version)
temp = b_temp[0]
time = b_temp[1]
for j in range(0,4):
R[j][0] = R[j][0] + temp[j]
for i in range(0,len(temp[j])):
R[j][1].append(temp[4])
number_of_fails = temp[5]
number_of_new_fails = len(temp[0]) + len(temp[1])
number_of_passes = len(temp[2]) + len(temp[3])
if number_of_fails == 0:
str_fails = ". No fails"
else:
str_fails = ". Fails: " + str(number_of_fails)
if number_of_new_fails == 0:
str_new_fails = ", No new fails.\n"
str_new_fails = ", No new fails"
else:
str_new_fails = ", New fails: " + str(number_of_new_fails) + ".\n"
print_debug(temp[4][1:-3] + str_fails + str_new_fails, False, stability_log)
str_new_fails = ", New fails: " + str(number_of_new_fails)
if number_of_passes == 0:
str_new_passes = "."
else:
str_new_passes = ", " + str(number_of_passes) + " new passes."
if stability.time:
str_time = " " + time + "\n"
else:
str_time = "\n"
print_debug(temp[4][1:-3] + str_fails + str_new_fails + str_new_passes + str_time, False, stability_log)
def run_special_tests():
i = 5
def validation_run(only, only_targets, reference_branch, number, notify, update, make):
if os.environ["ISPC_HOME"] != os.getcwd():
error("you ISPC_HOME and your current pass are different!\n", 2)
def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm, time):
os.chdir(os.environ["ISPC_HOME"])
os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"]
if options.notify != "":
@@ -322,9 +331,9 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
stability.random = False
stability.ispc_flags = ""
stability.compiler_exe = None
stability.num_jobs = 1024
stability.num_jobs = speed_number
stability.verbose = False
stability.time = False
stability.time = time
stability.non_interactive = True
stability.update = update
stability.include_file = None
@@ -476,28 +485,36 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
# prepare LLVM 3.3 as newest LLVM
need_LLVM = check_LLVM(["3.3"])
if len(need_LLVM) != 0:
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make)
# prepare reference point. build both test and reference compilers
try_do_LLVM("apply git", "git branch", True)
temp4 = take_lines("git branch", "all")
for line in temp4:
if "*" in line:
current_branch = line[2:-1]
stashing = True
sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n")
if "No local changes" in take_lines("git stash", "first"):
stashing = False
#try_do_LLVM("stash current branch ", "git stash", True)
try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True)
sys.stdout.write(".\n")
build_ispc("3.3", make)
sys.stdout.write(".\n")
os.rename("ispc", "ispc_ref")
try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True)
if stashing:
try_do_LLVM("return current branch ", "git stash pop", True)
sys.stdout.write("You can interrupt script now.\n")
build_ispc("3.3", make)
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make)
if perf_llvm == False:
# prepare reference point. build both test and reference compilers
try_do_LLVM("apply git", "git branch", True)
temp4 = take_lines("git branch", "all")
for line in temp4:
if "*" in line:
current_branch = line[2:-1]
stashing = True
sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n")
if "No local changes" in take_lines("git stash", "first"):
stashing = False
#try_do_LLVM("stash current branch ", "git stash", True)
try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True)
sys.stdout.write(".\n")
build_ispc("3.3", make)
sys.stdout.write(".\n")
os.rename("ispc", "ispc_ref")
try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True)
if stashing:
try_do_LLVM("return current branch ", "git stash pop", True)
sys.stdout.write("You can interrupt script now.\n")
build_ispc("3.3", make)
else:
# build compiler with two different LLVM versions
if len(check_LLVM([reference_branch])) != 0:
error("you haven't got llvm called " + reference_branch, 1)
build_ispc("3.3", make)
os.rename("ispc", "ispc_ref")
build_ispc(reference_branch, make)
# begin validation run for performance. output is inserted into perf()
perf.perf(performance, [])
if options.notify != "":
@@ -560,16 +577,26 @@ def Main():
stability_log = os.getcwd() + os.sep + f_date + os.sep + "stability.log"
current_path = os.getcwd()
make = "make -j" + options.speed
if os.environ["ISPC_HOME"] != os.getcwd():
error("you ISPC_HOME and your current path are different!\n", 2)
if options.perf_llvm == True:
if options.branch == "master":
options.branch = "trunk"
try:
start_time = time.time()
if options.build_llvm:
build_LLVM(options.version, options.revision, options.folder, options.tarball,
options.debug, options.selfbuild, options.extra, False, options.force, make)
if options.validation_run:
validation_run(options.only, options.only_targets, options.branch,
options.number_for_performance, options.notify, options.update, make)
options.number_for_performance, options.notify, options.update, int(options.speed),
make, options.perf_llvm, options.time)
elapsed_time = time.time() - start_time
if options.time:
print_debug("Elapsed time: " + time.strftime('%Hh%Mm%Ssec.', time.gmtime(elapsed_time)) + "\n", False, "")
finally:
os.chdir(current_path)
date_name = "alloy_results_" + datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
date_name = "alloy_results_" + datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
if os.path.exists(date_name):
error("It's forbidden to run alloy two times in a second, logs are in ./logs", 1)
os.rename(f_date, date_name)
@@ -656,11 +683,15 @@ run_group.add_option('--update-errors', dest='update',
run_group.add_option('--only-targets', dest='only_targets',
help='set list of targets to test. Possible values - all subnames of targets.',
default="")
run_group.add_option('--time', dest='time',
help='display time of testing', default=False, action='store_true')
run_group.add_option('--only', dest='only',
help='set types of tests. Possible values:\n' +
'-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' +
'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).',
default="")
run_group.add_option('--perf_LLVM', dest='perf_llvm',
help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true')
parser.add_option_group(run_group)
# options for activity "setup PATHS"
setup_group = OptionGroup(parser, "Options for setup",

View File

@@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
for (int k = 0; k < 3; k++)
fce->launchCountExpr[0] = (Expr *)WalkAST(fce->launchCountExpr[0], preFunc,
postFunc, data);
}
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {

View File

@@ -979,6 +979,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
case Target::AVX11: {
switch (g->target->getVectorWidth()) {
case 4:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx11_i64x4_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_avx11_i64x4_64bit);
}
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx11_32bit);
@@ -1002,6 +1010,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
case Target::AVX2: {
switch (g->target->getVectorWidth()) {
case 4:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx2_i64x4_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_avx2_i64x4_64bit);
}
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx2_32bit);

View File

@@ -0,0 +1,120 @@
;; Copyright (c) 2013, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(`target-avx1-i64x4base.ll')
ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
`rdrand_definition()')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int min/max
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unsigned int min/max
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather
gen_gather(i8)
gen_gather(i16)
gen_gather(i32)
gen_gather(float)
gen_gather(i64)
gen_gather(double)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
define(`expand_4to8', `
%$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
')
define(`extract_4from8', `
%$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; 0 is round nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone {
expand_4to8(i16, v4, v)
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
extract_4from8(float, r, ret)
ret <4 x float> %ret
}
define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone {
expand_4to8(float, v4, v)
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
extract_4from8(i16, r, ret)
ret <4 x i16> %ret
}
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
%v1 = bitcast i16 %v to <1 x i16>
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
%r = extractelement <8 x float> %rv, i32 0
ret float %r
}
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
%v1 = bitcast float %v to <1 x float>
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
; round to nearest even
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
%r = extractelement <8 x i16> %rv, i32 0
ret i16 %r
}

View File

@@ -0,0 +1,355 @@
;; Copyright (c) 2013, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ifelse(LLVM_VERSION, `LLVM_3_1', `',
`define(`HAVE_GATHER', `1')')
include(`target-avx1-i64x4base.ll')
ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
`rdrand_definition()')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int min/max
;; declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
;; declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readonly
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unsigned int min/max
;; declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readonly
;; declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readonly
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
define(`expand_4to8', `
%$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
')
define(`extract_4from8', `
%$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; 0 is round nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone {
expand_4to8(i16, v4, v)
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
extract_4from8(float, r, ret)
ret <4 x float> %ret
}
define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone {
expand_4to8(float, v4, v)
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
extract_4from8(i16, r, ret)
ret <4 x i16> %ret
}
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
%v1 = bitcast i16 %v to <1 x i16>
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
%r = extractelement <8 x float> %rv, i32 0
ret float %r
}
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
%v1 = bitcast float %v to <1 x float>
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
; round to nearest even
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
%r = extractelement <8 x i16> %rv, i32 0
ret i16 %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather
declare void @llvm.trap() noreturn nounwind
ifelse(LLVM_VERSION, `LLVM_3_1', `
gen_gather_factored(i8)
gen_gather_factored(i16)
gen_gather_factored(i32)
gen_gather_factored(float)
gen_gather_factored(i64)
gen_gather_factored(double)', `
gen_gather(i8)
gen_gather(i16)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int32 gathers
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %target, i8 * %ptr,
<4 x i32> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %target, i8 * %ptr,
<4 x i64> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind
define <4 x i32> @__gather_base_offsets32_i32(i8 * %ptr,
i32 %scale, <4 x i32> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * %ptr,
<4 x i32> %offsets, <4 x i32> %vecmask, i8 %scale8)
ret <4 x i32> %v
}
define <4 x i32> @__gather_base_offsets64_i32(i8 * %ptr,
i32 %scale, <4 x i64> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * %ptr,
<4 x i64> %offsets, <4 x i32> %vecmask, i8 %scale8)
ret <4 x i32> %v
}
define <4 x i32> @__gather32_i32(<4 x i32> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * null,
<4 x i32> %ptrs, <4 x i32> %vecmask, i8 1)
ret <4 x i32> %v
}
define <4 x i32> @__gather64_i32(<4 x i64> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * null,
<4 x i64> %ptrs, <4 x i32> %vecmask, i8 1)
ret <4 x i32> %v
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float gathers
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %target, i8 * %ptr,
<4 x i32> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %target, i8 * %ptr,
<4 x i64> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind
define <4 x float> @__gather_base_offsets32_float(i8 * %ptr,
i32 %scale, <4 x i32> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%mask = bitcast <4 x i32> %vecmask to <4 x float>
%v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * %ptr,
<4 x i32> %offsets, <4 x float> %mask, i8 %scale8)
ret <4 x float> %v
}
define <4 x float> @__gather_base_offsets64_float(i8 * %ptr,
i32 %scale, <4 x i64> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%mask = bitcast <4 x i32> %vecmask to <4 x float>
%v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * %ptr,
<4 x i64> %offsets, <4 x float> %mask, i8 %scale8)
ret <4 x float> %v
}
define <4 x float> @__gather32_float(<4 x i32> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%mask = bitcast <4 x i32> %vecmask to <4 x float>
%v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * null,
<4 x i32> %ptrs, <4 x float> %mask, i8 1)
ret <4 x float> %v
}
define <4 x float> @__gather64_float(<4 x i64> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
%mask = bitcast <4 x i32> %vecmask to <4 x float>
%v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * null,
<4 x i64> %ptrs, <4 x float> %mask, i8 1)
ret <4 x float> %v
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int64 gathers
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %target, i8 * %ptr,
<4 x i32> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %target, i8 * %ptr,
<4 x i64> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind
define <4 x i64> @__gather_base_offsets32_i64(i8 * %ptr,
i32 %scale, <4 x i32> %offsets,
<4 x i64> %vecmask) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * %ptr,
<4 x i32> %offsets, <4 x i64> %vecmask, i8 %scale8)
ret <4 x i64> %v
}
define <4 x i64> @__gather_base_offsets64_i64(i8 * %ptr,
i32 %scale, <4 x i64> %offsets,
<4 x i64> %vecmask) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * %ptr,
<4 x i64> %offsets, <4 x i64> %vecmask, i8 %scale8)
ret <4 x i64> %v
}
define <4 x i64> @__gather32_i64(<4 x i32> %ptrs,
<4 x i64> %vecmask) nounwind readonly alwaysinline {
%v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * null,
<4 x i32> %ptrs, <4 x i64> %vecmask, i8 1)
ret <4 x i64> %v
}
define <4 x i64> @__gather64_i64(<4 x i64> %ptrs,
<4 x i64> %vecmask) nounwind readonly alwaysinline {
%v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * null,
<4 x i64> %ptrs, <4 x i64> %vecmask, i8 1)
ret <4 x i64> %v
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; double gathers
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %target, i8 * %ptr,
<4 x i64> %indices, <4 x double> %mask, i8 %scale) readonly nounwind
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %target, i8 * %ptr,
<4 x i32> %indices, <4 x double> %mask, i8 %scale) readonly nounwind
define <4 x double> @__gather_base_offsets32_double(i8 * %ptr,
i32 %scale, <4 x i32> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
%v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * %ptr,
<4 x i32> %offsets, <4 x double> %vecmask, i8 %scale8)
ret <4 x double> %v
}
define <4 x double> @__gather_base_offsets64_double(i8 * %ptr,
i32 %scale, <4 x i64> %offsets,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%scale8 = trunc i32 %scale to i8
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
%v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * %ptr,
<4 x i64> %offsets, <4 x double> %vecmask, i8 %scale8)
ret <4 x double> %v
}
define <4 x double> @__gather32_double(<4 x i32> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
%v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * null,
<4 x i32> %ptrs, <4 x double> %vecmask, i8 1)
ret <4 x double> %v
}
define <4 x double> @__gather64_double(<4 x i64> %ptrs,
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
%v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * null,
<4 x i64> %ptrs, <4 x double> %vecmask, i8 1)
ret <4 x double> %v
}
')

View File

@@ -1813,7 +1813,7 @@ define(`stdlib_core', `
declare i32 @__fast_masked_vload()
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind
declare void @ISPCSync(i8*) nounwind
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind

View File

@@ -558,8 +558,15 @@ char CWriter::ID = 0;
static std::string CBEMangle(const std::string &S) {
std::string Result;
for (unsigned i = 0, e = S.size(); i != e; ++i)
if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') {
for (unsigned i = 0, e = S.size(); i != e; ++i) {
if (i+1 != e && ((S[i] == '>' && S[i+1] == '>') ||
(S[i] == '<' && S[i+1] == '<'))) {
Result += '_';
Result += 'A'+(S[i]&15);
Result += 'A'+((S[i]>>4)&15);
Result += '_';
i++;
} else if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') {
Result += S[i];
} else {
Result += '_';
@@ -567,6 +574,7 @@ static std::string CBEMangle(const std::string &S) {
Result += 'A'+((S[i]>>4)&15);
Result += '_';
}
}
return Result;
}

View File

@@ -3502,7 +3502,7 @@ FunctionEmitContext::ReturnInst() {
llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount) {
llvm::Value *launchCount[3]){
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
@@ -3563,7 +3563,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}

2
ctx.h
View File

@@ -542,7 +542,7 @@ public:
he given argument values. */
llvm::Value *LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount);
llvm::Value *launchCount[3]);
void SyncInst();

View File

@@ -2009,43 +2009,37 @@ static FORCEINLINE void __masked_store_i16(void *p, __vec16_i16 val,
ptr[i] = val[i];
}
static FORCEINLINE void __masked_store_i32(void *p, __vec16_i32 val, __vec16_i1 mask)
static FORCEINLINE void __masked_store_i32(void *p, const __vec16_i32 val, const __vec16_i1 mask)
{
#ifdef ISPC_FORCE_ALIGNED_MEMORY
_mm512_mask_store_epi32(p, mask, val.v);
#else
__vec16_i32 tmp;
tmp.v = _mm512_extloadunpacklo_epi32(tmp.v, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
tmp.v = _mm512_extloadunpackhi_epi32(tmp.v, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
tmp.v = _mm512_mask_mov_epi32(tmp.v, mask, val.v);
_mm512_extpackstorelo_epi32(p, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
_mm512_extpackstorehi_epi32((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
_mm512_mask_i32extscatter_epi32(p, mask, __ispc_stride1, val, _MM_DOWNCONV_EPI32_NONE, _MM_SCALE_4, _MM_HINT_NONE);
#endif
}
static FORCEINLINE void __masked_store_float(void *p, __vec16_f val, __vec16_i1 mask)
static FORCEINLINE void __masked_store_float(void *p, const __vec16_f val, const __vec16_i1 mask)
{
#ifdef ISPC_FORCE_ALIGNED_MEMORY
_mm512_mask_store_ps(p, mask, val.v);
#else
__vec16_f tmp;
tmp.v = _mm512_extloadunpacklo_ps(tmp.v, p, _MM_UPCONV_PS_NONE, _MM_HINT_NONE);
tmp.v = _mm512_extloadunpackhi_ps(tmp.v, (uint8_t*)p+64, _MM_UPCONV_PS_NONE, _MM_HINT_NONE);
tmp.v = _mm512_mask_mov_ps(tmp.v, mask, val.v);
_mm512_extpackstorelo_ps(p, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE);
_mm512_extpackstorehi_ps((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE);
_mm512_mask_i32extscatter_ps(p, mask, __ispc_stride1, val, _MM_DOWNCONV_PS_NONE, _MM_SCALE_4, _MM_HINT_NONE);
#endif
}
static FORCEINLINE void __masked_store_i64(void *p, __vec16_i64 val,
__vec16_i1 mask) {
int64_t *ptr = (int64_t *)p;
for (int i = 0; i < 16; ++i)
if ((mask.v & (1 << i)) != 0)
ptr[i] = val[i];
static FORCEINLINE void __masked_store_i64(void *p, const __vec16_i64 val, const __vec16_i1 mask) {
#ifdef ISPC_FORCE_ALIGNED_MEMORY
__vec16_i1 tmp_m = mask;
tmp_m = _mm512_kswapb(tmp_m, tmp_m);
_mm512_mask_store_epi64(p, mask, val.v1);
_mm512_mask_store_epi64((uint8_t*)p+64, tmp_m, val.v2);
#else
_mm512_mask_i32loextscatter_epi64( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE);
_mm512_mask_i32loextscatter_epi64((int64_t*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE);
#endif
}
static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1 mask)
static FORCEINLINE void __masked_store_double(void *p, const __vec16_d val, const __vec16_i1 mask)
{
#ifdef ISPC_FORCE_ALIGNED_MEMORY
__vec16_i1 tmp_m = mask;
@@ -2053,19 +2047,8 @@ static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1
_mm512_mask_store_pd(p, mask, val.v1);
_mm512_mask_store_pd((uint8_t*)p+64, tmp_m, val.v2);
#else
__vec16_d tmp;
__vec16_i1 tmp_m = mask;
tmp_m = _mm512_kswapb(tmp_m, tmp_m);
tmp.v1 = _mm512_extloadunpacklo_pd(tmp.v1, p, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
tmp.v1 = _mm512_extloadunpackhi_pd(tmp.v1, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
tmp.v2 = _mm512_extloadunpacklo_pd(tmp.v2, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
tmp.v2 = _mm512_extloadunpackhi_pd(tmp.v2, (uint8_t*)p+128, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
tmp.v1 = _mm512_mask_mov_pd(tmp.v1, mask, val.v1);
tmp.v2 = _mm512_mask_mov_pd(tmp.v2, tmp_m, val.v2);
_mm512_extpackstorelo_pd(p, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
_mm512_extpackstorehi_pd((uint8_t*)p+64, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
_mm512_extpackstorelo_pd((uint8_t*)p+64, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
_mm512_extpackstorehi_pd((uint8_t*)p+128, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
_mm512_mask_i32loextscatter_pd( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE);
_mm512_mask_i32loextscatter_pd((double*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE);
#endif
}

View File

@@ -0,0 +1,2 @@
mandelbrot
*.ppm

View File

@@ -0,0 +1,8 @@
EXAMPLE=mandelbrot_tasks3d
CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
ISPC_SRC=mandelbrot_tasks3d.ispc
ISPC_IA_TARGETS=avx,sse2,sse4
ISPC_ARM_TARGETS=neon
include ../common.mk

View File

@@ -0,0 +1,180 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>mandelbrot_tasks</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="mandelbrot_tasks.cpp" />
<ClCompile Include="mandelbrot_tasks_serial.cpp" />
<ClCompile Include="../tasksys.cpp" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="mandelbrot_tasks.ispc">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
</CustomBuild>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -0,0 +1,146 @@
/*
Copyright (c) 2010-2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#define NOMINMAX
#pragma warning (disable: 4244)
#pragma warning (disable: 4305)
#endif
#include <stdio.h>
#include <algorithm>
#include <string.h>
#include "../timing.h"
#include "mandelbrot_tasks3d_ispc.h"
using namespace ispc;
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
int width, int height, int maxIterations,
int output[]);
/* Write a PPM image file with the image of the Mandelbrot set */
static void
writePPM(int *buf, int width, int height, const char *fn) {
FILE *fp = fopen(fn, "wb");
fprintf(fp, "P6\n");
fprintf(fp, "%d %d\n", width, height);
fprintf(fp, "255\n");
for (int i = 0; i < width*height; ++i) {
// Map the iteration count to colors by just alternating between
// two greys.
char c = (buf[i] & 0x1) ? 240 : 20;
for (int j = 0; j < 3; ++j)
fputc(c, fp);
}
fclose(fp);
printf("Wrote image file %s\n", fn);
}
static void usage() {
fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
exit(1);
}
int main(int argc, char *argv[]) {
unsigned int width = 1536;
unsigned int height = 1024;
float x0 = -2;
float x1 = 1;
float y0 = -1;
float y1 = 1;
if (argc == 1)
;
else if (argc == 2) {
if (strncmp(argv[1], "--scale=", 8) == 0) {
float scale = atof(argv[1] + 8);
if (scale == 0.f)
usage();
width *= scale;
height *= scale;
// round up to multiples of 16
width = (width + 0xf) & ~0xf;
height = (height + 0xf) & ~0xf;
}
else
usage();
}
else
usage();
int maxIterations = 512;
int *buf = new int[width*height];
//
// Compute the image using the ispc implementation; report the minimum
// time of three runs.
//
double minISPC = 1e30;
for (int i = 0; i < 3; ++i) {
// Clear out the buffer
for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0;
reset_and_start_timer();
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
double dt = get_elapsed_mcycles();
minISPC = std::min(minISPC, dt);
}
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
//
// And run the serial implementation 3 times, again reporting the
// minimum time.
//
double minSerial = 1e30;
for (int i = 0; i < 3; ++i) {
// Clear out the buffer
for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0;
reset_and_start_timer();
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
double dt = get_elapsed_mcycles();
minSerial = std::min(minSerial, dt);
}
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
writePPM(buf, width, height, "mandelbrot-serial.ppm");
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
return 0;
}

View File

@@ -0,0 +1,99 @@
/*
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static inline int
mandel(float c_re, float c_im, int count) {
float z_re = c_re, z_im = c_im;
int i;
for (i = 0; i < count; ++i) {
if (z_re * z_re + z_im * z_im > 4.)
break;
float new_re = z_re*z_re - z_im*z_im;
float new_im = 2.f * z_re * z_im;
unmasked {
z_re = c_re + new_re;
z_im = c_im + new_im;
}
}
return i;
}
/* Task to compute the Mandelbrot iterations for a single scanline.
*/
task void
mandelbrot_scanline(uniform float x0, uniform float dx,
uniform float y0, uniform float dy,
uniform int width, uniform int height,
uniform int xspan, uniform int yspan,
uniform int maxIterations, uniform int output[]) {
const uniform int xstart = taskIndex0 * xspan;
const uniform int xend = min(xstart + xspan, width);
const uniform int ystart = taskIndex1 * yspan;
const uniform int yend = min(ystart + yspan, height);
foreach (yi = ystart ... yend, xi = xstart ... xend) {
float x = x0 + xi * dx;
float y = y0 + yi * dy;
int index = yi * width + xi;
output[index] = mandel(x, y, maxIterations);
}
}
#if 1
export void
mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1,
uniform int width, uniform int height,
uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height;
const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */
const uniform int yspan = 16;
#if 1
launch [width/xspan, height/yspan]
#else
launch [height/yspan][width/xspan]
#endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output);
}
#endif

View File

@@ -0,0 +1,68 @@
/*
Copyright (c) 2010-2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static int mandel(float c_re, float c_im, int count) {
float z_re = c_re, z_im = c_im;
int i;
for (i = 0; i < count; ++i) {
if (z_re * z_re + z_im * z_im > 4.f)
break;
float new_re = z_re*z_re - z_im*z_im;
float new_im = 2.f * z_re * z_im;
z_re = c_re + new_re;
z_im = c_im + new_im;
}
return i;
}
void mandelbrot_serial(float x0, float y0, float x1, float y1,
int width, int height, int maxIterations,
int output[])
{
float dx = (x1 - x0) / width;
float dy = (y1 - y0) / height;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; ++i) {
float x = x0 + i * dx;
float y = y0 + j * dy;
int index = (j * width + i);
output[index] = mandel(x, y, maxIterations);
}
}
}

View File

@@ -170,21 +170,41 @@
// Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount);
int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
// Small structure used to hold the data for each task
struct TaskInfo {
TaskFuncType func;
void *data;
int taskIndex, taskCount;
int taskIndex;
int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS)
event taskEvent;
#endif
};
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
int taskIndex0() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex1() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex2() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount0() const { return taskCount3d[0]; }
int taskCount1() const { return taskCount3d[1]; }
int taskCount2() const { return taskCount3d[2]; }
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
} __attribute__((aligned(32)));
// ispc expects these functions to have C linkage / not be mangled
extern "C" {
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz);
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
void ISPCSync(void *handle);
}
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
// Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount);
taskInfo->taskIndex, taskInfo->taskCount(),
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
}
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those.
int threadIndex = 0;
int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
// Signal the event that this task is done
ti->taskEvent.set();
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount);
myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the "number of unfinished tasks" counter in the task
@@ -861,7 +887,9 @@ TaskGroup::Sync() {
// Do work for _myTask_
//
// FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
//
// Decrement the number of unfinished tasks counter
@@ -891,7 +919,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -920,7 +950,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task.
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}
}
@@ -951,7 +983,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
@@ -978,7 +1012,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
});
}
}
@@ -1031,7 +1067,8 @@ FreeTaskGroup(TaskGroup *tg) {
///////////////////////////////////////////////////////////////////////////
void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
const int count = count0*count1*count2;
TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) {
InitTaskSystem();
@@ -1047,7 +1084,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->func = (TaskFuncType)func;
ti->data = data;
ti->taskIndex = i;
ti->taskCount = count;
ti->taskCount3d[0] = count0;
ti->taskCount3d[1] = count1;
ti->taskCount3d[2] = count2;
}
taskGroup->Launch(baseIndex, count);
}

164
expr.cpp
View File

@@ -1660,6 +1660,64 @@ BinaryExpr::BinaryExpr(Op o, Expr *a, Expr *b, SourcePos p)
arg1 = b;
}
Expr *lCreateBinaryOperatorCall(const BinaryExpr::Op bop,
Expr *a0, Expr *a1,
const SourcePos &sp)
{
if ((a0 == NULL) || (a1 == NULL)) {
return NULL;
}
Expr *arg0 = a0->TypeCheck();
Expr *arg1 = a1->TypeCheck();
if ((arg0 == NULL) || (arg1 == NULL)) {
return NULL;
}
const Type *type0 = arg0->GetType();
const Type *type1 = arg1->GetType();
// If either operand is a reference, dereference it before we move
// forward
if (CastType<ReferenceType>(type0) != NULL) {
arg0 = new RefDerefExpr(arg0, arg0->pos);
type0 = arg0->GetType();
}
if (CastType<ReferenceType>(type1) != NULL) {
arg1 = new RefDerefExpr(arg1, arg1->pos);
type1 = arg1->GetType();
}
if ((type0 == NULL) || (type1 == NULL)) {
return NULL;
}
if (CastType<StructType>(type0) != NULL ||
CastType<StructType>(type1) != NULL) {
std::string opName = std::string("operator") + lOpString(bop);
std::vector<Symbol *> funs;
m->symbolTable->LookupFunction(opName.c_str(), &funs);
if (funs.size() == 0) {
Error(sp, "operator %s(%s, %s) is not defined.",
opName.c_str(), (type0->GetString()).c_str(), (type1->GetString()).c_str());
return NULL;
}
Expr *func = new FunctionSymbolExpr(opName.c_str(), funs, sp);
ExprList *args = new ExprList(sp);
args->exprs.push_back(arg0);
args->exprs.push_back(arg1);
Expr *opCallExpr = new FunctionCallExpr(func, args, sp);
return opCallExpr;
}
return NULL;
}
Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p) {
Expr * op = lCreateBinaryOperatorCall(o, a, b, p);
if (op != NULL) {
return op;
}
op = new BinaryExpr(o, a, b, p);
return op;
}
/** Emit code for a && or || logical operator. In particular, the code
here handles "short-circuit" evaluation, where the second expression
@@ -2985,29 +3043,10 @@ AssignExpr::TypeCheck() {
if (lvalueIsReference)
lvalue = new RefDerefExpr(lvalue, lvalue->pos);
FunctionSymbolExpr *fse;
if ((fse = dynamic_cast<FunctionSymbolExpr *>(rvalue)) != NULL) {
// Special case to use the type of the LHS to resolve function
// overloads when we're assigning a function pointer where the
// function is overloaded.
const Type *lvalueType = lvalue->GetType();
const FunctionType *ftype;
if (CastType<PointerType>(lvalueType) == NULL ||
(ftype = CastType<FunctionType>(lvalueType->GetBaseType())) == NULL) {
Error(lvalue->pos, "Can't assign function pointer to type \"%s\".",
lvalueType ? lvalueType->GetString().c_str() : "<unknown>");
return NULL;
}
std::vector<const Type *> paramTypes;
for (int i = 0; i < ftype->GetNumParameters(); ++i)
paramTypes.push_back(ftype->GetParameterType(i));
if (!fse->ResolveOverloads(rvalue->pos, paramTypes)) {
Error(pos, "Unable to find overloaded function for function "
"pointer assignment.");
return NULL;
}
if (PossiblyResolveFunctionOverloads(rvalue, lvalue->GetType()) == false) {
Error(pos, "Unable to find overloaded function for function "
"pointer assignment.");
return NULL;
}
const Type *lhsType = lvalue->GetType();
@@ -3501,11 +3540,13 @@ SelectExpr::Print() const {
// FunctionCallExpr
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
bool il, Expr *lce)
bool il, Expr *lce[3])
: Expr(p), isLaunch(il) {
func = f;
args = a;
launchCountExpr = lce;
launchCountExpr[0] = lce[0];
launchCountExpr[1] = lce[1];
launchCountExpr[2] = lce[2];
}
@@ -3623,9 +3664,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
llvm::Value *retVal = NULL;
ctx->SetDebugPos(pos);
if (ft->isTask) {
AssertPos(pos, launchCountExpr != NULL);
llvm::Value *launchCount = launchCountExpr->GetValue(ctx);
if (launchCount != NULL)
AssertPos(pos, launchCountExpr[0] != NULL);
llvm::Value *launchCount[3] =
{ launchCountExpr[0]->GetValue(ctx),
launchCountExpr[1]->GetValue(ctx),
launchCountExpr[2]->GetValue(ctx) };
if (launchCount[0] != NULL)
ctx->LaunchInst(callee, argVals, launchCount);
}
else
@@ -3652,8 +3697,35 @@ FunctionCallExpr::GetLValue(FunctionEmitContext *ctx) const {
}
bool FullResolveOverloads(Expr * func, ExprList * args,
std::vector<const Type *> *argTypes,
std::vector<bool> *argCouldBeNULL,
std::vector<bool> *argIsConstant) {
for (unsigned int i = 0; i < args->exprs.size(); ++i) {
Expr *expr = args->exprs[i];
if (expr == NULL)
return false;
const Type *t = expr->GetType();
if (t == NULL)
return false;
argTypes->push_back(t);
argCouldBeNULL->push_back(lIsAllIntZeros(expr) || dynamic_cast<NullPointerExpr *>(expr));
argIsConstant->push_back(dynamic_cast<ConstExpr *>(expr) || dynamic_cast<NullPointerExpr *>(expr));
}
return true;
}
const Type *
FunctionCallExpr::GetType() const {
std::vector<const Type *> argTypes;
std::vector<bool> argCouldBeNULL, argIsConstant;
if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == true) {
FunctionSymbolExpr *fse = dynamic_cast<FunctionSymbolExpr *>(func);
if (fse != NULL) {
fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL, &argIsConstant);
}
}
const FunctionType *ftype = lGetFunctionType(func);
return ftype ? ftype->GetReturnType() : NULL;
}
@@ -3689,20 +3761,9 @@ FunctionCallExpr::TypeCheck() {
std::vector<const Type *> argTypes;
std::vector<bool> argCouldBeNULL, argIsConstant;
for (unsigned int i = 0; i < args->exprs.size(); ++i) {
Expr *expr = args->exprs[i];
if (expr == NULL)
return NULL;
const Type *t = expr->GetType();
if (t == NULL)
return NULL;
argTypes.push_back(t);
argCouldBeNULL.push_back(lIsAllIntZeros(expr) ||
dynamic_cast<NullPointerExpr *>(expr));
argIsConstant.push_back(dynamic_cast<ConstExpr *>(expr) ||
dynamic_cast<NullPointerExpr *>(expr));
if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == false) {
return NULL;
}
FunctionSymbolExpr *fse = dynamic_cast<FunctionSymbolExpr *>(func);
@@ -3732,14 +3793,17 @@ FunctionCallExpr::TypeCheck() {
if (!isLaunch)
Error(pos, "\"launch\" expression needed to call function "
"with \"task\" qualifier.");
if (!launchCountExpr)
for (int k = 0; k < 3; k++)
{
if (!launchCountExpr[k])
return NULL;
launchCountExpr =
TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32,
"task launch count");
if (launchCountExpr == NULL)
launchCountExpr[k] =
TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32,
"task launch count");
if (launchCountExpr[k] == NULL)
return NULL;
}
}
else {
if (isLaunch) {
@@ -3747,7 +3811,7 @@ FunctionCallExpr::TypeCheck() {
"qualified function.");
return NULL;
}
AssertPos(pos, launchCountExpr == NULL);
AssertPos(pos, launchCountExpr[0] == NULL);
}
}
else {
@@ -7010,7 +7074,8 @@ TypeCastExpr::GetLValue(FunctionEmitContext *ctx) const {
const Type *
TypeCastExpr::GetType() const {
AssertPos(pos, type->HasUnboundVariability() == false);
// We have to switch off this assert after supporting of operators.
//AssertPos(pos, type->HasUnboundVariability() == false);
return type;
}
@@ -8190,6 +8255,9 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos,
const std::vector<bool> *argCouldBeNULL,
const std::vector<bool> *argIsConstant) {
const char *funName = candidateFunctions.front()->name.c_str();
if (triedToResolve == true) {
return true;
}
triedToResolve = true;

7
expr.h
View File

@@ -246,7 +246,8 @@ public:
class FunctionCallExpr : public Expr {
public:
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
bool isLaunch = false, Expr *launchCountExpr = NULL);
bool isLaunch = false,
Expr *launchCountExpr[3] = (Expr*[3]){NULL, NULL, NULL});
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
@@ -261,7 +262,7 @@ public:
Expr *func;
ExprList *args;
bool isLaunch;
Expr *launchCountExpr;
Expr *launchCountExpr[3];
};
@@ -730,6 +731,8 @@ bool CanConvertTypes(const Type *fromType, const Type *toType,
*/
Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase);
Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p);
/** Utility routine that emits code to initialize a symbol given an
initializer expression.

View File

@@ -1025,3 +1025,38 @@
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *

View File

@@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) {
Assert(taskIndexSym);
taskCountSym = m->symbolTable->LookupVariable("taskCount");
Assert(taskCountSym);
taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0");
Assert(taskIndexSym0);
taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1");
Assert(taskIndexSym1);
taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2");
Assert(taskIndexSym2);
taskCountSym0 = m->symbolTable->LookupVariable("taskCount0");
Assert(taskCountSym0);
taskCountSym1 = m->symbolTable->LookupVariable("taskCount1");
Assert(taskCountSym1);
taskCountSym2 = m->symbolTable->LookupVariable("taskCount2");
Assert(taskCountSym2);
}
else
{
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL;
taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL;
}
}
@@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
llvm::Value *threadCount = argIter++;
llvm::Value *taskIndex = argIter++;
llvm::Value *taskCount = argIter++;
llvm::Value *taskIndex0 = argIter++;
llvm::Value *taskIndex1 = argIter++;
llvm::Value *taskIndex2 = argIter++;
llvm::Value *taskCount0 = argIter++;
llvm::Value *taskCount1 = argIter++;
llvm::Value *taskCount2 = argIter++;
// Copy the function parameter values from the structure into local
// storage
@@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
}
else {
// Regular, non-task function

5
func.h
View File

@@ -60,7 +60,10 @@ private:
Stmt *code;
Symbol *maskSymbol;
Symbol *threadIndexSym, *threadCountSym;
Symbol *taskIndexSym, *taskCountSym;
Symbol *taskIndexSym, *taskCountSym;
Symbol *taskIndexSym0, *taskCountSym0;
Symbol *taskIndexSym1, *taskCountSym1;
Symbol *taskIndexSym2, *taskCountSym2;
};
#endif // ISPC_FUNC_H

View File

@@ -515,6 +515,25 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
this->m_vectorWidth = 4;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
#if defined(LLVM_3_4)
",+rdrnd"
#else
",+rdrand"
#endif
;
this->m_maskingIsFree = false;
this->m_maskBitCount = 64;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx2") ||
@@ -563,6 +582,29 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
// LLVM 3.2+ only
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
else if (!strcasecmp(isa, "avx2-i64x4")) {
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
this->m_vectorWidth = 4;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
#if defined(LLVM_3_4)
",+rdrnd"
#else
",+rdrand"
#endif
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
this->m_maskingIsFree = false;
this->m_maskBitCount = 64;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
#ifdef ISPC_ARM_ENABLED
@@ -740,8 +782,8 @@ Target::SupportedTargets() {
"sse2-i32x4, sse2-i32x8, "
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
"avx1-i32x8, avx1-i32x16, avx1-i64x4, "
"avx1.1-i32x8, avx1.1-i32x16, "
"avx2-i32x8, avx2-i32x16, "
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64";
}

View File

@@ -28,10 +28,14 @@
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
@@ -323,6 +327,24 @@
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit &gt; $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx11-i64x4-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit &gt; $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx11-i64x4-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx2.ll">
<FileType>Document</FileType>
@@ -359,6 +381,24 @@
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit &gt; $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx2-i64x4-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit &gt; $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx2-i64x4-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-1.ll">
<FileType>Document</FileType>

8
lex.ll
View File

@@ -419,6 +419,14 @@ while { RT; return TOKEN_WHILE; }
\"C\" { RT; return TOKEN_STRING_C_LITERAL; }
\.\.\. { RT; return TOKEN_DOTDOTDOT; }
"operator*" { return TOKEN_IDENTIFIER; }
"operator+" { return TOKEN_IDENTIFIER; }
"operator-" { return TOKEN_IDENTIFIER; }
"operator<<" { return TOKEN_IDENTIFIER; }
"operator>>" { return TOKEN_IDENTIFIER; }
"operator/" { return TOKEN_IDENTIFIER; }
"operator%" { return TOKEN_IDENTIFIER; }
L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; }
{IDENT} {

103
parse.yy
View File

@@ -353,17 +353,75 @@ launch_expression
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr);
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount);
}
| TOKEN_LAUNCH postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr);
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount);
}
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')'
{ $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); }
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')'
{ $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); }
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
$$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
$$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
Expr *launchCount[3] = {$3, $5, oneExpr};
$$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
Expr *launchCount[3] = {$3, $5, oneExpr};
$$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
Expr *launchCount[3] = {$6, $3, oneExpr};
$$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
{
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
Expr *launchCount[3] = {$6, $3, oneExpr};
$$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
Expr *launchCount[3] = {$3, $5, $7};
$$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
{
Expr *launchCount[3] = {$3, $5, $7};
$$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
{
Expr *launchCount[3] = {$9, $6, $3};
$$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount);
}
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
{
Expr *launchCount[3] = {$9, $6, $3};
$$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount);
}
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
{
@@ -377,13 +435,13 @@ launch_expression
"around function call expression.");
$$ = NULL;
}
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
{
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
"around function call expression.");
$$ = NULL;
}
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>'
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>'
{
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
"around function call expression.");
@@ -468,27 +526,27 @@ cast_expression
multiplicative_expression
: cast_expression
| multiplicative_expression '*' cast_expression
{ $$ = new BinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); }
| multiplicative_expression '/' cast_expression
{ $$ = new BinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); }
| multiplicative_expression '%' cast_expression
{ $$ = new BinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); }
;
additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
{ $$ = new BinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); }
| additive_expression '-' multiplicative_expression
{ $$ = new BinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); }
;
shift_expression
: additive_expression
| shift_expression TOKEN_LEFT_OP additive_expression
{ $$ = new BinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1,@3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1, @3)); }
| shift_expression TOKEN_RIGHT_OP additive_expression
{ $$ = new BinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1,@3)); }
{ $$ = MakeBinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1, @3)); }
;
relational_expression
@@ -2217,6 +2275,21 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) {
Symbol *taskCountSym = new Symbol("taskCount", pos, type);
m->symbolTable->AddVariable(taskCountSym);
Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type);
m->symbolTable->AddVariable(taskIndexSym0);
Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type);
m->symbolTable->AddVariable(taskIndexSym1);
Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type);
m->symbolTable->AddVariable(taskIndexSym2);
Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type);
m->symbolTable->AddVariable(taskCountSym0);
Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type);
m->symbolTable->AddVariable(taskCountSym1);
Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type);
m->symbolTable->AddVariable(taskCountSym2);
}

View File

@@ -378,6 +378,11 @@ def file_check(compfails, runfails):
compiler_version = options.compiler_exe + temp3.group()
else:
compiler_version = "cl"
possible_compilers = ["g++4.4", "g++4.7", "clang++3.3", "cl"]
if not compiler_version in possible_compilers:
error("\n**********\nWe don't have history of fails for compiler " +
compiler_version +
"\nAll fails will be new!!!\n**********", 2)
new_line = " "+options.arch.rjust(6)+" "+options.target.rjust(14)+" "+OS.rjust(7)+" "+llvm_version+" "+compiler_version.rjust(10)+" "+opt+" *\n"
new_compfails = compfails[:]
@@ -449,8 +454,9 @@ def verify():
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
"avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
"avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
"generic-1", "generic-4", "generic-8",
"generic-16", "generic-32", "generic-64"]]
for i in range (0,len(f_lines)):
if f_lines[i][0] == "%":
@@ -647,7 +653,8 @@ def run_tests(options1, args, print_version):
if options.non_interactive == False:
print_debug("\n", s, run_tests_log)
elapsed_time = time.time() - start_time
temp_time = (time.time() - start_time)
elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))
while not qret.empty():
(c, r, skip) = qret.get()
@@ -675,12 +682,16 @@ def run_tests(options1, args, print_version):
if len(compile_error_files) == 0 and len(run_error_files) == 0:
print_debug("No fails\n", s, run_tests_log)
R = file_check(compile_error_files, run_error_files)
if len(args) == 0:
R = file_check(compile_error_files, run_error_files)
else:
error("don't check new fails for incomplete suite of tests", 2)
R = 0
if options.time:
print_debug("Elapsed time: %d s\n" % elapsed_time, s, run_tests_log)
print_debug("Elapsed time: " + elapsed_time + "\n", s, run_tests_log)
return R
return [R, elapsed_time]
from optparse import OptionParser

70
tests/operators.ispc Normal file
View File

@@ -0,0 +1,70 @@
export uniform int width() { return programCount; }
struct S {
float a;
};
// References "struct&" were put in random order to test them.
struct S operator*(struct S& rr, struct S rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator/(struct S& rr, struct S& rv) {
struct S c;
c.a = rr.a - rr.a + 2;
return c;
}
struct S operator%(struct S rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator+(struct S rr, struct S rv) {
struct S c;
c.a = rr.a / rv.a + 3;
return c;
}
struct S operator-(struct S rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator>>(struct S& rr, struct S rv) {
struct S c;
c.a = rr.a / rv.a + 3;
return c;
}
struct S operator<<(struct S& rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S a, a1;
struct S b, b1;
struct S d1, d2, d3, d4, d5, d6, d7;
export void f_f(uniform float RET[], uniform float aFOO[]) {
a.a = aFOO[programIndex];
b.a = -aFOO[programIndex];
d1 = a * b;
d2 = a / b;
d3 = a % b;
d4 = a + b;
d5 = a - b;
d6 = a >> b;
d7 = a << b;
RET[programIndex] = d1.a + d2.a + d3.a + d4.a + d5.a + d6.a + d7.a;
}
export void result(uniform float RET[4]) {
RET[programIndex] = 14;
}

64
tests/operators1.ispc Normal file
View File

@@ -0,0 +1,64 @@
export uniform int width() { return programCount; }
struct S {
float a;
};
// References "struct&" were put in random order to test them.
struct S operator*(struct S& rr, struct S rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator/(struct S& rr, struct S& rv) {
struct S c;
c.a = rr.a - rr.a + 2;
return c;
}
struct S operator%(struct S rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator+(struct S rr, struct S rv) {
struct S c;
c.a = rr.a / rv.a + 3;
return c;
}
struct S operator-(struct S rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S operator>>(struct S& rr, struct S rv) {
struct S c;
c.a = rr.a / rv.a + 3;
return c;
}
struct S operator<<(struct S& rr, struct S& rv) {
struct S c;
c.a = rr.a + rv.a + 2;
return c;
}
struct S a;
struct S b;
struct S d;
export void f_f(uniform float RET[], uniform float aFOO[]) {
a.a = 5;
b.a = -5;
d = a * b + b / a - a << (b - b) - a;
RET[programIndex] = d.a;
}
export void result(uniform float RET[4]) {
RET[programIndex] = 12;
}

51
tests/operators2.ispc Normal file
View File

@@ -0,0 +1,51 @@
int off;
export uniform int width() { return programCount; }
struct S {
float a;
};
struct S operator+(struct S rr, struct S rv) {
struct S c;
c.a = rr.a / rv.a + 3;
if (off == 1)
c.a = 22;
return c;
}
struct S operator/(struct S rr, struct S rv) {
struct S c;
c.a = rr.a + rv.a + 10;
if (off == 1)
c.a = 33;
return c;
}
struct S a;
struct S b;
struct S d;
export void f_f(uniform float RET[], uniform float aFOO[]) {
int T = programIndex;
a.a = aFOO[programIndex];
b.a = -aFOO[programIndex];
if (programIndex == 3)
off = 1;
else
off = 0;
if (T % 2)
d = a + b;
else
d = a / b;
RET[programIndex] = d.a;
}
export void result(uniform float RET[4]) {
if (programIndex % 2)
RET[programIndex] = 2;
else
RET[programIndex] = 10;
RET[3] = 22;
}

View File

@@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
}
else
// Otherwise we already have the types of the arguments