working on nvptx
This commit is contained in:
2
Makefile
2
Makefile
@@ -140,7 +140,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
type.cpp util.cpp
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
TARGETS=nvptx64 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||
TARGETS=nvptx64 avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||
ifneq ($(ARM_ENABLED), 0)
|
||||
|
||||
109
alloy.py
109
alloy.py
@@ -212,10 +212,10 @@ def check_targets():
|
||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||
if AVX11 == False and "rdrand" in f_lines[i]:
|
||||
AVX11 = True;
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||
if AVX2 == False and "avx2" in f_lines[i]:
|
||||
AVX2 = True;
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||
if current_OS == "MacOS":
|
||||
f_lines = take_lines("sysctl machdep.cpu.features", "first")
|
||||
if "SSE2" in f_lines:
|
||||
@@ -229,10 +229,10 @@ def check_targets():
|
||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||
if "RDRAND" in f_lines:
|
||||
AVX11 = True;
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||
if "AVX2.0" in f_lines:
|
||||
AVX2 = True;
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||
|
||||
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
|
||||
# now check what targets we have with the help of SDE
|
||||
@@ -257,9 +257,9 @@ def check_targets():
|
||||
if AVX == False and "snb" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
|
||||
if AVX11 == False and "ivb" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
|
||||
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
|
||||
if AVX2 == False and "hsw" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
|
||||
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
|
||||
return [answer, answer_sde]
|
||||
|
||||
def build_ispc(version_LLVM, make):
|
||||
@@ -274,29 +274,38 @@ def build_ispc(version_LLVM, make):
|
||||
|
||||
def execute_stability(stability, R, print_version):
|
||||
stability1 = copy.deepcopy(stability)
|
||||
temp = run_tests.run_tests(stability1, [], print_version)
|
||||
b_temp = run_tests.run_tests(stability1, [], print_version)
|
||||
temp = b_temp[0]
|
||||
time = b_temp[1]
|
||||
for j in range(0,4):
|
||||
R[j][0] = R[j][0] + temp[j]
|
||||
for i in range(0,len(temp[j])):
|
||||
R[j][1].append(temp[4])
|
||||
number_of_fails = temp[5]
|
||||
number_of_new_fails = len(temp[0]) + len(temp[1])
|
||||
number_of_passes = len(temp[2]) + len(temp[3])
|
||||
if number_of_fails == 0:
|
||||
str_fails = ". No fails"
|
||||
else:
|
||||
str_fails = ". Fails: " + str(number_of_fails)
|
||||
if number_of_new_fails == 0:
|
||||
str_new_fails = ", No new fails.\n"
|
||||
str_new_fails = ", No new fails"
|
||||
else:
|
||||
str_new_fails = ", New fails: " + str(number_of_new_fails) + ".\n"
|
||||
print_debug(temp[4][1:-3] + str_fails + str_new_fails, False, stability_log)
|
||||
str_new_fails = ", New fails: " + str(number_of_new_fails)
|
||||
if number_of_passes == 0:
|
||||
str_new_passes = "."
|
||||
else:
|
||||
str_new_passes = ", " + str(number_of_passes) + " new passes."
|
||||
if stability.time:
|
||||
str_time = " " + time + "\n"
|
||||
else:
|
||||
str_time = "\n"
|
||||
print_debug(temp[4][1:-3] + str_fails + str_new_fails + str_new_passes + str_time, False, stability_log)
|
||||
|
||||
def run_special_tests():
|
||||
i = 5
|
||||
|
||||
def validation_run(only, only_targets, reference_branch, number, notify, update, make):
|
||||
if os.environ["ISPC_HOME"] != os.getcwd():
|
||||
error("you ISPC_HOME and your current pass are different!\n", 2)
|
||||
def validation_run(only, only_targets, reference_branch, number, notify, update, speed_number, make, perf_llvm, time):
|
||||
os.chdir(os.environ["ISPC_HOME"])
|
||||
os.environ["PATH"] = os.environ["ISPC_HOME"] + ":" + os.environ["PATH"]
|
||||
if options.notify != "":
|
||||
@@ -322,9 +331,9 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
stability.random = False
|
||||
stability.ispc_flags = ""
|
||||
stability.compiler_exe = None
|
||||
stability.num_jobs = 1024
|
||||
stability.num_jobs = speed_number
|
||||
stability.verbose = False
|
||||
stability.time = False
|
||||
stability.time = time
|
||||
stability.non_interactive = True
|
||||
stability.update = update
|
||||
stability.include_file = None
|
||||
@@ -476,28 +485,36 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
# prepare LLVM 3.3 as newest LLVM
|
||||
need_LLVM = check_LLVM(["3.3"])
|
||||
if len(need_LLVM) != 0:
|
||||
build_LLVM(need_LLVM[i], "", "", "", False, False, False, True, False, make)
|
||||
# prepare reference point. build both test and reference compilers
|
||||
try_do_LLVM("apply git", "git branch", True)
|
||||
temp4 = take_lines("git branch", "all")
|
||||
for line in temp4:
|
||||
if "*" in line:
|
||||
current_branch = line[2:-1]
|
||||
stashing = True
|
||||
sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n")
|
||||
if "No local changes" in take_lines("git stash", "first"):
|
||||
stashing = False
|
||||
#try_do_LLVM("stash current branch ", "git stash", True)
|
||||
try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True)
|
||||
sys.stdout.write(".\n")
|
||||
build_ispc("3.3", make)
|
||||
sys.stdout.write(".\n")
|
||||
os.rename("ispc", "ispc_ref")
|
||||
try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True)
|
||||
if stashing:
|
||||
try_do_LLVM("return current branch ", "git stash pop", True)
|
||||
sys.stdout.write("You can interrupt script now.\n")
|
||||
build_ispc("3.3", make)
|
||||
build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make)
|
||||
if perf_llvm == False:
|
||||
# prepare reference point. build both test and reference compilers
|
||||
try_do_LLVM("apply git", "git branch", True)
|
||||
temp4 = take_lines("git branch", "all")
|
||||
for line in temp4:
|
||||
if "*" in line:
|
||||
current_branch = line[2:-1]
|
||||
stashing = True
|
||||
sys.stdout.write("Please, don't interrupt script here! You can have not sync git status after interruption!\n")
|
||||
if "No local changes" in take_lines("git stash", "first"):
|
||||
stashing = False
|
||||
#try_do_LLVM("stash current branch ", "git stash", True)
|
||||
try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True)
|
||||
sys.stdout.write(".\n")
|
||||
build_ispc("3.3", make)
|
||||
sys.stdout.write(".\n")
|
||||
os.rename("ispc", "ispc_ref")
|
||||
try_do_LLVM("checkout test branch " + current_branch + " ", "git checkout " + current_branch, True)
|
||||
if stashing:
|
||||
try_do_LLVM("return current branch ", "git stash pop", True)
|
||||
sys.stdout.write("You can interrupt script now.\n")
|
||||
build_ispc("3.3", make)
|
||||
else:
|
||||
# build compiler with two different LLVM versions
|
||||
if len(check_LLVM([reference_branch])) != 0:
|
||||
error("you haven't got llvm called " + reference_branch, 1)
|
||||
build_ispc("3.3", make)
|
||||
os.rename("ispc", "ispc_ref")
|
||||
build_ispc(reference_branch, make)
|
||||
# begin validation run for performance. output is inserted into perf()
|
||||
perf.perf(performance, [])
|
||||
if options.notify != "":
|
||||
@@ -560,16 +577,26 @@ def Main():
|
||||
stability_log = os.getcwd() + os.sep + f_date + os.sep + "stability.log"
|
||||
current_path = os.getcwd()
|
||||
make = "make -j" + options.speed
|
||||
if os.environ["ISPC_HOME"] != os.getcwd():
|
||||
error("you ISPC_HOME and your current path are different!\n", 2)
|
||||
if options.perf_llvm == True:
|
||||
if options.branch == "master":
|
||||
options.branch = "trunk"
|
||||
try:
|
||||
start_time = time.time()
|
||||
if options.build_llvm:
|
||||
build_LLVM(options.version, options.revision, options.folder, options.tarball,
|
||||
options.debug, options.selfbuild, options.extra, False, options.force, make)
|
||||
if options.validation_run:
|
||||
validation_run(options.only, options.only_targets, options.branch,
|
||||
options.number_for_performance, options.notify, options.update, make)
|
||||
options.number_for_performance, options.notify, options.update, int(options.speed),
|
||||
make, options.perf_llvm, options.time)
|
||||
elapsed_time = time.time() - start_time
|
||||
if options.time:
|
||||
print_debug("Elapsed time: " + time.strftime('%Hh%Mm%Ssec.', time.gmtime(elapsed_time)) + "\n", False, "")
|
||||
finally:
|
||||
os.chdir(current_path)
|
||||
date_name = "alloy_results_" + datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
|
||||
date_name = "alloy_results_" + datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
|
||||
if os.path.exists(date_name):
|
||||
error("It's forbidden to run alloy two times in a second, logs are in ./logs", 1)
|
||||
os.rename(f_date, date_name)
|
||||
@@ -656,11 +683,15 @@ run_group.add_option('--update-errors', dest='update',
|
||||
run_group.add_option('--only-targets', dest='only_targets',
|
||||
help='set list of targets to test. Possible values - all subnames of targets.',
|
||||
default="")
|
||||
run_group.add_option('--time', dest='time',
|
||||
help='display time of testing', default=False, action='store_true')
|
||||
run_group.add_option('--only', dest='only',
|
||||
help='set types of tests. Possible values:\n' +
|
||||
'-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' +
|
||||
'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).',
|
||||
default="")
|
||||
run_group.add_option('--perf_LLVM', dest='perf_llvm',
|
||||
help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true')
|
||||
parser.add_option_group(run_group)
|
||||
# options for activity "setup PATHS"
|
||||
setup_group = OptionGroup(parser, "Options for setup",
|
||||
|
||||
3
ast.cpp
3
ast.cpp
@@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
||||
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
||||
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
|
||||
for (int k = 0; k < 3; k++)
|
||||
fce->launchCountExpr[0] = (Expr *)WalkAST(fce->launchCountExpr[0], preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
||||
|
||||
16
builtins.cpp
16
builtins.cpp
@@ -979,6 +979,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
}
|
||||
case Target::AVX11: {
|
||||
switch (g->target->getVectorWidth()) {
|
||||
case 4:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_avx11_i64x4_32bit);
|
||||
}
|
||||
else {
|
||||
EXPORT_MODULE(builtins_bitcode_avx11_i64x4_64bit);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_avx11_32bit);
|
||||
@@ -1002,6 +1010,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
}
|
||||
case Target::AVX2: {
|
||||
switch (g->target->getVectorWidth()) {
|
||||
case 4:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_avx2_i64x4_32bit);
|
||||
}
|
||||
else {
|
||||
EXPORT_MODULE(builtins_bitcode_avx2_i64x4_64bit);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_avx2_32bit);
|
||||
|
||||
120
builtins/target-avx11-i64x4.ll
Normal file
120
builtins/target-avx11-i64x4.ll
Normal file
@@ -0,0 +1,120 @@
|
||||
;; Copyright (c) 2013, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx1-i64x4base.ll')
|
||||
|
||||
ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(i8)
|
||||
gen_gather(i16)
|
||||
gen_gather(i32)
|
||||
gen_gather(float)
|
||||
gen_gather(i64)
|
||||
gen_gather(double)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
define(`expand_4to8', `
|
||||
%$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
')
|
||||
define(`extract_4from8', `
|
||||
%$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
')
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone {
|
||||
expand_4to8(i16, v4, v)
|
||||
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||
extract_4from8(float, r, ret)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone {
|
||||
expand_4to8(float, v4, v)
|
||||
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||
extract_4from8(i16, r, ret)
|
||||
ret <4 x i16> %ret
|
||||
}
|
||||
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
355
builtins/target-avx2-i64x4.ll
Normal file
355
builtins/target-avx2-i64x4.ll
Normal file
@@ -0,0 +1,355 @@
|
||||
;; Copyright (c) 2013, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
ifelse(LLVM_VERSION, `LLVM_3_1', `',
|
||||
`define(`HAVE_GATHER', `1')')
|
||||
|
||||
include(`target-avx1-i64x4base.ll')
|
||||
|
||||
ifelse(LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
;; declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
;; declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readonly
|
||||
|
||||
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
;; declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readonly
|
||||
;; declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readonly
|
||||
|
||||
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
|
||||
ret <4 x i32> %m
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
|
||||
|
||||
define(`expand_4to8', `
|
||||
%$3 = shufflevector <4 x $1> %$2, <4 x $1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
')
|
||||
define(`extract_4from8', `
|
||||
%$3 = shufflevector <8 x $1> %$2, <8 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
')
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @__half_to_float_varying(<4 x i16> %v4) nounwind readnone {
|
||||
expand_4to8(i16, v4, v)
|
||||
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||
extract_4from8(float, r, ret)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x i16> @__float_to_half_varying(<4 x float> %v4) nounwind readnone {
|
||||
expand_4to8(float, v4, v)
|
||||
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||
extract_4from8(i16, r, ret)
|
||||
ret <4 x i16> %ret
|
||||
}
|
||||
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
declare void @llvm.trap() noreturn nounwind
|
||||
|
||||
|
||||
ifelse(LLVM_VERSION, `LLVM_3_1', `
|
||||
gen_gather_factored(i8)
|
||||
gen_gather_factored(i16)
|
||||
gen_gather_factored(i32)
|
||||
gen_gather_factored(float)
|
||||
gen_gather_factored(i64)
|
||||
gen_gather_factored(double)', `
|
||||
|
||||
gen_gather(i8)
|
||||
gen_gather(i16)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int32 gathers
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %target, i8 * %ptr,
|
||||
<4 x i32> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind
|
||||
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %target, i8 * %ptr,
|
||||
<4 x i64> %indices, <4 x i32> %mask, i8 %scale) readonly nounwind
|
||||
|
||||
define <4 x i32> @__gather_base_offsets32_i32(i8 * %ptr,
|
||||
i32 %scale, <4 x i32> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
|
||||
%v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * %ptr,
|
||||
<4 x i32> %offsets, <4 x i32> %vecmask, i8 %scale8)
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @__gather_base_offsets64_i32(i8 * %ptr,
|
||||
i32 %scale, <4 x i64> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
|
||||
%v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * %ptr,
|
||||
<4 x i64> %offsets, <4 x i32> %vecmask, i8 %scale8)
|
||||
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @__gather32_i32(<4 x i32> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
|
||||
%v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8 * null,
|
||||
<4 x i32> %ptrs, <4 x i32> %vecmask, i8 1)
|
||||
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @__gather64_i32(<4 x i64> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
|
||||
%v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8 * null,
|
||||
<4 x i64> %ptrs, <4 x i32> %vecmask, i8 1)
|
||||
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float gathers
|
||||
|
||||
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %target, i8 * %ptr,
|
||||
<4 x i32> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind
|
||||
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %target, i8 * %ptr,
|
||||
<4 x i64> %indices, <4 x float> %mask, i8 %scale8) readonly nounwind
|
||||
|
||||
define <4 x float> @__gather_base_offsets32_float(i8 * %ptr,
|
||||
i32 %scale, <4 x i32> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
%mask = bitcast <4 x i32> %vecmask to <4 x float>
|
||||
|
||||
%v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * %ptr,
|
||||
<4 x i32> %offsets, <4 x float> %mask, i8 %scale8)
|
||||
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x float> @__gather_base_offsets64_float(i8 * %ptr,
|
||||
i32 %scale, <4 x i64> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
%mask = bitcast <4 x i32> %vecmask to <4 x float>
|
||||
|
||||
%v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * %ptr,
|
||||
<4 x i64> %offsets, <4 x float> %mask, i8 %scale8)
|
||||
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x float> @__gather32_float(<4 x i32> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
%mask = bitcast <4 x i32> %vecmask to <4 x float>
|
||||
|
||||
%v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8 * null,
|
||||
<4 x i32> %ptrs, <4 x float> %mask, i8 1)
|
||||
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x float> @__gather64_float(<4 x i64> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%vecmask = trunc <4 x i64> %vecmask64 to <4 x i32>
|
||||
%mask = bitcast <4 x i32> %vecmask to <4 x float>
|
||||
|
||||
%v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8 * null,
|
||||
<4 x i64> %ptrs, <4 x float> %mask, i8 1)
|
||||
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int64 gathers
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %target, i8 * %ptr,
|
||||
<4 x i32> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind
|
||||
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %target, i8 * %ptr,
|
||||
<4 x i64> %indices, <4 x i64> %mask, i8 %scale) readonly nounwind
|
||||
|
||||
define <4 x i64> @__gather_base_offsets32_i64(i8 * %ptr,
|
||||
i32 %scale, <4 x i32> %offsets,
|
||||
<4 x i64> %vecmask) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
|
||||
%v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * %ptr,
|
||||
<4 x i32> %offsets, <4 x i64> %vecmask, i8 %scale8)
|
||||
|
||||
ret <4 x i64> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i64> @__gather_base_offsets64_i64(i8 * %ptr,
|
||||
i32 %scale, <4 x i64> %offsets,
|
||||
<4 x i64> %vecmask) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
|
||||
%v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * %ptr,
|
||||
<4 x i64> %offsets, <4 x i64> %vecmask, i8 %scale8)
|
||||
|
||||
ret <4 x i64> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i64> @__gather32_i64(<4 x i32> %ptrs,
|
||||
<4 x i64> %vecmask) nounwind readonly alwaysinline {
|
||||
|
||||
%v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8 * null,
|
||||
<4 x i32> %ptrs, <4 x i64> %vecmask, i8 1)
|
||||
ret <4 x i64> %v
|
||||
}
|
||||
|
||||
|
||||
define <4 x i64> @__gather64_i64(<4 x i64> %ptrs,
|
||||
<4 x i64> %vecmask) nounwind readonly alwaysinline {
|
||||
%v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8 * null,
|
||||
<4 x i64> %ptrs, <4 x i64> %vecmask, i8 1)
|
||||
ret <4 x i64> %v
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double gathers
|
||||
|
||||
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %target, i8 * %ptr,
|
||||
<4 x i64> %indices, <4 x double> %mask, i8 %scale) readonly nounwind
|
||||
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %target, i8 * %ptr,
|
||||
<4 x i32> %indices, <4 x double> %mask, i8 %scale) readonly nounwind
|
||||
|
||||
define <4 x double> @__gather_base_offsets32_double(i8 * %ptr,
|
||||
i32 %scale, <4 x i32> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
|
||||
|
||||
%v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * %ptr,
|
||||
<4 x i32> %offsets, <4 x double> %vecmask, i8 %scale8)
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
define <4 x double> @__gather_base_offsets64_double(i8 * %ptr,
|
||||
i32 %scale, <4 x i64> %offsets,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%scale8 = trunc i32 %scale to i8
|
||||
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
|
||||
|
||||
%v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * %ptr,
|
||||
<4 x i64> %offsets, <4 x double> %vecmask, i8 %scale8)
|
||||
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
define <4 x double> @__gather32_double(<4 x i32> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
|
||||
|
||||
%v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8 * null,
|
||||
<4 x i32> %ptrs, <4 x double> %vecmask, i8 1)
|
||||
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
define <4 x double> @__gather64_double(<4 x i64> %ptrs,
|
||||
<4 x i64> %vecmask64) nounwind readonly alwaysinline {
|
||||
%vecmask = bitcast <4 x i64> %vecmask64 to <4 x double>
|
||||
|
||||
%v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8 * null,
|
||||
<4 x i64> %ptrs, <4 x double> %vecmask, i8 1)
|
||||
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
')
|
||||
@@ -1813,7 +1813,7 @@ define(`stdlib_core', `
|
||||
declare i32 @__fast_masked_vload()
|
||||
|
||||
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
|
||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
|
||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind
|
||||
declare void @ISPCSync(i8*) nounwind
|
||||
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
||||
|
||||
|
||||
12
cbackend.cpp
12
cbackend.cpp
@@ -558,8 +558,15 @@ char CWriter::ID = 0;
|
||||
static std::string CBEMangle(const std::string &S) {
|
||||
std::string Result;
|
||||
|
||||
for (unsigned i = 0, e = S.size(); i != e; ++i)
|
||||
if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') {
|
||||
for (unsigned i = 0, e = S.size(); i != e; ++i) {
|
||||
if (i+1 != e && ((S[i] == '>' && S[i+1] == '>') ||
|
||||
(S[i] == '<' && S[i+1] == '<'))) {
|
||||
Result += '_';
|
||||
Result += 'A'+(S[i]&15);
|
||||
Result += 'A'+((S[i]>>4)&15);
|
||||
Result += '_';
|
||||
i++;
|
||||
} else if (isalnum(S[i]) || S[i] == '_' || S[i] == '<' || S[i] == '>') {
|
||||
Result += S[i];
|
||||
} else {
|
||||
Result += '_';
|
||||
@@ -567,6 +574,7 @@ static std::string CBEMangle(const std::string &S) {
|
||||
Result += 'A'+((S[i]>>4)&15);
|
||||
Result += '_';
|
||||
}
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
|
||||
6
ctx.cpp
6
ctx.cpp
@@ -3502,7 +3502,7 @@ FunctionEmitContext::ReturnInst() {
|
||||
llvm::Value *
|
||||
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
std::vector<llvm::Value *> &argVals,
|
||||
llvm::Value *launchCount) {
|
||||
llvm::Value *launchCount[3]){
|
||||
if (callee == NULL) {
|
||||
AssertPos(currentPos, m->errorCount > 0);
|
||||
return NULL;
|
||||
@@ -3563,7 +3563,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
args.push_back(launchGroupHandlePtr);
|
||||
args.push_back(fptr);
|
||||
args.push_back(voidmem);
|
||||
args.push_back(launchCount);
|
||||
args.push_back(launchCount[0]);
|
||||
args.push_back(launchCount[1]);
|
||||
args.push_back(launchCount[2]);
|
||||
return CallInst(flaunch, NULL, args, "");
|
||||
}
|
||||
|
||||
|
||||
2
ctx.h
2
ctx.h
@@ -542,7 +542,7 @@ public:
|
||||
he given argument values. */
|
||||
llvm::Value *LaunchInst(llvm::Value *callee,
|
||||
std::vector<llvm::Value *> &argVals,
|
||||
llvm::Value *launchCount);
|
||||
llvm::Value *launchCount[3]);
|
||||
|
||||
void SyncInst();
|
||||
|
||||
|
||||
@@ -2009,43 +2009,37 @@ static FORCEINLINE void __masked_store_i16(void *p, __vec16_i16 val,
|
||||
ptr[i] = val[i];
|
||||
}
|
||||
|
||||
static FORCEINLINE void __masked_store_i32(void *p, __vec16_i32 val, __vec16_i1 mask)
|
||||
static FORCEINLINE void __masked_store_i32(void *p, const __vec16_i32 val, const __vec16_i1 mask)
|
||||
{
|
||||
#ifdef ISPC_FORCE_ALIGNED_MEMORY
|
||||
_mm512_mask_store_epi32(p, mask, val.v);
|
||||
#else
|
||||
__vec16_i32 tmp;
|
||||
tmp.v = _mm512_extloadunpacklo_epi32(tmp.v, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
tmp.v = _mm512_extloadunpackhi_epi32(tmp.v, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
tmp.v = _mm512_mask_mov_epi32(tmp.v, mask, val.v);
|
||||
_mm512_extpackstorelo_epi32(p, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
_mm512_extpackstorehi_epi32((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
_mm512_mask_i32extscatter_epi32(p, mask, __ispc_stride1, val, _MM_DOWNCONV_EPI32_NONE, _MM_SCALE_4, _MM_HINT_NONE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCEINLINE void __masked_store_float(void *p, __vec16_f val, __vec16_i1 mask)
|
||||
static FORCEINLINE void __masked_store_float(void *p, const __vec16_f val, const __vec16_i1 mask)
|
||||
{
|
||||
#ifdef ISPC_FORCE_ALIGNED_MEMORY
|
||||
_mm512_mask_store_ps(p, mask, val.v);
|
||||
#else
|
||||
__vec16_f tmp;
|
||||
tmp.v = _mm512_extloadunpacklo_ps(tmp.v, p, _MM_UPCONV_PS_NONE, _MM_HINT_NONE);
|
||||
tmp.v = _mm512_extloadunpackhi_ps(tmp.v, (uint8_t*)p+64, _MM_UPCONV_PS_NONE, _MM_HINT_NONE);
|
||||
tmp.v = _mm512_mask_mov_ps(tmp.v, mask, val.v);
|
||||
_mm512_extpackstorelo_ps(p, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE);
|
||||
_mm512_extpackstorehi_ps((uint8_t*)p+64, tmp.v, _MM_DOWNCONV_PS_NONE, _MM_HINT_NONE);
|
||||
_mm512_mask_i32extscatter_ps(p, mask, __ispc_stride1, val, _MM_DOWNCONV_PS_NONE, _MM_SCALE_4, _MM_HINT_NONE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCEINLINE void __masked_store_i64(void *p, __vec16_i64 val,
|
||||
__vec16_i1 mask) {
|
||||
int64_t *ptr = (int64_t *)p;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
if ((mask.v & (1 << i)) != 0)
|
||||
ptr[i] = val[i];
|
||||
static FORCEINLINE void __masked_store_i64(void *p, const __vec16_i64 val, const __vec16_i1 mask) {
|
||||
#ifdef ISPC_FORCE_ALIGNED_MEMORY
|
||||
__vec16_i1 tmp_m = mask;
|
||||
tmp_m = _mm512_kswapb(tmp_m, tmp_m);
|
||||
_mm512_mask_store_epi64(p, mask, val.v1);
|
||||
_mm512_mask_store_epi64((uint8_t*)p+64, tmp_m, val.v2);
|
||||
#else
|
||||
_mm512_mask_i32loextscatter_epi64( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE);
|
||||
_mm512_mask_i32loextscatter_epi64((int64_t*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_EPI64_NONE, _MM_SCALE_8, _MM_HINT_NONE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1 mask)
|
||||
static FORCEINLINE void __masked_store_double(void *p, const __vec16_d val, const __vec16_i1 mask)
|
||||
{
|
||||
#ifdef ISPC_FORCE_ALIGNED_MEMORY
|
||||
__vec16_i1 tmp_m = mask;
|
||||
@@ -2053,19 +2047,8 @@ static FORCEINLINE void __masked_store_double(void *p, __vec16_d val, __vec16_i1
|
||||
_mm512_mask_store_pd(p, mask, val.v1);
|
||||
_mm512_mask_store_pd((uint8_t*)p+64, tmp_m, val.v2);
|
||||
#else
|
||||
__vec16_d tmp;
|
||||
__vec16_i1 tmp_m = mask;
|
||||
tmp_m = _mm512_kswapb(tmp_m, tmp_m);
|
||||
tmp.v1 = _mm512_extloadunpacklo_pd(tmp.v1, p, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
|
||||
tmp.v1 = _mm512_extloadunpackhi_pd(tmp.v1, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
|
||||
tmp.v2 = _mm512_extloadunpacklo_pd(tmp.v2, (uint8_t*)p+64, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
|
||||
tmp.v2 = _mm512_extloadunpackhi_pd(tmp.v2, (uint8_t*)p+128, _MM_UPCONV_PD_NONE, _MM_HINT_NONE);
|
||||
tmp.v1 = _mm512_mask_mov_pd(tmp.v1, mask, val.v1);
|
||||
tmp.v2 = _mm512_mask_mov_pd(tmp.v2, tmp_m, val.v2);
|
||||
_mm512_extpackstorelo_pd(p, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
|
||||
_mm512_extpackstorehi_pd((uint8_t*)p+64, tmp.v1, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
|
||||
_mm512_extpackstorelo_pd((uint8_t*)p+64, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
|
||||
_mm512_extpackstorehi_pd((uint8_t*)p+128, tmp.v2, _MM_DOWNCONV_PD_NONE, _MM_HINT_NONE);
|
||||
_mm512_mask_i32loextscatter_pd( p, mask, __ispc_stride1, val.v1, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE);
|
||||
_mm512_mask_i32loextscatter_pd((double*)p+8, _mm512_kswapb(mask,mask), __ispc_stride1, val.v2, _MM_DOWNCONV_PD_NONE, _MM_SCALE_8, _MM_HINT_NONE);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
2
examples/mandelbrot_tasks3d/.gitignore
vendored
Normal file
2
examples/mandelbrot_tasks3d/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
mandelbrot
|
||||
*.ppm
|
||||
8
examples/mandelbrot_tasks3d/Makefile
Normal file
8
examples/mandelbrot_tasks3d/Makefile
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
EXAMPLE=mandelbrot_tasks3d
|
||||
CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp
|
||||
ISPC_SRC=mandelbrot_tasks3d.ispc
|
||||
ISPC_IA_TARGETS=avx,sse2,sse4
|
||||
ISPC_ARM_TARGETS=neon
|
||||
|
||||
include ../common.mk
|
||||
180
examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj
Normal file
180
examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj
Normal file
@@ -0,0 +1,180 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>mandelbrot_tasks</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
<TargetName>mandelbrot_tasks</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
<TargetName>mandelbrot_tasks</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
<TargetName>mandelbrot_tasks</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
<TargetName>mandelbrot_tasks</TargetName>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="mandelbrot_tasks.cpp" />
|
||||
<ClCompile Include="mandelbrot_tasks_serial.cpp" />
|
||||
<ClCompile Include="../tasksys.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="mandelbrot_tasks.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
146
examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp
Normal file
146
examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include "../timing.h"
|
||||
#include "mandelbrot_tasks3d_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[]);
|
||||
|
||||
/* Write a PPM image file with the image of the Mandelbrot set */
|
||||
static void
|
||||
writePPM(int *buf, int width, int height, const char *fn) {
|
||||
FILE *fp = fopen(fn, "wb");
|
||||
fprintf(fp, "P6\n");
|
||||
fprintf(fp, "%d %d\n", width, height);
|
||||
fprintf(fp, "255\n");
|
||||
for (int i = 0; i < width*height; ++i) {
|
||||
// Map the iteration count to colors by just alternating between
|
||||
// two greys.
|
||||
char c = (buf[i] & 0x1) ? 240 : 20;
|
||||
for (int j = 0; j < 3; ++j)
|
||||
fputc(c, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
printf("Wrote image file %s\n", fn);
|
||||
}
|
||||
|
||||
|
||||
static void usage() {
|
||||
fprintf(stderr, "usage: mandelbrot [--scale=<factor>]\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
unsigned int width = 1536;
|
||||
unsigned int height = 1024;
|
||||
float x0 = -2;
|
||||
float x1 = 1;
|
||||
float y0 = -1;
|
||||
float y1 = 1;
|
||||
|
||||
if (argc == 1)
|
||||
;
|
||||
else if (argc == 2) {
|
||||
if (strncmp(argv[1], "--scale=", 8) == 0) {
|
||||
float scale = atof(argv[1] + 8);
|
||||
if (scale == 0.f)
|
||||
usage();
|
||||
width *= scale;
|
||||
height *= scale;
|
||||
// round up to multiples of 16
|
||||
width = (width + 0xf) & ~0xf;
|
||||
height = (height + 0xf) & ~0xf;
|
||||
}
|
||||
else
|
||||
usage();
|
||||
}
|
||||
else
|
||||
usage();
|
||||
|
||||
int maxIterations = 512;
|
||||
int *buf = new int[width*height];
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation; report the minimum
|
||||
// time of three runs.
|
||||
//
|
||||
double minISPC = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
reset_and_start_timer();
|
||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minISPC = std::min(minISPC, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||
|
||||
|
||||
//
|
||||
// And run the serial implementation 3 times, again reporting the
|
||||
// minimum time.
|
||||
//
|
||||
double minSerial = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
reset_and_start_timer();
|
||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minSerial = std::min(minSerial, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial);
|
||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
99
examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc
Normal file
99
examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
static inline int
|
||||
mandel(float c_re, float c_im, int count) {
|
||||
float z_re = c_re, z_im = c_im;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
if (z_re * z_re + z_im * z_im > 4.)
|
||||
break;
|
||||
|
||||
float new_re = z_re*z_re - z_im*z_im;
|
||||
float new_im = 2.f * z_re * z_im;
|
||||
unmasked {
|
||||
z_re = c_re + new_re;
|
||||
z_im = c_im + new_im;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/* Task to compute the Mandelbrot iterations for a single scanline.
|
||||
*/
|
||||
task void
|
||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int xspan, uniform int yspan,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
const uniform int xstart = taskIndex0 * xspan;
|
||||
const uniform int xend = min(xstart + xspan, width);
|
||||
|
||||
const uniform int ystart = taskIndex1 * yspan;
|
||||
const uniform int yend = min(ystart + yspan, height);
|
||||
|
||||
|
||||
foreach (yi = ystart ... yend, xi = xstart ... xend) {
|
||||
float x = x0 + xi * dx;
|
||||
float y = y0 + yi * dy;
|
||||
|
||||
int index = yi * width + xi;
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if 1
|
||||
export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */
|
||||
const uniform int yspan = 16;
|
||||
|
||||
|
||||
#if 1
|
||||
launch [width/xspan, height/yspan]
|
||||
#else
|
||||
launch [height/yspan][width/xspan]
|
||||
#endif
|
||||
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
||||
maxIterations, output);
|
||||
}
|
||||
#endif
|
||||
68
examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp
Normal file
68
examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
static int mandel(float c_re, float c_im, int count) {
|
||||
float z_re = c_re, z_im = c_im;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
if (z_re * z_re + z_im * z_im > 4.f)
|
||||
break;
|
||||
|
||||
float new_re = z_re*z_re - z_im*z_im;
|
||||
float new_im = 2.f * z_re * z_im;
|
||||
z_re = c_re + new_re;
|
||||
z_im = c_im + new_im;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[])
|
||||
{
|
||||
float dx = (x1 - x0) / width;
|
||||
float dy = (y1 - y0) / height;
|
||||
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int i = 0; i < width; ++i) {
|
||||
float x = x0 + i * dx;
|
||||
float y = y0 + j * dy;
|
||||
|
||||
int index = (j * width + i);
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,21 +170,41 @@
|
||||
|
||||
// Signature of ispc-generated 'task' functions
|
||||
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
||||
int taskIndex, int taskCount);
|
||||
int taskIndex, int taskCount,
|
||||
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||
int taskCount0, int taskCount1, int taskCount2);
|
||||
|
||||
// Small structure used to hold the data for each task
|
||||
struct TaskInfo {
|
||||
TaskFuncType func;
|
||||
void *data;
|
||||
int taskIndex, taskCount;
|
||||
int taskIndex;
|
||||
int taskCount3d[3];
|
||||
#if defined(ISPC_IS_WINDOWS)
|
||||
event taskEvent;
|
||||
#endif
|
||||
};
|
||||
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||
int taskIndex0() const
|
||||
{
|
||||
return taskIndex % taskCount3d[0];
|
||||
}
|
||||
int taskIndex1() const
|
||||
{
|
||||
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
|
||||
}
|
||||
int taskIndex2() const
|
||||
{
|
||||
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
|
||||
}
|
||||
int taskCount0() const { return taskCount3d[0]; }
|
||||
int taskCount1() const { return taskCount3d[1]; }
|
||||
int taskCount2() const { return taskCount3d[2]; }
|
||||
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
|
||||
} __attribute__((aligned(32)));
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int count);
|
||||
void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz);
|
||||
void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment);
|
||||
void ISPCSync(void *handle);
|
||||
}
|
||||
@@ -518,7 +538,9 @@ lRunTask(void *ti) {
|
||||
|
||||
// Actually run the task
|
||||
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
||||
taskInfo->taskIndex, taskInfo->taskCount);
|
||||
taskInfo->taskIndex, taskInfo->taskCount(),
|
||||
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
|
||||
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
|
||||
}
|
||||
|
||||
|
||||
@@ -559,7 +581,9 @@ lRunTask(LPVOID param) {
|
||||
// will cause bugs in code that uses those.
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
|
||||
// Signal the event that this task is done
|
||||
ti->taskEvent.set();
|
||||
@@ -660,7 +684,9 @@ lTaskEntry(void *arg) {
|
||||
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
||||
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
||||
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
||||
myTask->taskCount);
|
||||
myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the "number of unfinished tasks" counter in the task
|
||||
@@ -861,7 +887,9 @@ TaskGroup::Sync() {
|
||||
// Do work for _myTask_
|
||||
//
|
||||
// FIXME: bogus values for thread index/thread count here as well..
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the number of unfinished tasks counter
|
||||
@@ -891,7 +919,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
|
||||
// Actually run the task.
|
||||
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -920,7 +950,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// Actually run the task.
|
||||
int threadIndex = omp_get_thread_num();
|
||||
int threadCount = omp_get_num_threads();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -951,7 +983,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -978,7 +1012,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1031,7 +1067,8 @@ FreeTaskGroup(TaskGroup *tg) {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
|
||||
const int count = count0*count1*count2;
|
||||
TaskGroup *taskGroup;
|
||||
if (*taskGroupPtr == NULL) {
|
||||
InitTaskSystem();
|
||||
@@ -1047,7 +1084,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ti->func = (TaskFuncType)func;
|
||||
ti->data = data;
|
||||
ti->taskIndex = i;
|
||||
ti->taskCount = count;
|
||||
ti->taskCount3d[0] = count0;
|
||||
ti->taskCount3d[1] = count1;
|
||||
ti->taskCount3d[2] = count2;
|
||||
}
|
||||
taskGroup->Launch(baseIndex, count);
|
||||
}
|
||||
|
||||
164
expr.cpp
164
expr.cpp
@@ -1660,6 +1660,64 @@ BinaryExpr::BinaryExpr(Op o, Expr *a, Expr *b, SourcePos p)
|
||||
arg1 = b;
|
||||
}
|
||||
|
||||
Expr *lCreateBinaryOperatorCall(const BinaryExpr::Op bop,
|
||||
Expr *a0, Expr *a1,
|
||||
const SourcePos &sp)
|
||||
{
|
||||
if ((a0 == NULL) || (a1 == NULL)) {
|
||||
return NULL;
|
||||
}
|
||||
Expr *arg0 = a0->TypeCheck();
|
||||
Expr *arg1 = a1->TypeCheck();
|
||||
if ((arg0 == NULL) || (arg1 == NULL)) {
|
||||
return NULL;
|
||||
}
|
||||
const Type *type0 = arg0->GetType();
|
||||
const Type *type1 = arg1->GetType();
|
||||
|
||||
// If either operand is a reference, dereference it before we move
|
||||
// forward
|
||||
if (CastType<ReferenceType>(type0) != NULL) {
|
||||
arg0 = new RefDerefExpr(arg0, arg0->pos);
|
||||
type0 = arg0->GetType();
|
||||
}
|
||||
if (CastType<ReferenceType>(type1) != NULL) {
|
||||
arg1 = new RefDerefExpr(arg1, arg1->pos);
|
||||
type1 = arg1->GetType();
|
||||
}
|
||||
if ((type0 == NULL) || (type1 == NULL)) {
|
||||
return NULL;
|
||||
}
|
||||
if (CastType<StructType>(type0) != NULL ||
|
||||
CastType<StructType>(type1) != NULL) {
|
||||
std::string opName = std::string("operator") + lOpString(bop);
|
||||
std::vector<Symbol *> funs;
|
||||
m->symbolTable->LookupFunction(opName.c_str(), &funs);
|
||||
if (funs.size() == 0) {
|
||||
Error(sp, "operator %s(%s, %s) is not defined.",
|
||||
opName.c_str(), (type0->GetString()).c_str(), (type1->GetString()).c_str());
|
||||
return NULL;
|
||||
}
|
||||
Expr *func = new FunctionSymbolExpr(opName.c_str(), funs, sp);
|
||||
ExprList *args = new ExprList(sp);
|
||||
args->exprs.push_back(arg0);
|
||||
args->exprs.push_back(arg1);
|
||||
Expr *opCallExpr = new FunctionCallExpr(func, args, sp);
|
||||
return opCallExpr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p) {
|
||||
Expr * op = lCreateBinaryOperatorCall(o, a, b, p);
|
||||
if (op != NULL) {
|
||||
return op;
|
||||
}
|
||||
op = new BinaryExpr(o, a, b, p);
|
||||
return op;
|
||||
}
|
||||
|
||||
|
||||
/** Emit code for a && or || logical operator. In particular, the code
|
||||
here handles "short-circuit" evaluation, where the second expression
|
||||
@@ -2985,29 +3043,10 @@ AssignExpr::TypeCheck() {
|
||||
if (lvalueIsReference)
|
||||
lvalue = new RefDerefExpr(lvalue, lvalue->pos);
|
||||
|
||||
FunctionSymbolExpr *fse;
|
||||
if ((fse = dynamic_cast<FunctionSymbolExpr *>(rvalue)) != NULL) {
|
||||
// Special case to use the type of the LHS to resolve function
|
||||
// overloads when we're assigning a function pointer where the
|
||||
// function is overloaded.
|
||||
const Type *lvalueType = lvalue->GetType();
|
||||
const FunctionType *ftype;
|
||||
if (CastType<PointerType>(lvalueType) == NULL ||
|
||||
(ftype = CastType<FunctionType>(lvalueType->GetBaseType())) == NULL) {
|
||||
Error(lvalue->pos, "Can't assign function pointer to type \"%s\".",
|
||||
lvalueType ? lvalueType->GetString().c_str() : "<unknown>");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::vector<const Type *> paramTypes;
|
||||
for (int i = 0; i < ftype->GetNumParameters(); ++i)
|
||||
paramTypes.push_back(ftype->GetParameterType(i));
|
||||
|
||||
if (!fse->ResolveOverloads(rvalue->pos, paramTypes)) {
|
||||
Error(pos, "Unable to find overloaded function for function "
|
||||
"pointer assignment.");
|
||||
return NULL;
|
||||
}
|
||||
if (PossiblyResolveFunctionOverloads(rvalue, lvalue->GetType()) == false) {
|
||||
Error(pos, "Unable to find overloaded function for function "
|
||||
"pointer assignment.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const Type *lhsType = lvalue->GetType();
|
||||
@@ -3501,11 +3540,13 @@ SelectExpr::Print() const {
|
||||
// FunctionCallExpr
|
||||
|
||||
FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p,
|
||||
bool il, Expr *lce)
|
||||
bool il, Expr *lce[3])
|
||||
: Expr(p), isLaunch(il) {
|
||||
func = f;
|
||||
args = a;
|
||||
launchCountExpr = lce;
|
||||
launchCountExpr[0] = lce[0];
|
||||
launchCountExpr[1] = lce[1];
|
||||
launchCountExpr[2] = lce[2];
|
||||
}
|
||||
|
||||
|
||||
@@ -3623,9 +3664,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
llvm::Value *retVal = NULL;
|
||||
ctx->SetDebugPos(pos);
|
||||
if (ft->isTask) {
|
||||
AssertPos(pos, launchCountExpr != NULL);
|
||||
llvm::Value *launchCount = launchCountExpr->GetValue(ctx);
|
||||
if (launchCount != NULL)
|
||||
AssertPos(pos, launchCountExpr[0] != NULL);
|
||||
llvm::Value *launchCount[3] =
|
||||
{ launchCountExpr[0]->GetValue(ctx),
|
||||
launchCountExpr[1]->GetValue(ctx),
|
||||
launchCountExpr[2]->GetValue(ctx) };
|
||||
|
||||
if (launchCount[0] != NULL)
|
||||
ctx->LaunchInst(callee, argVals, launchCount);
|
||||
}
|
||||
else
|
||||
@@ -3652,8 +3697,35 @@ FunctionCallExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
}
|
||||
|
||||
|
||||
bool FullResolveOverloads(Expr * func, ExprList * args,
|
||||
std::vector<const Type *> *argTypes,
|
||||
std::vector<bool> *argCouldBeNULL,
|
||||
std::vector<bool> *argIsConstant) {
|
||||
for (unsigned int i = 0; i < args->exprs.size(); ++i) {
|
||||
Expr *expr = args->exprs[i];
|
||||
if (expr == NULL)
|
||||
return false;
|
||||
const Type *t = expr->GetType();
|
||||
if (t == NULL)
|
||||
return false;
|
||||
argTypes->push_back(t);
|
||||
argCouldBeNULL->push_back(lIsAllIntZeros(expr) || dynamic_cast<NullPointerExpr *>(expr));
|
||||
argIsConstant->push_back(dynamic_cast<ConstExpr *>(expr) || dynamic_cast<NullPointerExpr *>(expr));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
FunctionCallExpr::GetType() const {
|
||||
std::vector<const Type *> argTypes;
|
||||
std::vector<bool> argCouldBeNULL, argIsConstant;
|
||||
if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == true) {
|
||||
FunctionSymbolExpr *fse = dynamic_cast<FunctionSymbolExpr *>(func);
|
||||
if (fse != NULL) {
|
||||
fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL, &argIsConstant);
|
||||
}
|
||||
}
|
||||
const FunctionType *ftype = lGetFunctionType(func);
|
||||
return ftype ? ftype->GetReturnType() : NULL;
|
||||
}
|
||||
@@ -3689,20 +3761,9 @@ FunctionCallExpr::TypeCheck() {
|
||||
|
||||
std::vector<const Type *> argTypes;
|
||||
std::vector<bool> argCouldBeNULL, argIsConstant;
|
||||
for (unsigned int i = 0; i < args->exprs.size(); ++i) {
|
||||
Expr *expr = args->exprs[i];
|
||||
|
||||
if (expr == NULL)
|
||||
return NULL;
|
||||
const Type *t = expr->GetType();
|
||||
if (t == NULL)
|
||||
return NULL;
|
||||
|
||||
argTypes.push_back(t);
|
||||
argCouldBeNULL.push_back(lIsAllIntZeros(expr) ||
|
||||
dynamic_cast<NullPointerExpr *>(expr));
|
||||
argIsConstant.push_back(dynamic_cast<ConstExpr *>(expr) ||
|
||||
dynamic_cast<NullPointerExpr *>(expr));
|
||||
if (FullResolveOverloads(func, args, &argTypes, &argCouldBeNULL, &argIsConstant) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
FunctionSymbolExpr *fse = dynamic_cast<FunctionSymbolExpr *>(func);
|
||||
@@ -3732,14 +3793,17 @@ FunctionCallExpr::TypeCheck() {
|
||||
if (!isLaunch)
|
||||
Error(pos, "\"launch\" expression needed to call function "
|
||||
"with \"task\" qualifier.");
|
||||
if (!launchCountExpr)
|
||||
for (int k = 0; k < 3; k++)
|
||||
{
|
||||
if (!launchCountExpr[k])
|
||||
return NULL;
|
||||
|
||||
launchCountExpr =
|
||||
TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32,
|
||||
"task launch count");
|
||||
if (launchCountExpr == NULL)
|
||||
launchCountExpr[k] =
|
||||
TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32,
|
||||
"task launch count");
|
||||
if (launchCountExpr[k] == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (isLaunch) {
|
||||
@@ -3747,7 +3811,7 @@ FunctionCallExpr::TypeCheck() {
|
||||
"qualified function.");
|
||||
return NULL;
|
||||
}
|
||||
AssertPos(pos, launchCountExpr == NULL);
|
||||
AssertPos(pos, launchCountExpr[0] == NULL);
|
||||
}
|
||||
}
|
||||
else {
|
||||
@@ -7010,7 +7074,8 @@ TypeCastExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
|
||||
const Type *
|
||||
TypeCastExpr::GetType() const {
|
||||
AssertPos(pos, type->HasUnboundVariability() == false);
|
||||
// We have to switch off this assert after supporting of operators.
|
||||
//AssertPos(pos, type->HasUnboundVariability() == false);
|
||||
return type;
|
||||
}
|
||||
|
||||
@@ -8190,6 +8255,9 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos,
|
||||
const std::vector<bool> *argCouldBeNULL,
|
||||
const std::vector<bool> *argIsConstant) {
|
||||
const char *funName = candidateFunctions.front()->name.c_str();
|
||||
if (triedToResolve == true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
triedToResolve = true;
|
||||
|
||||
|
||||
7
expr.h
7
expr.h
@@ -246,7 +246,8 @@ public:
|
||||
class FunctionCallExpr : public Expr {
|
||||
public:
|
||||
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p,
|
||||
bool isLaunch = false, Expr *launchCountExpr = NULL);
|
||||
bool isLaunch = false,
|
||||
Expr *launchCountExpr[3] = (Expr*[3]){NULL, NULL, NULL});
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
@@ -261,7 +262,7 @@ public:
|
||||
Expr *func;
|
||||
ExprList *args;
|
||||
bool isLaunch;
|
||||
Expr *launchCountExpr;
|
||||
Expr *launchCountExpr[3];
|
||||
};
|
||||
|
||||
|
||||
@@ -730,6 +731,8 @@ bool CanConvertTypes(const Type *fromType, const Type *toType,
|
||||
*/
|
||||
Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase);
|
||||
|
||||
Expr * MakeBinaryExpr(BinaryExpr::Op o, Expr *a, Expr *b, SourcePos p);
|
||||
|
||||
/** Utility routine that emits code to initialize a symbol given an
|
||||
initializer expression.
|
||||
|
||||
|
||||
35
fail_db.txt
35
fail_db.txt
@@ -1025,3 +1025,38 @@
|
||||
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
|
||||
39
func.cpp
39
func.cpp
@@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) {
|
||||
Assert(taskIndexSym);
|
||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||
Assert(taskCountSym);
|
||||
|
||||
taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0");
|
||||
Assert(taskIndexSym0);
|
||||
taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1");
|
||||
Assert(taskIndexSym1);
|
||||
taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2");
|
||||
Assert(taskIndexSym2);
|
||||
|
||||
|
||||
taskCountSym0 = m->symbolTable->LookupVariable("taskCount0");
|
||||
Assert(taskCountSym0);
|
||||
taskCountSym1 = m->symbolTable->LookupVariable("taskCount1");
|
||||
Assert(taskCountSym1);
|
||||
taskCountSym2 = m->symbolTable->LookupVariable("taskCount2");
|
||||
Assert(taskCountSym2);
|
||||
}
|
||||
else
|
||||
{
|
||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||
taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL;
|
||||
taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::Value *threadCount = argIter++;
|
||||
llvm::Value *taskIndex = argIter++;
|
||||
llvm::Value *taskCount = argIter++;
|
||||
llvm::Value *taskIndex0 = argIter++;
|
||||
llvm::Value *taskIndex1 = argIter++;
|
||||
llvm::Value *taskIndex2 = argIter++;
|
||||
llvm::Value *taskCount0 = argIter++;
|
||||
llvm::Value *taskCount1 = argIter++;
|
||||
llvm::Value *taskCount2 = argIter++;
|
||||
|
||||
// Copy the function parameter values from the structure into local
|
||||
// storage
|
||||
@@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
|
||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||
|
||||
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||
|
||||
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||
}
|
||||
else {
|
||||
// Regular, non-task function
|
||||
|
||||
5
func.h
5
func.h
@@ -60,7 +60,10 @@ private:
|
||||
Stmt *code;
|
||||
Symbol *maskSymbol;
|
||||
Symbol *threadIndexSym, *threadCountSym;
|
||||
Symbol *taskIndexSym, *taskCountSym;
|
||||
Symbol *taskIndexSym, *taskCountSym;
|
||||
Symbol *taskIndexSym0, *taskCountSym0;
|
||||
Symbol *taskIndexSym1, *taskCountSym1;
|
||||
Symbol *taskIndexSym2, *taskCountSym2;
|
||||
};
|
||||
|
||||
#endif // ISPC_FUNC_H
|
||||
|
||||
46
ispc.cpp
46
ispc.cpp
@@ -515,6 +515,25 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
#endif
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
#endif
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2") ||
|
||||
@@ -563,6 +582,29 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
#endif
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
#if defined(LLVM_3_4)
|
||||
",+rdrnd"
|
||||
#else
|
||||
",+rdrand"
|
||||
#endif
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
this->m_hasHalf = true;
|
||||
#if !defined(LLVM_3_1)
|
||||
// LLVM 3.2+ only
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
#endif
|
||||
}
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
@@ -740,8 +782,8 @@ Target::SupportedTargets() {
|
||||
"sse2-i32x4, sse2-i32x8, "
|
||||
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
|
||||
"avx1-i32x8, avx1-i32x16, avx1-i64x4, "
|
||||
"avx1.1-i32x8, avx1.1-i32x16, "
|
||||
"avx2-i32x8, avx2-i32x16, "
|
||||
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
|
||||
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
||||
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
||||
"generic-x32, generic-x64";
|
||||
}
|
||||
|
||||
40
ispc.vcxproj
40
ispc.vcxproj
@@ -28,10 +28,14 @@
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
|
||||
@@ -323,6 +327,24 @@
|
||||
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx11-i64x4-32bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx11-i64x4-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2.ll">
|
||||
<FileType>Document</FileType>
|
||||
@@ -359,6 +381,24 @@
|
||||
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx2-i64x4-32bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx2-i64x4-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-1.ll">
|
||||
<FileType>Document</FileType>
|
||||
|
||||
8
lex.ll
8
lex.ll
@@ -419,6 +419,14 @@ while { RT; return TOKEN_WHILE; }
|
||||
\"C\" { RT; return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { RT; return TOKEN_DOTDOTDOT; }
|
||||
|
||||
"operator*" { return TOKEN_IDENTIFIER; }
|
||||
"operator+" { return TOKEN_IDENTIFIER; }
|
||||
"operator-" { return TOKEN_IDENTIFIER; }
|
||||
"operator<<" { return TOKEN_IDENTIFIER; }
|
||||
"operator>>" { return TOKEN_IDENTIFIER; }
|
||||
"operator/" { return TOKEN_IDENTIFIER; }
|
||||
"operator%" { return TOKEN_IDENTIFIER; }
|
||||
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; }
|
||||
|
||||
{IDENT} {
|
||||
|
||||
103
parse.yy
103
parse.yy
@@ -353,17 +353,75 @@ launch_expression
|
||||
: TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr);
|
||||
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2);
|
||||
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr);
|
||||
Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{ $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); }
|
||||
| TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')'
|
||||
{ $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); }
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5);
|
||||
Expr *launchCount[3] = {$3, oneExpr, oneExpr};
|
||||
$$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount);
|
||||
}
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||
$$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7);
|
||||
Expr *launchCount[3] = {$3, $5, oneExpr};
|
||||
$$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||
$$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8);
|
||||
Expr *launchCount[3] = {$6, $3, oneExpr};
|
||||
$$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount);
|
||||
}
|
||||
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$3, $5, $7};
|
||||
$$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$3, $5, $7};
|
||||
$$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$9, $6, $3};
|
||||
$$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount);
|
||||
}
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')'
|
||||
{
|
||||
Expr *launchCount[3] = {$9, $6, $3};
|
||||
$$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount);
|
||||
}
|
||||
|
||||
|
||||
| TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
{
|
||||
@@ -377,13 +435,13 @@ launch_expression
|
||||
"around function call expression.");
|
||||
$$ = NULL;
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>'
|
||||
{
|
||||
Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' "
|
||||
"around function call expression.");
|
||||
$$ = NULL;
|
||||
}
|
||||
| TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>'
|
||||
| TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>'
|
||||
{
|
||||
Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' "
|
||||
"around function call expression.");
|
||||
@@ -468,27 +526,27 @@ cast_expression
|
||||
multiplicative_expression
|
||||
: cast_expression
|
||||
| multiplicative_expression '*' cast_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Mul, $1, $3, Union(@1, @3)); }
|
||||
| multiplicative_expression '/' cast_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Div, $1, $3, Union(@1, @3)); }
|
||||
| multiplicative_expression '%' cast_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Mod, $1, $3, Union(@1, @3)); }
|
||||
;
|
||||
|
||||
additive_expression
|
||||
: multiplicative_expression
|
||||
| additive_expression '+' multiplicative_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Add, $1, $3, Union(@1, @3)); }
|
||||
| additive_expression '-' multiplicative_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Sub, $1, $3, Union(@1, @3)); }
|
||||
;
|
||||
|
||||
shift_expression
|
||||
: additive_expression
|
||||
| shift_expression TOKEN_LEFT_OP additive_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1,@3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Shl, $1, $3, Union(@1, @3)); }
|
||||
| shift_expression TOKEN_RIGHT_OP additive_expression
|
||||
{ $$ = new BinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1,@3)); }
|
||||
{ $$ = MakeBinaryExpr(BinaryExpr::Shr, $1, $3, Union(@1, @3)); }
|
||||
;
|
||||
|
||||
relational_expression
|
||||
@@ -2217,6 +2275,21 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) {
|
||||
|
||||
Symbol *taskCountSym = new Symbol("taskCount", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym);
|
||||
|
||||
Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym0);
|
||||
Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym1);
|
||||
Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type);
|
||||
m->symbolTable->AddVariable(taskIndexSym2);
|
||||
|
||||
|
||||
Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym0);
|
||||
Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym1);
|
||||
Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type);
|
||||
m->symbolTable->AddVariable(taskCountSym2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
23
run_tests.py
23
run_tests.py
@@ -378,6 +378,11 @@ def file_check(compfails, runfails):
|
||||
compiler_version = options.compiler_exe + temp3.group()
|
||||
else:
|
||||
compiler_version = "cl"
|
||||
possible_compilers = ["g++4.4", "g++4.7", "clang++3.3", "cl"]
|
||||
if not compiler_version in possible_compilers:
|
||||
error("\n**********\nWe don't have history of fails for compiler " +
|
||||
compiler_version +
|
||||
"\nAll fails will be new!!!\n**********", 2)
|
||||
new_line = " "+options.arch.rjust(6)+" "+options.target.rjust(14)+" "+OS.rjust(7)+" "+llvm_version+" "+compiler_version.rjust(10)+" "+opt+" *\n"
|
||||
|
||||
new_compfails = compfails[:]
|
||||
@@ -449,8 +454,9 @@ def verify():
|
||||
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
|
||||
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
|
||||
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
|
||||
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
|
||||
"avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
|
||||
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
|
||||
"avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
|
||||
"generic-1", "generic-4", "generic-8",
|
||||
"generic-16", "generic-32", "generic-64"]]
|
||||
for i in range (0,len(f_lines)):
|
||||
if f_lines[i][0] == "%":
|
||||
@@ -647,7 +653,8 @@ def run_tests(options1, args, print_version):
|
||||
if options.non_interactive == False:
|
||||
print_debug("\n", s, run_tests_log)
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
temp_time = (time.time() - start_time)
|
||||
elapsed_time = time.strftime('%Hh%Mm%Ssec.', time.gmtime(temp_time))
|
||||
|
||||
while not qret.empty():
|
||||
(c, r, skip) = qret.get()
|
||||
@@ -675,12 +682,16 @@ def run_tests(options1, args, print_version):
|
||||
if len(compile_error_files) == 0 and len(run_error_files) == 0:
|
||||
print_debug("No fails\n", s, run_tests_log)
|
||||
|
||||
R = file_check(compile_error_files, run_error_files)
|
||||
if len(args) == 0:
|
||||
R = file_check(compile_error_files, run_error_files)
|
||||
else:
|
||||
error("don't check new fails for incomplete suite of tests", 2)
|
||||
R = 0
|
||||
|
||||
if options.time:
|
||||
print_debug("Elapsed time: %d s\n" % elapsed_time, s, run_tests_log)
|
||||
print_debug("Elapsed time: " + elapsed_time + "\n", s, run_tests_log)
|
||||
|
||||
return R
|
||||
return [R, elapsed_time]
|
||||
|
||||
|
||||
from optparse import OptionParser
|
||||
|
||||
70
tests/operators.ispc
Normal file
70
tests/operators.ispc
Normal file
@@ -0,0 +1,70 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
struct S {
|
||||
float a;
|
||||
};
|
||||
|
||||
// References "struct&" were put in random order to test them.
|
||||
struct S operator*(struct S& rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator/(struct S& rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a - rr.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator%(struct S rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator+(struct S rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a / rv.a + 3;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator-(struct S rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator>>(struct S& rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a / rv.a + 3;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator<<(struct S& rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S a, a1;
|
||||
struct S b, b1;
|
||||
struct S d1, d2, d3, d4, d5, d6, d7;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
a.a = aFOO[programIndex];
|
||||
b.a = -aFOO[programIndex];
|
||||
d1 = a * b;
|
||||
d2 = a / b;
|
||||
d3 = a % b;
|
||||
d4 = a + b;
|
||||
d5 = a - b;
|
||||
d6 = a >> b;
|
||||
d7 = a << b;
|
||||
RET[programIndex] = d1.a + d2.a + d3.a + d4.a + d5.a + d6.a + d7.a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[4]) {
|
||||
RET[programIndex] = 14;
|
||||
}
|
||||
64
tests/operators1.ispc
Normal file
64
tests/operators1.ispc
Normal file
@@ -0,0 +1,64 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
struct S {
|
||||
float a;
|
||||
};
|
||||
|
||||
// References "struct&" were put in random order to test them.
|
||||
struct S operator*(struct S& rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator/(struct S& rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a - rr.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator%(struct S rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator+(struct S rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a / rv.a + 3;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator-(struct S rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator>>(struct S& rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a / rv.a + 3;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator<<(struct S& rr, struct S& rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 2;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S a;
|
||||
struct S b;
|
||||
struct S d;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
a.a = 5;
|
||||
b.a = -5;
|
||||
d = a * b + b / a - a << (b - b) - a;
|
||||
RET[programIndex] = d.a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[4]) {
|
||||
RET[programIndex] = 12;
|
||||
}
|
||||
51
tests/operators2.ispc
Normal file
51
tests/operators2.ispc
Normal file
@@ -0,0 +1,51 @@
|
||||
int off;
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
struct S {
|
||||
float a;
|
||||
};
|
||||
|
||||
struct S operator+(struct S rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a / rv.a + 3;
|
||||
if (off == 1)
|
||||
c.a = 22;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S operator/(struct S rr, struct S rv) {
|
||||
struct S c;
|
||||
c.a = rr.a + rv.a + 10;
|
||||
if (off == 1)
|
||||
c.a = 33;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct S a;
|
||||
struct S b;
|
||||
struct S d;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int T = programIndex;
|
||||
a.a = aFOO[programIndex];
|
||||
b.a = -aFOO[programIndex];
|
||||
if (programIndex == 3)
|
||||
off = 1;
|
||||
else
|
||||
off = 0;
|
||||
if (T % 2)
|
||||
d = a + b;
|
||||
else
|
||||
d = a / b;
|
||||
|
||||
RET[programIndex] = d.a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[4]) {
|
||||
if (programIndex % 2)
|
||||
RET[programIndex] = 2;
|
||||
else
|
||||
RET[programIndex] = 10;
|
||||
RET[3] = 22;
|
||||
}
|
||||
6
type.cpp
6
type.cpp
@@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
||||
}
|
||||
else
|
||||
// Otherwise we already have the types of the arguments
|
||||
|
||||
Reference in New Issue
Block a user