merged with master
This commit is contained in:
23
alloy.py
23
alloy.py
@@ -89,7 +89,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
|||||||
if version_LLVM == "trunk":
|
if version_LLVM == "trunk":
|
||||||
SVN_PATH="trunk"
|
SVN_PATH="trunk"
|
||||||
if version_LLVM == "3.4":
|
if version_LLVM == "3.4":
|
||||||
SVN_PATH="tags/RELEASE_34/rc2"
|
SVN_PATH="tags/RELEASE_34/final"
|
||||||
version_LLVM = "3_4"
|
version_LLVM = "3_4"
|
||||||
if version_LLVM == "3.3":
|
if version_LLVM == "3.3":
|
||||||
SVN_PATH="tags/RELEASE_33/final"
|
SVN_PATH="tags/RELEASE_33/final"
|
||||||
@@ -129,8 +129,23 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
|||||||
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
|
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
|
||||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
|
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
|
||||||
from_validation)
|
from_validation)
|
||||||
|
os.chdir("..")
|
||||||
|
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
|
||||||
|
# Starting with MacOS 10.9 Mavericks, the system doesn't contain headers for the standard C++ library and
|
||||||
|
# the default library is libc++, not libstdc++. The headers are part of XCode now. But we are checking out
|
||||||
|
# headers as part of LLVM source tree, so they will be installed in clang location and clang will be able
|
||||||
|
# to find them. They may not match the library installed on the system, but it seems that this should
|
||||||
|
# not happen.
|
||||||
|
# Note, that we can also build a libc++ library, but it must be on system default location or should be passed
|
||||||
|
# to the linker explicitly (either through command line or environment variables). So we are not doing it
|
||||||
|
# currently to make the build process easier.
|
||||||
|
os.chdir("projects")
|
||||||
|
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
|
||||||
|
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
|
||||||
|
from_validation)
|
||||||
|
os.chdir("..")
|
||||||
if extra == True:
|
if extra == True:
|
||||||
os.chdir("./clang/tools")
|
os.chdir("tools/clang/tools")
|
||||||
try_do_LLVM("load extra clang extra tools ",
|
try_do_LLVM("load extra clang extra tools ",
|
||||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
|
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
|
||||||
from_validation)
|
from_validation)
|
||||||
@@ -138,7 +153,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
|||||||
try_do_LLVM("load extra clang compiler-rt ",
|
try_do_LLVM("load extra clang compiler-rt ",
|
||||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
|
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
|
||||||
from_validation)
|
from_validation)
|
||||||
os.chdir("../")
|
os.chdir("..")
|
||||||
else:
|
else:
|
||||||
tar = tarball.split(" ")
|
tar = tarball.split(" ")
|
||||||
os.makedirs(LLVM_SRC)
|
os.makedirs(LLVM_SRC)
|
||||||
@@ -563,6 +578,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
|||||||
|
|
||||||
def Main():
|
def Main():
|
||||||
global current_OS
|
global current_OS
|
||||||
|
global current_OS_version
|
||||||
|
current_OS_version = platform.release()
|
||||||
if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True:
|
if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True:
|
||||||
current_OS = "Windows"
|
current_OS = "Windows"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -3,13 +3,13 @@
|
|||||||
define(`MASK',`i32')
|
define(`MASK',`i32')
|
||||||
define(`WIDTH',`1')
|
define(`WIDTH',`1')
|
||||||
include(`util.m4')
|
include(`util.m4')
|
||||||
|
rdrand_decls()
|
||||||
; Define some basics for a 1-wide target
|
; Define some basics for a 1-wide target
|
||||||
stdlib_core()
|
stdlib_core()
|
||||||
packed_load_and_store()
|
packed_load_and_store()
|
||||||
scans()
|
scans()
|
||||||
int64minmax()
|
int64minmax()
|
||||||
aossoa()
|
aossoa()
|
||||||
rdrand_decls()
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; masked store
|
;; masked store
|
||||||
@@ -653,10 +653,121 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; svml stuff
|
; svml stuff
|
||||||
|
|
||||||
include(`svml.m4')
|
declare <1 x float> @__svml_sind(<1 x float>) nounwind readnone alwaysinline
|
||||||
svml_stubs(float,f,WIDTH)
|
declare <1 x float> @__svml_asind(<1 x float>) nounwind readnone alwaysinline
|
||||||
svml_stubs(double,d,WIDTH)
|
declare <1 x float> @__svml_cosd(<1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare void @__svml_sincosd(<1 x float>, <1 x double> *, <1 x double> *) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_tand(<1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_atand(<1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_atan2d(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_expd(<1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_logd(<1 x float>) nounwind readnone alwaysinline
|
||||||
|
declare <1 x float> @__svml_powd(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||||
|
|
||||||
|
define <1 x float> @__svml_sinf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
;%r = extractelement <1 x float> %0, i32 0
|
||||||
|
;%s = call float @llvm.sin.f32(float %r)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
unary1to1(float,@llvm.sin.f32)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_asinf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_asinf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
;%r = extractelement <1 x float> %0, i32 0
|
||||||
|
;%s = call float @llvm.asin.f32(float %r)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
unary1to1(float,@llvm.asin.f32)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_cosf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
;%r = extractelement <1 x float> %0, i32 0
|
||||||
|
;%s = call float @llvm.cos.f32(float %r)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
unary1to1(float, @llvm.cos.f32)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__svml_sincosf(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
|
||||||
|
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
|
||||||
|
; store <1 x float> %s, <1 x float> * %1
|
||||||
|
; ret void
|
||||||
|
%sin = call <1 x float> @__svml_sinf(<1 x float> %0)
|
||||||
|
%cos = call <1 x float> @__svml_cosf(<1 x float> %0)
|
||||||
|
store <1 x float> %sin, <1 x float> * %1
|
||||||
|
store <1 x float> %cos, <1 x float> * %2
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_tanf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
;%r = extractelement <1 x float> %0, i32 0
|
||||||
|
;%s = call float @llvm_tan_f32(float %r)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
;unasry1to1(float, @llvm.tan.f32)
|
||||||
|
; UNSUPPORTED!
|
||||||
|
ret <1 x float > %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_atanf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
|
||||||
|
; ret <1 x float> %ret
|
||||||
|
;%r = extractelement <1 x float> %0, i32 0
|
||||||
|
;%s = call float @llvm_atan_f32(float %r)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
;unsary1to1(float,@llvm.atan.f32)
|
||||||
|
;UNSUPPORTED!
|
||||||
|
ret <1 x float > %0
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_atan2f(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
;%y = extractelement <1 x float> %0, i32 0
|
||||||
|
;%x = extractelement <1 x float> %1, i32 0
|
||||||
|
;%q = fdiv float %y, %x
|
||||||
|
;%a = call float @llvm.atan.f32 (float %q)
|
||||||
|
;%rv = insertelement <1 x float> undef, float %a, i32 0
|
||||||
|
;ret <1 x float> %rv
|
||||||
|
; UNSUPPORTED!
|
||||||
|
ret <1 x float > %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_expf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
unary1to1(float, @llvm.exp.f32)
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_logf(<1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
unary1to1(float, @llvm.log.f32)
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x float> @__svml_powf(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||||
|
;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
|
||||||
|
;ret <1 x float> %ret
|
||||||
|
%r = extractelement <1 x float> %0, i32 0
|
||||||
|
%e = extractelement <1 x float> %1, i32 0
|
||||||
|
%s = call float @llvm.pow.f32(float %r,float %e)
|
||||||
|
%rv = insertelement <1 x float> undef, float %s, i32 0
|
||||||
|
ret <1 x float> %rv
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; float min/max
|
;; float min/max
|
||||||
@@ -881,14 +992,3 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
|
|||||||
|
|
||||||
define_avgs()
|
define_avgs()
|
||||||
|
|
||||||
;;;;;;; nvptx64
|
|
||||||
|
|
||||||
declare i32 @__tid_x() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__warpsize() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__ctaid_x() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__ctaid_y() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__ctaid_z() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__nctaid_x() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__nctaid_y() nounwind readnone alwaysinline
|
|
||||||
declare i32 @__nctaid_z() nounwind readnone alwaysinline
|
|
||||||
|
|
||||||
|
|||||||
@@ -371,6 +371,8 @@ declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
|
|||||||
<WIDTH x i1>) nounwind
|
<WIDTH x i1>) nounwind
|
||||||
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
|
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
|
||||||
<WIDTH x i1>) nounwind
|
<WIDTH x i1>) nounwind
|
||||||
|
declare i32 @__packed_store_active2(i32 * nocapture, <WIDTH x i32> %vals,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ syn keyword ispcConditional cif
|
|||||||
syn keyword ispcRepeat cdo cfor cwhile
|
syn keyword ispcRepeat cdo cfor cwhile
|
||||||
syn keyword ispcBuiltin programCount programIndex
|
syn keyword ispcBuiltin programCount programIndex
|
||||||
syn keyword ispcType export uniform varying int8 int16 int32 int64
|
syn keyword ispcType export uniform varying int8 int16 int32 int64
|
||||||
|
syn keyword ispcOperator operator
|
||||||
|
|
||||||
"double precision floating point number, with dot, optional exponent
|
"double precision floating point number, with dot, optional exponent
|
||||||
syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>"
|
syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>"
|
||||||
@@ -33,6 +34,7 @@ HiLink ispcConditional Conditional
|
|||||||
HiLink ispcRepeat Repeat
|
HiLink ispcRepeat Repeat
|
||||||
HiLink ispcBuiltin Statement
|
HiLink ispcBuiltin Statement
|
||||||
HiLink ispcType Type
|
HiLink ispcType Type
|
||||||
|
HiLink ispcOperator Operator
|
||||||
delcommand HiLink
|
delcommand HiLink
|
||||||
|
|
||||||
let b:current_syntax = "ispc"
|
let b:current_syntax = "ispc"
|
||||||
|
|||||||
@@ -1,3 +1,47 @@
|
|||||||
|
=== v1.6.0 === (19 December 2013)
|
||||||
|
|
||||||
|
A major new version of ISPC with major improvements in performance and
|
||||||
|
stability. Linux and MacOS binaries are based on a patched version of LLVM 3.3,
|
||||||
|
while Windows version is based on LLVM 3.4rc3. LLVM 3.4 significantly improves
|
||||||
|
stability on Win32 platform, so we've decided not to wait for official LLVM 3.4
|
||||||
|
release.
|
||||||
|
|
||||||
|
The list of the most significant changes is:
|
||||||
|
|
||||||
|
* New avx1-i32x4 target was added. It may play well for you, if you are focused
|
||||||
|
on integer computations or FP unit in your hardware is 128 bit wide.
|
||||||
|
|
||||||
|
* Support for calculations in double precision was extended with two new
|
||||||
|
targets avx1.1-i64x4 and avx2-i64x4.
|
||||||
|
|
||||||
|
* Language support for overloaded operators was added.
|
||||||
|
|
||||||
|
* New library shift() function was added, which is similar to rotate(), but is
|
||||||
|
non-circular.
|
||||||
|
|
||||||
|
* The language was extended to accept 3 dimensional tasking - a syntactic sugar,
|
||||||
|
which may facilitate programming of some tasks.
|
||||||
|
|
||||||
|
* Regression, which broke --opt=force-aligned-memory is fixed.
|
||||||
|
|
||||||
|
If you are not using pre-built binaries, you may notice the following changes:
|
||||||
|
|
||||||
|
* VS2012/VS2013 are supported.
|
||||||
|
|
||||||
|
* alloy.py (with -b switch) can build LLVM for you on any platform now
|
||||||
|
(except MacOS 10.9, but we know about the problem and are working on it).
|
||||||
|
This is a preferred way to build LLVM for ISPC, as all required patches for
|
||||||
|
better performance and stability will automatically apply.
|
||||||
|
|
||||||
|
* LLVM 3.5 (current trunk) is supported.
|
||||||
|
|
||||||
|
There are also multiple fixes for better performance and stability, most
|
||||||
|
notable are:
|
||||||
|
|
||||||
|
* Fixed performance problem for x2 targets.
|
||||||
|
|
||||||
|
* Fixed a problem with incorrect vzeroupper insertion on AVX target on Win32.
|
||||||
|
|
||||||
=== v1.5.0 === (27 September 2013)
|
=== v1.5.0 === (27 September 2013)
|
||||||
|
|
||||||
A major new version of ISPC with several new targets and important bug fixes.
|
A major new version of ISPC with several new targets and important bug fixes.
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ Contents:
|
|||||||
+ `Updating ISPC Programs For Changes In ISPC 1.1`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.1`_
|
||||||
+ `Updating ISPC Programs For Changes In ISPC 1.2`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.2`_
|
||||||
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
||||||
|
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
|
||||||
|
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
|
||||||
|
|
||||||
* `Getting Started with ISPC`_
|
* `Getting Started with ISPC`_
|
||||||
|
|
||||||
@@ -97,6 +99,9 @@ Contents:
|
|||||||
* `Short Vector Types`_
|
* `Short Vector Types`_
|
||||||
* `Array Types`_
|
* `Array Types`_
|
||||||
* `Struct Types`_
|
* `Struct Types`_
|
||||||
|
|
||||||
|
+ `Operators Overloading`_
|
||||||
|
|
||||||
* `Structure of Array Types`_
|
* `Structure of Array Types`_
|
||||||
|
|
||||||
+ `Declarations and Initializers`_
|
+ `Declarations and Initializers`_
|
||||||
@@ -279,6 +284,15 @@ Double precision floating point constants are floating point number with
|
|||||||
31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is
|
31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is
|
||||||
treated as single precision constant.
|
treated as single precision constant.
|
||||||
|
|
||||||
|
Updating ISPC Programs For Changes In ISPC 1.6.0
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
This release adds support for `Operators Overloading`_, so the word ``operator``
|
||||||
|
becomes a keyword and it potentially creates a conflict with existing user
|
||||||
|
functions. Also, a new library function ``packed_store_active2()`` was introduced,
|
||||||
|
which also may create a conflict with existing user functions.
|
||||||
|
|
||||||
|
|
||||||
Getting Started with ISPC
|
Getting Started with ISPC
|
||||||
=========================
|
=========================
|
||||||
|
|
||||||
@@ -1325,6 +1339,7 @@ in C:
|
|||||||
* Function overloading by parameter type
|
* Function overloading by parameter type
|
||||||
* Hexadecimal floating-point constants
|
* Hexadecimal floating-point constants
|
||||||
* Dynamic memory allocation with ``new`` and ``delete``.
|
* Dynamic memory allocation with ``new`` and ``delete``.
|
||||||
|
* Limited support for overloaded operators (`Operators Overloading`_).
|
||||||
|
|
||||||
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
||||||
C++:
|
C++:
|
||||||
@@ -2122,7 +2137,35 @@ above code, the value of ``f[index]`` needs to be able to store a different
|
|||||||
value of ``Foo::a`` for each program instance. However, a ``varying Foo``
|
value of ``Foo::a`` for each program instance. However, a ``varying Foo``
|
||||||
still has only a single ``a`` member, since ``a`` was declared with
|
still has only a single ``a`` member, since ``a`` was declared with
|
||||||
``uniform`` variability in the declaration of ``Foo``. Therefore, the
|
``uniform`` variability in the declaration of ``Foo``. Therefore, the
|
||||||
indexing operation in the last line results in an error.
|
indexing operation in the last line results in an error.
|
||||||
|
|
||||||
|
|
||||||
|
Operators Overloading
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
ISPC has limited support for overloaded operators for ``struct`` types. Only
|
||||||
|
binary operators are supported currently, namely they are: ``*, /, %, +, -, >>
|
||||||
|
and <<``. Operators overloading support is similar to the one in C++ language.
|
||||||
|
To overload an operator for ``struct S``, you need to declare and implement a
|
||||||
|
function using keyword ``operator``, which accepts two parameters of type
|
||||||
|
``struct S`` or ``struct S&`` and returns either of these types. For example:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
struct S { float re, im;};
|
||||||
|
struct S operator*(struct S a, struct S b) {
|
||||||
|
struct S result;
|
||||||
|
result.re = a.re * b.re - a.im * b.im;
|
||||||
|
result.im = a.re * b.im + a.im * b.re;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void foo(struct S a, struct S b) {
|
||||||
|
struct S mul = a*b;
|
||||||
|
print("a.re: %\na.im: %\n", a.re, a.im);
|
||||||
|
print("b.re: %\nb.im: %\n", b.re, b.im);
|
||||||
|
print("mul.re: %\nmul.im: %\n", mul.re, mul.im);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Structure of Array Types
|
Structure of Array Types
|
||||||
@@ -4050,6 +4093,14 @@ They return the total number of values stored.
|
|||||||
unsigned int val)
|
unsigned int val)
|
||||||
|
|
||||||
|
|
||||||
|
There are also ``packed_store_active2()`` functions with exactly the same
|
||||||
|
signatures and the same semantics, except that they may write one extra
|
||||||
|
element to the output array (but still returning the same value as
|
||||||
|
``packed_store_active()``). These functions use a different, branch-free
|
||||||
|
implementation on most of the supported targets, which usually (but not always)
|
||||||
|
performs better than ``packed_store_active()``. It's advised to test function
|
||||||
|
performance on user's scenarios on particular target hardware before using it.
|
||||||
|
|
||||||
As an example of how these functions can be used, the following code shows
|
As an example of how these functions can be used, the following code shows
|
||||||
the use of ``packed_store_active()``.
|
the use of ``packed_store_active()``.
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,16 @@
|
|||||||
ispc News
|
ispc News
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
ispc 1.6.0 is Released
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
A major update of ``ispc`` has been released. The main focus is on improved
|
||||||
|
performance and stability. Several new targets were added. There are also
|
||||||
|
a number of language and library extensions. Released binaries are based on
|
||||||
|
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
|
||||||
|
to the Release Notes for the complete set of changes.
|
||||||
|
|
||||||
|
|
||||||
ispc 1.5.0 is Released
|
ispc 1.5.0 is Released
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
|||||||
# This could be handy for archiving the generated documentation or
|
# This could be handy for archiving the generated documentation or
|
||||||
# if some version control system is used.
|
# if some version control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = 1.5.1dev
|
PROJECT_NUMBER = 1.6.1dev
|
||||||
|
|
||||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||||
# base path where the generated documentation will be put.
|
# base path where the generated documentation will be put.
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
|
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -146,24 +146,24 @@
|
|||||||
<PropertyGroup Label="User">
|
<PropertyGroup Label="User">
|
||||||
<ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler>
|
<ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler>
|
||||||
<Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str>
|
<Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str>
|
||||||
<Target_out>$(TargetDir)$(ISPC_file).obj</Target_out>
|
<Target_out>$(ISPC_file).obj</Target_out>
|
||||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj</Target_out>
|
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(ISPC_file)_sse2.obj</Target_out>
|
||||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj</Target_out>
|
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(ISPC_file)_sse4.obj</Target_out>
|
||||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx.obj</Target_out>
|
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(ISPC_file)_avx.obj</Target_out>
|
||||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj</Target_out>
|
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(ISPC_file)_avx11.obj</Target_out>
|
||||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj</Target_out>
|
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(ISPC_file)_avx2.obj</Target_out>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include='$(ISPC_file).ispc'>
|
<CustomBuild Include='$(ISPC_file).ispc'>
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out)</Outputs>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid>
|
<ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1472,31 +1472,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec16_i32 val,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec16_i32 val,
|
||||||
|
__vec16_i1 mask) {
|
||||||
|
int count = 0;
|
||||||
|
int32_t *ptr_ = ptr;
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
*ptr = val.v[i];
|
||||||
|
ptr += mask.v & 1;
|
||||||
|
mask.v = mask.v >> 1;
|
||||||
|
}
|
||||||
|
return ptr - ptr_;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||||
__vec16_i32 *val,
|
__vec16_i32 *val,
|
||||||
__vec16_i1 mask) {
|
__vec16_i1 mask) {
|
||||||
int count = 0;
|
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||||
for (int i = 0; i < 16; ++i) {
|
|
||||||
if ((mask.v & (1 << i)) != 0) {
|
|
||||||
val->v[i] = *ptr++;
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||||
__vec16_i32 val,
|
__vec16_i32 val,
|
||||||
__vec16_i1 mask) {
|
__vec16_i1 mask) {
|
||||||
int count = 0;
|
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||||
for (int i = 0; i < 16; ++i) {
|
}
|
||||||
if ((mask.v & (1 << i)) != 0) {
|
|
||||||
*ptr++ = val.v[i];
|
|
||||||
++count;
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||||
}
|
__vec16_i32 val,
|
||||||
}
|
__vec16_i1 mask) {
|
||||||
return count;
|
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1523,31 +1523,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec32_i32 val,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec32_i32 val,
|
||||||
|
__vec32_i1 mask) {
|
||||||
|
int count = 0;
|
||||||
|
int32_t *ptr_ = ptr;
|
||||||
|
for (int i = 0; i < 32; ++i) {
|
||||||
|
*ptr = val.v[i];
|
||||||
|
ptr += mask.v & 1;
|
||||||
|
mask.v = mask.v >> 1;
|
||||||
|
}
|
||||||
|
return ptr - ptr_;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||||
__vec32_i32 *val,
|
__vec32_i32 *val,
|
||||||
__vec32_i1 mask) {
|
__vec32_i1 mask) {
|
||||||
int count = 0;
|
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||||
for (int i = 0; i < 32; ++i) {
|
|
||||||
if ((mask.v & (1 << i)) != 0) {
|
|
||||||
val->v[i] = *ptr++;
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||||
__vec32_i32 val,
|
__vec32_i32 val,
|
||||||
__vec32_i1 mask) {
|
__vec32_i1 mask) {
|
||||||
int count = 0;
|
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||||
for (int i = 0; i < 32; ++i) {
|
}
|
||||||
if ((mask.v & (1 << i)) != 0) {
|
|
||||||
*ptr++ = val.v[i];
|
|
||||||
++count;
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||||
}
|
__vec32_i32 val,
|
||||||
}
|
__vec32_i1 mask) {
|
||||||
return count;
|
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1656,31 +1656,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec64_i32 val,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec64_i32 val,
|
||||||
|
__vec64_i1 mask) {
|
||||||
|
int count = 0;
|
||||||
|
int32_t *ptr_ = ptr;
|
||||||
|
for (int i = 0; i < 64; ++i) {
|
||||||
|
*ptr = val.v[i];
|
||||||
|
ptr += mask.v & 1;
|
||||||
|
mask.v = mask.v >> 1;
|
||||||
|
}
|
||||||
|
return ptr - ptr_;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||||
__vec64_i32 *val,
|
__vec64_i32 *val,
|
||||||
__vec64_i1 mask) {
|
__vec64_i1 mask) {
|
||||||
int count = 0;
|
return __packed_load_active((int32_t *) ptr, val, mask);
|
||||||
for (int i = 0; i < 64; ++i) {
|
|
||||||
if ((mask.v & (1ull << i)) != 0) {
|
|
||||||
val->v[i] = *ptr++;
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||||
__vec64_i32 val,
|
__vec64_i32 val,
|
||||||
__vec64_i1 mask) {
|
__vec64_i1 mask) {
|
||||||
int count = 0;
|
return __packed_store_active((int32_t *) ptr, val, mask);
|
||||||
for (int i = 0; i < 64; ++i) {
|
}
|
||||||
if ((mask.v & (1ull << i)) != 0) {
|
|
||||||
*ptr++ = val.v[i];
|
|
||||||
++count;
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||||
}
|
__vec64_i32 val,
|
||||||
}
|
__vec64_i1 mask) {
|
||||||
return count;
|
return __packed_store_active2((int32_t *) ptr, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2451,20 +2451,24 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, _
|
|||||||
return _mm_countbits_32(uint32_t(mask));
|
return _mm_countbits_32(uint32_t(mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||||
|
{
|
||||||
|
return __packed_store_active(p, val, mask);
|
||||||
|
}
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask)
|
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask)
|
||||||
{
|
{
|
||||||
__vec16_i32 v = __load<64>(val);
|
return __packed_load_active((uint32_t *)p, val, mask);
|
||||||
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
|
||||||
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
|
||||||
__store<64>(val, v);
|
|
||||||
return _mm_countbits_32(uint32_t(mask));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||||
{
|
{
|
||||||
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
return __packed_store_active((uint32_t *)p, val, mask);
|
||||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
}
|
||||||
return _mm_countbits_32(uint32_t(mask));
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||||
|
{
|
||||||
|
return __packed_store_active(p, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -2496,20 +2496,23 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec8_i32 val,
|
|||||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
return _mm_countbits_32(uint32_t(0xFF & mask));
|
||||||
}
|
}
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
|
||||||
|
__vec4_i1 mask) {
|
||||||
|
return __packed_store_active(ptr, val, mask);
|
||||||
|
}
|
||||||
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
|
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
|
||||||
__vec8_i1 mask) {
|
__vec8_i1 mask) {
|
||||||
__vec8_i32 v = __load<64>(val);
|
return __packed_load_active((uint32_t *)p, val, mask);
|
||||||
v = _mm512_mask_extloadunpacklo_epi32(v, 0xFF & mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
|
||||||
v = _mm512_mask_extloadunpackhi_epi32(v, 0xFF & mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
|
||||||
__store<64>(val, v);
|
|
||||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
|
||||||
}
|
}
|
||||||
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
|
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
|
||||||
__vec8_i1 mask) {
|
__vec8_i1 mask) {
|
||||||
_mm512_mask_extpackstorelo_epi32(p, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
return __packed_store_active((uint32_t *)p, val, mask);
|
||||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
|
||||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
|
||||||
}
|
}
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
|
||||||
|
__vec4_i1 mask) {
|
||||||
|
return __packed_store_active(ptr, val, mask);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -1260,6 +1260,13 @@ static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i3
|
|||||||
return __vec16_i64(val.v, _mm512_setzero_epi32());
|
return __vec16_i64(val.v, _mm512_setzero_epi32());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1 &val)
|
||||||
|
{
|
||||||
|
__vec16_i32 ret = _mm512_setzero_epi32();
|
||||||
|
__vec16_i32 one = _mm512_set1_epi32(-1);
|
||||||
|
return _mm512_mask_mov_epi32(ret, val, one);
|
||||||
|
}
|
||||||
|
|
||||||
static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val)
|
static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val)
|
||||||
{
|
{
|
||||||
__vec16_i32 ret = _mm512_setzero_epi32();
|
__vec16_i32 ret = _mm512_setzero_epi32();
|
||||||
@@ -1878,6 +1885,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
|
|||||||
return _mm_countbits_32(uint32_t(mask));
|
return _mm_countbits_32(uint32_t(mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||||
|
{
|
||||||
|
return __packed_store_active(p, val, mask);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// prefetch
|
// prefetch
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -3798,6 +3798,25 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec4_i32 val,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
|
||||||
|
__vec4_i1 mask) {
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
ptr[count] = _mm_extract_epi32(val.v, 0);
|
||||||
|
count -= _mm_extract_ps(mask.v, 0);
|
||||||
|
|
||||||
|
ptr[count] = _mm_extract_epi32(val.v, 1);
|
||||||
|
count -= _mm_extract_ps(mask.v, 1);
|
||||||
|
|
||||||
|
ptr[count] = _mm_extract_epi32(val.v, 2);
|
||||||
|
count -= _mm_extract_ps(mask.v, 2);
|
||||||
|
|
||||||
|
ptr[count] = _mm_extract_epi32(val.v, 3);
|
||||||
|
count -= _mm_extract_ps(mask.v, 3);
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val,
|
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val,
|
||||||
__vec4_i1 mask) {
|
__vec4_i1 mask) {
|
||||||
return __packed_load_active((int32_t *)ptr, val, mask);
|
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||||
@@ -3808,6 +3827,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, __vec4_i32 val,
|
|||||||
return __packed_store_active((int32_t *)ptr, val, mask);
|
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
|
||||||
|
__vec4_i1 mask) {
|
||||||
|
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// aos/soa
|
// aos/soa
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
|
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid>
|
<ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
|
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
|
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid>
|
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid>
|
<ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
@@ -170,17 +170,44 @@
|
|||||||
|
|
||||||
// Signature of ispc-generated 'task' functions
|
// Signature of ispc-generated 'task' functions
|
||||||
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
||||||
int taskIndex, int taskCount);
|
int taskIndex, int taskCount,
|
||||||
|
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||||
|
int taskCount0, int taskCount1, int taskCount2);
|
||||||
|
|
||||||
// Small structure used to hold the data for each task
|
// Small structure used to hold the data for each task
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
__declspec(align(16))
|
||||||
|
#endif
|
||||||
struct TaskInfo {
|
struct TaskInfo {
|
||||||
TaskFuncType func;
|
TaskFuncType func;
|
||||||
void *data;
|
void *data;
|
||||||
int taskIndex, taskCount;
|
int taskIndex;
|
||||||
|
int taskCount3d[3];
|
||||||
#if defined(ISPC_IS_WINDOWS)
|
#if defined(ISPC_IS_WINDOWS)
|
||||||
event taskEvent;
|
event taskEvent;
|
||||||
#endif
|
#endif
|
||||||
};
|
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||||
|
int taskIndex0() const
|
||||||
|
{
|
||||||
|
return taskIndex % taskCount3d[0];
|
||||||
|
}
|
||||||
|
int taskIndex1() const
|
||||||
|
{
|
||||||
|
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
|
||||||
|
}
|
||||||
|
int taskIndex2() const
|
||||||
|
{
|
||||||
|
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
|
||||||
|
}
|
||||||
|
int taskCount0() const { return taskCount3d[0]; }
|
||||||
|
int taskCount1() const { return taskCount3d[1]; }
|
||||||
|
int taskCount2() const { return taskCount3d[2]; }
|
||||||
|
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
|
||||||
|
}
|
||||||
|
#ifndef _MSC_VER
|
||||||
|
__attribute__((aligned(32)));
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
// ispc expects these functions to have C linkage / not be mangled
|
// ispc expects these functions to have C linkage / not be mangled
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@@ -518,7 +545,9 @@ lRunTask(void *ti) {
|
|||||||
|
|
||||||
// Actually run the task
|
// Actually run the task
|
||||||
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
||||||
taskInfo->taskIndex, taskInfo->taskCount);
|
taskInfo->taskIndex, taskInfo->taskCount(),
|
||||||
|
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
|
||||||
|
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -559,7 +588,9 @@ lRunTask(LPVOID param) {
|
|||||||
// will cause bugs in code that uses those.
|
// will cause bugs in code that uses those.
|
||||||
int threadIndex = 0;
|
int threadIndex = 0;
|
||||||
int threadCount = 1;
|
int threadCount = 1;
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
|
|
||||||
// Signal the event that this task is done
|
// Signal the event that this task is done
|
||||||
ti->taskEvent.set();
|
ti->taskEvent.set();
|
||||||
@@ -660,7 +691,9 @@ lTaskEntry(void *arg) {
|
|||||||
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
||||||
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
||||||
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
||||||
myTask->taskCount);
|
myTask->taskCount(),
|
||||||
|
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||||
|
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Decrement the "number of unfinished tasks" counter in the task
|
// Decrement the "number of unfinished tasks" counter in the task
|
||||||
@@ -871,7 +904,9 @@ TaskGroup::Sync() {
|
|||||||
// Do work for _myTask_
|
// Do work for _myTask_
|
||||||
//
|
//
|
||||||
// FIXME: bogus values for thread index/thread count here as well..
|
// FIXME: bogus values for thread index/thread count here as well..
|
||||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
|
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
|
||||||
|
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||||
|
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Decrement the number of unfinished tasks counter
|
// Decrement the number of unfinished tasks counter
|
||||||
@@ -901,7 +936,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
|
|
||||||
// Actually run the task.
|
// Actually run the task.
|
||||||
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -930,7 +967,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
// Actually run the task.
|
// Actually run the task.
|
||||||
int threadIndex = omp_get_thread_num();
|
int threadIndex = omp_get_thread_num();
|
||||||
int threadCount = omp_get_num_threads();
|
int threadCount = omp_get_num_threads();
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -961,7 +1000,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount;
|
||||||
|
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -988,7 +1029,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
|||||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||||
int threadIndex = ti->taskIndex;
|
int threadIndex = ti->taskIndex;
|
||||||
int threadCount = ti->taskCount;
|
int threadCount = ti->taskCount;
|
||||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||||
|
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||||
|
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1041,7 +1084,8 @@ FreeTaskGroup(TaskGroup *tg) {
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void
|
void
|
||||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
|
||||||
|
const int count = count0*count1*count2;
|
||||||
TaskGroup *taskGroup;
|
TaskGroup *taskGroup;
|
||||||
if (*taskGroupPtr == NULL) {
|
if (*taskGroupPtr == NULL) {
|
||||||
InitTaskSystem();
|
InitTaskSystem();
|
||||||
@@ -1057,7 +1101,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
|||||||
ti->func = (TaskFuncType)func;
|
ti->func = (TaskFuncType)func;
|
||||||
ti->data = data;
|
ti->data = data;
|
||||||
ti->taskIndex = i;
|
ti->taskIndex = i;
|
||||||
ti->taskCount = count;
|
ti->taskCount3d[0] = count0;
|
||||||
|
ti->taskCount3d[1] = count1;
|
||||||
|
ti->taskCount3d[2] = count2;
|
||||||
}
|
}
|
||||||
taskGroup->Launch(baseIndex, count);
|
taskGroup->Launch(baseIndex, count);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,23 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
|
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
|||||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
|||||||
#ifndef ISPC_H
|
#ifndef ISPC_H
|
||||||
#define ISPC_H
|
#define ISPC_H
|
||||||
|
|
||||||
#define ISPC_VERSION "1.5.1dev"
|
#define ISPC_VERSION "1.6.1dev"
|
||||||
|
|
||||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||||
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
||||||
|
|||||||
5
opt.cpp
5
opt.cpp
@@ -5153,6 +5153,11 @@ FixBooleanSelectPass::runOnFunction(llvm::Function &F) {
|
|||||||
// LLVM 3.3 only
|
// LLVM 3.3 only
|
||||||
#if defined(LLVM_3_3)
|
#if defined(LLVM_3_3)
|
||||||
|
|
||||||
|
// Don't optimize generic targets.
|
||||||
|
if (g->target->getISA() == Target::GENERIC) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
for (llvm::Function::iterator I = F.begin(), E = F.end();
|
for (llvm::Function::iterator I = F.begin(), E = F.end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
llvm::BasicBlock* bb = &*I;
|
llvm::BasicBlock* bb = &*I;
|
||||||
|
|||||||
Reference in New Issue
Block a user