merged with master

This commit is contained in:
Evghenii
2013-12-25 21:32:34 +01:00
29 changed files with 633 additions and 112 deletions

View File

@@ -89,7 +89,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
if version_LLVM == "trunk": if version_LLVM == "trunk":
SVN_PATH="trunk" SVN_PATH="trunk"
if version_LLVM == "3.4": if version_LLVM == "3.4":
SVN_PATH="tags/RELEASE_34/rc2" SVN_PATH="tags/RELEASE_34/final"
version_LLVM = "3_4" version_LLVM = "3_4"
if version_LLVM == "3.3": if version_LLVM == "3.3":
SVN_PATH="tags/RELEASE_33/final" SVN_PATH="tags/RELEASE_33/final"
@@ -129,8 +129,23 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ", try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang", "svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
from_validation) from_validation)
os.chdir("..")
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
# Starting with MacOS 10.9 Mavericks, the system doesn't contain headers for the standard C++ library and
# the default library is libc++, not libstdc++. The headers are part of XCode now. But we are checking out
# headers as part of LLVM source tree, so they will be installed in clang location and clang will be able
# to find them. Though they may not match to the library installed in the system, but seems that this should
# not happen.
# Note, that we can also build a libc++ library, but it must be on system default location or should be passed
# to the linker explicitly (either through command line or environment variables). So we are not doing it
# currently to make the build process easier.
os.chdir("projects")
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
from_validation)
os.chdir("..")
if extra == True: if extra == True:
os.chdir("./clang/tools") os.chdir("tools/clang/tools")
try_do_LLVM("load extra clang extra tools ", try_do_LLVM("load extra clang extra tools ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra", "svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
from_validation) from_validation)
@@ -138,7 +153,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
try_do_LLVM("load extra clang compiler-rt ", try_do_LLVM("load extra clang compiler-rt ",
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt", "svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
from_validation) from_validation)
os.chdir("../") os.chdir("..")
else: else:
tar = tarball.split(" ") tar = tarball.split(" ")
os.makedirs(LLVM_SRC) os.makedirs(LLVM_SRC)
@@ -563,6 +578,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
def Main(): def Main():
global current_OS global current_OS
global current_OS_version
current_OS_version = platform.release()
if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True: if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True:
current_OS = "Windows" current_OS = "Windows"
else: else:

View File

@@ -3,13 +3,13 @@
define(`MASK',`i32') define(`MASK',`i32')
define(`WIDTH',`1') define(`WIDTH',`1')
include(`util.m4') include(`util.m4')
rdrand_decls()
; Define some basics for a 1-wide target ; Define some basics for a 1-wide target
stdlib_core() stdlib_core()
packed_load_and_store() packed_load_and_store()
scans() scans()
int64minmax() int64minmax()
aossoa() aossoa()
rdrand_decls()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store ;; masked store
@@ -653,10 +653,121 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; svml stuff ; svml stuff
include(`svml.m4') declare <1 x float> @__svml_sind(<1 x float>) nounwind readnone alwaysinline
svml_stubs(float,f,WIDTH) declare <1 x float> @__svml_asind(<1 x float>) nounwind readnone alwaysinline
svml_stubs(double,d,WIDTH) declare <1 x float> @__svml_cosd(<1 x float>) nounwind readnone alwaysinline
declare void @__svml_sincosd(<1 x float>, <1 x double> *, <1 x double> *) nounwind readnone alwaysinline
declare <1 x float> @__svml_tand(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_atand(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_atan2d(<1 x float>, <1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_expd(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_logd(<1 x float>) nounwind readnone alwaysinline
declare <1 x float> @__svml_powd(<1 x float>, <1 x float>) nounwind readnone alwaysinline
;; 1-wide sine. The body is produced by the m4 helper invoked on the last
;; line before the closing brace, which is given the float type and the
;; llvm.sin.f32 intrinsic. Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_sinf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.sin.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float,@llvm.sin.f32)
}
;; 1-wide arcsine. The body is produced by the m4 helper invoked on the last
;; line before the closing brace, which is given the float type and the
;; llvm.asin.f32 intrinsic. Lines starting with a semicolon are disabled code.
;; NOTE(review): confirm that an llvm.asin.f32 intrinsic exists in the LLVM
;; versions this file is built against.
define <1 x float> @__svml_asinf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_asinf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.asin.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float,@llvm.asin.f32)
}
;; 1-wide cosine. The body is produced by the m4 helper invoked on the last
;; line before the closing brace, which is given the float type and the
;; llvm.cos.f32 intrinsic. Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_cosf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm.cos.f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
unary1to1(float, @llvm.cos.f32)
}
;; 1-wide combined sine and cosine. The sine of the input is stored through
;; the first pointer argument and the cosine through the second.
;; This function writes memory through its pointer arguments, so it must not
;; carry the readnone attribute: with readnone the optimizer is allowed to
;; treat a call as having no effect and drop it together with the stores.
define void @__svml_sincosf(<1 x float>, <1 x float> *, <1 x float> *) nounwind alwaysinline {
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
; store <1 x float> %s, <1 x float> * %1
; ret void
  %sin = call <1 x float> @__svml_sinf(<1 x float> %0)
  %cos = call <1 x float> @__svml_cosf(<1 x float> %0)
  store <1 x float> %sin, <1 x float> * %1
  store <1 x float> %cos, <1 x float> * %2
  ret void
}
;; 1-wide tangent placeholder. No tangent is computed here: the function
;; returns its argument unchanged (see the UNSUPPORTED marker in the body).
;; Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_tanf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_tan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unasry1to1(float, @llvm.tan.f32)
; UNSUPPORTED!
ret <1 x float > %0
}
;; 1-wide arctangent placeholder. No arctangent is computed here: the function
;; returns its argument unchanged (see the UNSUPPORTED marker in the body).
;; Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_atanf(<1 x float>) nounwind readnone alwaysinline {
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
; ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_atan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unsary1to1(float,@llvm.atan.f32)
;UNSUPPORTED!
ret <1 x float > %0
}
;; 1-wide two-argument arctangent placeholder. Nothing is computed here: the
;; function returns its first argument unchanged and ignores the second
;; (see the UNSUPPORTED marker in the body).
;; Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_atan2f(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
;ret <1 x float> %ret
;%y = extractelement <1 x float> %0, i32 0
;%x = extractelement <1 x float> %1, i32 0
;%q = fdiv float %y, %x
;%a = call float @llvm.atan.f32 (float %q)
;%rv = insertelement <1 x float> undef, float %a, i32 0
;ret <1 x float> %rv
; UNSUPPORTED!
ret <1 x float > %0
}
;; 1-wide exponential. The body is produced by the m4 helper invoked on the
;; last line before the closing brace, which is given the float type and the
;; llvm.exp.f32 intrinsic. Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_expf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
;ret <1 x float> %ret
unary1to1(float, @llvm.exp.f32)
}
;; 1-wide logarithm. The body is produced by the m4 helper invoked on the
;; last line before the closing brace, which is given the float type and the
;; llvm.log.f32 intrinsic. Lines starting with a semicolon are disabled code.
define <1 x float> @__svml_logf(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
;ret <1 x float> %ret
unary1to1(float, @llvm.log.f32)
}
;; 1-wide power. Takes the single element of each argument, raises the first
;; to the second with the llvm.pow.f32 intrinsic, and returns the scalar
;; wrapped back into a one-element vector.
define <1 x float> @__svml_powf(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
  %base_lane = extractelement <1 x float> %0, i32 0
  %exp_lane = extractelement <1 x float> %1, i32 0
  %pow_lane = call float @llvm.pow.f32(float %base_lane, float %exp_lane)
  %vec_out = insertelement <1 x float> undef, float %pow_lane, i32 0
  ret <1 x float> %vec_out
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float min/max ;; float min/max
@@ -881,14 +992,3 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
define_avgs() define_avgs()
;;;;;;; nvptx64
declare i32 @__tid_x() nounwind readnone alwaysinline
declare i32 @__warpsize() nounwind readnone alwaysinline
declare i32 @__ctaid_x() nounwind readnone alwaysinline
declare i32 @__ctaid_y() nounwind readnone alwaysinline
declare i32 @__ctaid_z() nounwind readnone alwaysinline
declare i32 @__nctaid_x() nounwind readnone alwaysinline
declare i32 @__nctaid_y() nounwind readnone alwaysinline
declare i32 @__nctaid_z() nounwind readnone alwaysinline

View File

@@ -371,6 +371,8 @@ declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
<WIDTH x i1>) nounwind <WIDTH x i1>) nounwind
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals, declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
<WIDTH x i1>) nounwind <WIDTH x i1>) nounwind
declare i32 @__packed_store_active2(i32 * nocapture, <WIDTH x i32> %vals,
<WIDTH x i1>) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -18,6 +18,7 @@ syn keyword ispcConditional cif
syn keyword ispcRepeat cdo cfor cwhile syn keyword ispcRepeat cdo cfor cwhile
syn keyword ispcBuiltin programCount programIndex syn keyword ispcBuiltin programCount programIndex
syn keyword ispcType export uniform varying int8 int16 int32 int64 syn keyword ispcType export uniform varying int8 int16 int32 int64
syn keyword ispcOperator operator
"double precision floating point number, with dot, optional exponent "double precision floating point number, with dot, optional exponent
syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>" syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>"
@@ -33,6 +34,7 @@ HiLink ispcConditional Conditional
HiLink ispcRepeat Repeat HiLink ispcRepeat Repeat
HiLink ispcBuiltin Statement HiLink ispcBuiltin Statement
HiLink ispcType Type HiLink ispcType Type
HiLink ispcOperator Operator
delcommand HiLink delcommand HiLink
let b:current_syntax = "ispc" let b:current_syntax = "ispc"

View File

@@ -1,3 +1,47 @@
=== v1.6.0 === (19 December 2013)
A major new version of ISPC with major improvements in performance and
stability. Linux and MacOS binaries are based on a patched version of LLVM 3.3,
while the Windows version is based on LLVM 3.4rc3. LLVM 3.4 significantly improves
stability on the Win32 platform, so we've decided not to wait for the official
LLVM 3.4 release.
The list of the most significant changes is:
* New avx1-i32x4 target was added. It may play well for you, if you are focused
on integer computations or FP unit in your hardware is 128 bit wide.
* Support for calculations in double precision was extended with two new
targets avx1.1-i64x4 and avx2-i64x4.
* Language support for overloaded operators was added.
* New library shift() function was added, which is similar to rotate(), but is
non-circular.
* The language was extended to accept 3 dimensional tasking - a syntactic sugar,
which may facilitate programming of some tasks.
* A regression that broke --opt=force-aligned-memory is fixed.
If you are not using pre-built binaries, you may notice the following changes:
* VS2012/VS2013 are supported.
* alloy.py (with the -b switch) can build LLVM for you on any platform now
(except MacOS 10.9, but we know about the problem and are working on it).
This is the preferred way to build LLVM for ISPC, as all patches required for
better performance and stability are applied automatically.
* LLVM 3.5 (current trunk) is supported.
There are also multiple fixes for better performance and stability, most
notable are:
* Fixed performance problem for x2 targets.
* Fixed a problem with incorrect vzeroupper insertion on AVX target on Win32.
=== v1.5.0 === (27 September 2013) === v1.5.0 === (27 September 2013)
A major new version of ISPC with several new targets and important bug fixes. A major new version of ISPC with several new targets and important bug fixes.

View File

@@ -48,6 +48,8 @@ Contents:
+ `Updating ISPC Programs For Changes In ISPC 1.1`_ + `Updating ISPC Programs For Changes In ISPC 1.1`_
+ `Updating ISPC Programs For Changes In ISPC 1.2`_ + `Updating ISPC Programs For Changes In ISPC 1.2`_
+ `Updating ISPC Programs For Changes In ISPC 1.3`_ + `Updating ISPC Programs For Changes In ISPC 1.3`_
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
* `Getting Started with ISPC`_ * `Getting Started with ISPC`_
@@ -97,6 +99,9 @@ Contents:
* `Short Vector Types`_ * `Short Vector Types`_
* `Array Types`_ * `Array Types`_
* `Struct Types`_ * `Struct Types`_
+ `Operators Overloading`_
* `Structure of Array Types`_ * `Structure of Array Types`_
+ `Declarations and Initializers`_ + `Declarations and Initializers`_
@@ -279,6 +284,15 @@ Double precision floating point constants are floating point number with
31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is 31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is
treated as single precision constant. treated as single precision constant.
Updating ISPC Programs For Changes In ISPC 1.6.0
------------------------------------------------
This release adds support for `Operators Overloading`_, so the word ``operator``
becomes a keyword and may conflict with existing user functions of that name.
Also, a new library function ``packed_store_active2()`` was introduced,
which may likewise conflict with existing user functions.
Getting Started with ISPC Getting Started with ISPC
========================= =========================
@@ -1325,6 +1339,7 @@ in C:
* Function overloading by parameter type * Function overloading by parameter type
* Hexadecimal floating-point constants * Hexadecimal floating-point constants
* Dynamic memory allocation with ``new`` and ``delete``. * Dynamic memory allocation with ``new`` and ``delete``.
* Limited support for overloaded operators (`Operators Overloading`_).
``ispc`` also adds a number of new features that aren't in C89, C99, or ``ispc`` also adds a number of new features that aren't in C89, C99, or
C++: C++:
@@ -2122,7 +2137,35 @@ above code, the value of ``f[index]`` needs to be able to store a different
value of ``Foo::a`` for each program instance. However, a ``varying Foo`` value of ``Foo::a`` for each program instance. However, a ``varying Foo``
still has only a single ``a`` member, since ``a`` was declared with still has only a single ``a`` member, since ``a`` was declared with
``uniform`` variability in the declaration of ``Foo``. Therefore, the ``uniform`` variability in the declaration of ``Foo``. Therefore, the
indexing operation in the last line results in an error. indexing operation in the last line results in an error.
Operators Overloading
---------------------
ISPC has limited support for overloaded operators for ``struct`` types. Only
binary operators are currently supported, namely ``*``, ``/``, ``%``, ``+``,
``-``, ``>>`` and ``<<``. Operator overloading is similar to that in C++.
To overload an operator for ``struct S``, you need to declare and implement a
function using the ``operator`` keyword, which accepts two parameters of type
``struct S`` or ``struct S&`` and returns either of these types. For example:
::
struct S { float re, im;};
struct S operator*(struct S a, struct S b) {
struct S result;
result.re = a.re * b.re - a.im * b.im;
result.im = a.re * b.im + a.im * b.re;
return result;
}
void foo(struct S a, struct S b) {
struct S mul = a*b;
print("a.re: %\na.im: %\n", a.re, a.im);
print("b.re: %\nb.im: %\n", b.re, b.im);
print("mul.re: %\nmul.im: %\n", mul.re, mul.im);
}
Structure of Array Types Structure of Array Types
@@ -4050,6 +4093,14 @@ They return the total number of values stored.
unsigned int val) unsigned int val)
There are also ``packed_store_active2()`` functions with exactly the same
signatures and the same semantics, except that they may write one extra
element to the output array (while still returning the same value as
``packed_store_active()``). These functions use a different, branch-free
implementation on most supported targets, which usually (but not always)
performs better than ``packed_store_active()``. It is advisable to measure their
performance in your own scenarios on the target hardware before using them.
As an example of how these functions can be used, the following code shows As an example of how these functions can be used, the following code shows
the use of ``packed_store_active()``. the use of ``packed_store_active()``.

View File

@@ -2,6 +2,16 @@
ispc News ispc News
========= =========
ispc 1.6.0 is Released
----------------------
A major update of ``ispc`` has been released. The main focus is on improved
performance and stability. Several new targets were added. There are also
a number of language and library extensions. Released binaries are based on
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
to the Release Notes for the complete set of changes.
ispc 1.5.0 is Released ispc 1.5.0 is Released
---------------------- ----------------------

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or # This could be handy for archiving the generated documentation or
# if some version control system is used. # if some version control system is used.
PROJECT_NUMBER = 1.5.1dev PROJECT_NUMBER = 1.6.1dev
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put. # base path where the generated documentation will be put.

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid> <ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -146,24 +146,24 @@
<PropertyGroup Label="User"> <PropertyGroup Label="User">
<ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler> <ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler>
<Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str> <Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str>
<Target_out>$(TargetDir)$(ISPC_file).obj</Target_out> <Target_out>$(ISPC_file).obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj</Target_out> <Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(ISPC_file)_sse2.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj</Target_out> <Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(ISPC_file)_sse4.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx.obj</Target_out> <Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(ISPC_file)_avx.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj</Target_out> <Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(ISPC_file)_avx11.obj</Target_out>
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj</Target_out> <Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(ISPC_file)_avx2.obj</Target_out>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include='$(ISPC_file).ispc'> <CustomBuild Include='$(ISPC_file).ispc'>
<FileType>Document</FileType> <FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command> <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command> <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out)</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command> <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command> <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out)</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out)</Outputs>
</CustomBuild> </CustomBuild>
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid> <ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1472,31 +1472,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec16_i32 val,
return count; return count;
} }
// Branch-free variant of __packed_store_active() for 16-wide vectors.
//
// Every lane's value is written to the current output position, but the
// position only advances when that lane's mask bit is set, so a value from
// an inactive lane is overwritten by the next store.  When the last lane(s)
// are inactive, the final store lands one element past the packed output;
// the destination therefore needs room for one extra element.
//
// Returns the number of elements the output pointer advanced, i.e. the
// number of lanes whose mask bit was set.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec16_i32 val,
                                                  __vec16_i1 mask) {
    int32_t *ptr_ = ptr;  // remember the start to compute the stored count
    for (int i = 0; i < 16; ++i) {
        *ptr = val.v[i];
        ptr += mask.v & 1;      // advance only for an active lane
        mask.v = mask.v >> 1;   // move the next lane's bit into bit 0
    }
    return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec16_i32 *val, __vec16_i32 *val,
__vec16_i1 mask) { __vec16_i1 mask) {
int count = 0; return __packed_load_active((int32_t *)ptr, val, mask);
for (int i = 0; i < 16; ++i) {
if ((mask.v & (1 << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
} }
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec16_i32 val, __vec16_i32 val,
__vec16_i1 mask) { __vec16_i1 mask) {
int count = 0; return __packed_store_active((int32_t *)ptr, val, mask);
for (int i = 0; i < 16; ++i) { }
if ((mask.v & (1 << i)) != 0) {
*ptr++ = val.v[i];
++count; static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
} __vec16_i32 val,
} __vec16_i1 mask) {
return count; return __packed_store_active2((int32_t *)ptr, val, mask);
} }

View File

@@ -1523,31 +1523,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec32_i32 val,
return count; return count;
} }
// Branch-free variant of __packed_store_active() for 32-wide vectors.
//
// Every lane's value is written to the current output position, but the
// position only advances when that lane's mask bit is set, so a value from
// an inactive lane is overwritten by the next store.  When the last lane(s)
// are inactive, the final store lands one element past the packed output;
// the destination therefore needs room for one extra element.
//
// Returns the number of elements the output pointer advanced, i.e. the
// number of lanes whose mask bit was set.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec32_i32 val,
                                                  __vec32_i1 mask) {
    int32_t *ptr_ = ptr;  // remember the start to compute the stored count
    for (int i = 0; i < 32; ++i) {
        *ptr = val.v[i];
        ptr += mask.v & 1;      // advance only for an active lane
        mask.v = mask.v >> 1;   // move the next lane's bit into bit 0
    }
    return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec32_i32 *val, __vec32_i32 *val,
__vec32_i1 mask) { __vec32_i1 mask) {
int count = 0; return __packed_load_active((int32_t *)ptr, val, mask);
for (int i = 0; i < 32; ++i) {
if ((mask.v & (1 << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
} }
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec32_i32 val, __vec32_i32 val,
__vec32_i1 mask) { __vec32_i1 mask) {
int count = 0; return __packed_store_active((int32_t *)ptr, val, mask);
for (int i = 0; i < 32; ++i) { }
if ((mask.v & (1 << i)) != 0) {
*ptr++ = val.v[i];
++count; static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
} __vec32_i32 val,
} __vec32_i1 mask) {
return count; return __packed_store_active2((int32_t *)ptr, val, mask);
} }

View File

@@ -1656,31 +1656,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec64_i32 val,
return count; return count;
} }
// Branch-free variant of __packed_store_active() for 64-wide vectors.
//
// Every lane's value is written to the current output position, but the
// position only advances when that lane's mask bit is set, so a value from
// an inactive lane is overwritten by the next store.  When the last lane(s)
// are inactive, the final store lands one element past the packed output;
// the destination therefore needs room for one extra element.
//
// Returns the number of elements the output pointer advanced, i.e. the
// number of lanes whose mask bit was set.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec64_i32 val,
                                                  __vec64_i1 mask) {
    int32_t *ptr_ = ptr;  // remember the start to compute the stored count
    for (int i = 0; i < 64; ++i) {
        *ptr = val.v[i];
        ptr += mask.v & 1;      // advance only for an active lane
        mask.v = mask.v >> 1;   // move the next lane's bit into bit 0
    }
    return ptr - ptr_;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
__vec64_i32 *val, __vec64_i32 *val,
__vec64_i1 mask) { __vec64_i1 mask) {
int count = 0; return __packed_load_active((int32_t *) ptr, val, mask);
for (int i = 0; i < 64; ++i) {
if ((mask.v & (1ull << i)) != 0) {
val->v[i] = *ptr++;
++count;
}
}
return count;
} }
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
__vec64_i32 val, __vec64_i32 val,
__vec64_i1 mask) { __vec64_i1 mask) {
int count = 0; return __packed_store_active((int32_t *) ptr, val, mask);
for (int i = 0; i < 64; ++i) { }
if ((mask.v & (1ull << i)) != 0) {
*ptr++ = val.v[i];
++count; static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
} __vec64_i32 val,
} __vec64_i1 mask) {
return count; return __packed_store_active2((int32_t *) ptr, val, mask);
} }

View File

@@ -2451,20 +2451,24 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, _
return _mm_countbits_32(uint32_t(mask)); return _mm_countbits_32(uint32_t(mask));
} }
// packed_store_active2 entry point for unsigned destinations.  It has no
// implementation of its own here: it forwards all three arguments to
// __packed_store_active() and returns that function's result.
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
return __packed_store_active(p, val, mask);
}
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask) static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask)
{ {
__vec16_i32 v = __load<64>(val); return __packed_load_active((uint32_t *)p, val, mask);
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
__store<64>(val, v);
return _mm_countbits_32(uint32_t(mask));
} }
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask) static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
{ {
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); return __packed_store_active((uint32_t *)p, val, mask);
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); }
return _mm_countbits_32(uint32_t(mask));
// packed_store_active2 entry point for signed destinations.  It has no
// implementation of its own here: it forwards all three arguments to
// __packed_store_active() and returns that function's result.
static FORCEINLINE int32_t __packed_store_active2(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
return __packed_store_active(p, val, mask);
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -2496,20 +2496,23 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec8_i32 val,
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); _mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(0xFF & mask)); return _mm_countbits_32(uint32_t(0xFF & mask));
} }
// Forwards to __packed_store_active with the same arguments and returns its
// result.
// NOTE(review): the neighbouring overloads in this file take __vec8_i32 /
// __vec8_i1, while this one is declared with __vec4_i32 / __vec4_i1. This
// looks like a copy-paste from the 4-wide header; confirm the intended
// vector types before relying on this overload.
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
return __packed_store_active(ptr, val, mask);
}
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val, static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
__vec8_i1 mask) { __vec8_i1 mask) {
__vec8_i32 v = __load<64>(val); return __packed_load_active((uint32_t *)p, val, mask);
v = _mm512_mask_extloadunpacklo_epi32(v, 0xFF & mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
v = _mm512_mask_extloadunpackhi_epi32(v, 0xFF & mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
__store<64>(val, v);
return _mm_countbits_32(uint32_t(0xFF & mask));
} }
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val, static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
__vec8_i1 mask) { __vec8_i1 mask) {
_mm512_mask_extpackstorelo_epi32(p, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE); return __packed_store_active((uint32_t *)p, val, mask);
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(0xFF & mask));
} }
// Signed-pointer flavour: forwards to __packed_store_active with the same
// arguments and returns its result.
// NOTE(review): the neighbouring int32_t* overloads in this file take
// __vec8_i32 / __vec8_i1, while this one is declared with __vec4_i32 /
// __vec4_i1. This looks like a copy-paste from the 4-wide header; confirm
// the intended vector types before relying on this overload.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
return __packed_store_active(ptr, val, mask);
}
#endif #endif
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -1260,6 +1260,13 @@ static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i3
return __vec16_i64(val.v, _mm512_setzero_epi32()); return __vec16_i64(val.v, _mm512_setzero_epi32());
} }
// Sign-extend a 16-lane i1 mask to 32-bit integer lanes: a lane whose mask
// bit is set becomes -1 (all bits set); every other lane stays 0.
// The first parameter is unnamed and unused; it only selects this overload.
static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1 &val)
{
    // Masked move: start from an all-zero vector and copy the all-ones
    // vector into exactly those lanes selected by the mask.
    return _mm512_mask_mov_epi32(_mm512_setzero_epi32(),
                                 val,
                                 _mm512_set1_epi32(-1));
}
static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val) static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val)
{ {
__vec16_i32 ret = _mm512_setzero_epi32(); __vec16_i32 ret = _mm512_setzero_epi32();
@@ -1878,6 +1885,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
return _mm_countbits_32(uint32_t(mask)); return _mm_countbits_32(uint32_t(mask));
} }
// Second entry point for the uint32_t* packed store. It adds no logic of its
// own: the call is delegated to __packed_store_active and that function's
// return value is passed straight back to the caller.
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
{
    const int32_t written = __packed_store_active(p, val, mask);
    return written;
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// prefetch // prefetch
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -3798,6 +3798,25 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec4_i32 val,
return count; return count;
} }
// Branch-free packed store of the four 32-bit lanes of val into ptr.
//
// Each lane is written to ptr[count] unconditionally; count is then reduced
// by that lane's mask element (read as a 32-bit int). A lane whose mask
// element is zero therefore leaves count unchanged, and its value is
// overwritten by the next store (or, for the last lane, left behind at
// ptr[count]).
//
// Returns the final value of count.
//
// NOTE(review): count advances by exactly one per active lane only if active
// mask elements are all-ones (-1 when read as int); confirm against the
// mask representation used by this header.
// NOTE(review): because all four stores happen regardless of the mask, the
// destination presumably needs room for up to four elements; verify callers.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
__vec4_i1 mask) {
int count = 0;
// Lane 0: store first, then advance the index by the negated mask element.
ptr[count] = _mm_extract_epi32(val.v, 0);
count -= _mm_extract_ps(mask.v, 0);
// Lane 1
ptr[count] = _mm_extract_epi32(val.v, 1);
count -= _mm_extract_ps(mask.v, 1);
// Lane 2
ptr[count] = _mm_extract_epi32(val.v, 2);
count -= _mm_extract_ps(mask.v, 2);
// Lane 3
ptr[count] = _mm_extract_epi32(val.v, 3);
count -= _mm_extract_ps(mask.v, 3);
return count;
}
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val, static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val,
__vec4_i1 mask) { __vec4_i1 mask) {
return __packed_load_active((int32_t *)ptr, val, mask); return __packed_load_active((int32_t *)ptr, val, mask);
@@ -3808,6 +3827,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, __vec4_i32 val,
return __packed_store_active((int32_t *)ptr, val, mask); return __packed_store_active((int32_t *)ptr, val, mask);
} }
// Unsigned-pointer overload: reinterprets the destination as int32_t* and
// delegates to the int32_t* version of __packed_store_active2, returning
// its result.
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
                                                 __vec4_i1 mask) {
    int32_t *signedPtr = (int32_t *)ptr;
    return __packed_store_active2(signedPtr, val, mask);
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// aos/soa // aos/soa

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid> <ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid> <ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid> <ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid> <ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid> <ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid> <ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid> <ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

View File

@@ -170,17 +170,44 @@
// Signature of ispc-generated 'task' functions // Signature of ispc-generated 'task' functions
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
int taskIndex, int taskCount); int taskIndex, int taskCount,
int taskIndex0, int taskIndex1, int taskIndex2,
int taskCount0, int taskCount1, int taskCount2);
// Small structure used to hold the data for each task // Small structure used to hold the data for each task
#ifdef _MSC_VER
__declspec(align(16))
#endif
struct TaskInfo { struct TaskInfo {
TaskFuncType func; TaskFuncType func;
void *data; void *data;
int taskIndex, taskCount; int taskIndex;
int taskCount3d[3];
#if defined(ISPC_IS_WINDOWS) #if defined(ISPC_IS_WINDOWS)
event taskEvent; event taskEvent;
#endif #endif
}; int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
int taskIndex0() const
{
return taskIndex % taskCount3d[0];
}
int taskIndex1() const
{
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
}
int taskIndex2() const
{
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
}
int taskCount0() const { return taskCount3d[0]; }
int taskCount1() const { return taskCount3d[1]; }
int taskCount2() const { return taskCount3d[2]; }
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
}
#ifndef _MSC_VER
__attribute__((aligned(32)));
#endif
;
// ispc expects these functions to have C linkage / not be mangled // ispc expects these functions to have C linkage / not be mangled
extern "C" { extern "C" {
@@ -518,7 +545,9 @@ lRunTask(void *ti) {
// Actually run the task // Actually run the task
taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->func(taskInfo->data, threadIndex, threadCount,
taskInfo->taskIndex, taskInfo->taskCount); taskInfo->taskIndex, taskInfo->taskCount(),
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
} }
@@ -559,7 +588,9 @@ lRunTask(LPVOID param) {
// will cause bugs in code that uses those. // will cause bugs in code that uses those.
int threadIndex = 0; int threadIndex = 0;
int threadCount = 1; int threadCount = 1;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
// Signal the event that this task is done // Signal the event that this task is done
ti->taskEvent.set(); ti->taskEvent.set();
@@ -660,7 +691,9 @@ lTaskEntry(void *arg) {
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
TaskInfo *myTask = tg->GetTaskInfo(taskNumber); TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
myTask->taskCount); myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
// //
// Decrement the "number of unfinished tasks" counter in the task // Decrement the "number of unfinished tasks" counter in the task
@@ -871,7 +904,9 @@ TaskGroup::Sync() {
// Do work for _myTask_ // Do work for _myTask_
// //
// FIXME: bogus values for thread index/thread count here as well.. // FIXME: bogus values for thread index/thread count here as well..
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
// //
// Decrement the number of unfinished tasks counter // Decrement the number of unfinished tasks counter
@@ -901,7 +936,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task. // Actually run the task.
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1 // Cilk does not expose the task -> thread mapping so we pretend it's 1:1
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
} }
} }
@@ -930,7 +967,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// Actually run the task. // Actually run the task.
int threadIndex = omp_get_thread_num(); int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads(); int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
} }
} }
@@ -961,7 +1000,9 @@ TaskGroup::Launch(int baseIndex, int count) {
int threadIndex = ti->taskIndex; int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount; int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}); });
} }
@@ -988,7 +1029,9 @@ TaskGroup::Launch(int baseIndex, int count) {
// TBB does not expose the task -> thread mapping so we pretend it's 1:1 // TBB does not expose the task -> thread mapping so we pretend it's 1:1
int threadIndex = ti->taskIndex; int threadIndex = ti->taskIndex;
int threadCount = ti->taskCount; int threadCount = ti->taskCount;
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
}); });
} }
} }
@@ -1041,7 +1084,8 @@ FreeTaskGroup(TaskGroup *tg) {
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
void void
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
const int count = count0*count1*count2;
TaskGroup *taskGroup; TaskGroup *taskGroup;
if (*taskGroupPtr == NULL) { if (*taskGroupPtr == NULL) {
InitTaskSystem(); InitTaskSystem();
@@ -1057,7 +1101,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
ti->func = (TaskFuncType)func; ti->func = (TaskFuncType)func;
ti->data = data; ti->data = data;
ti->taskIndex = i; ti->taskIndex = i;
ti->taskCount = count; ti->taskCount3d[0] = count0;
ti->taskCount3d[1] = count1;
ti->taskCount3d[2] = count2;
} }
taskGroup->Launch(baseIndex, count); taskGroup->Launch(baseIndex, count);
} }

View File

@@ -1,5 +1,23 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid> <ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>

2
ispc.h
View File

@@ -38,7 +38,7 @@
#ifndef ISPC_H #ifndef ISPC_H
#define ISPC_H #define ISPC_H
#define ISPC_VERSION "1.5.1dev" #define ISPC_VERSION "1.6.1dev"
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported" #error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"

View File

@@ -5153,6 +5153,11 @@ FixBooleanSelectPass::runOnFunction(llvm::Function &F) {
// LLVM 3.3 only // LLVM 3.3 only
#if defined(LLVM_3_3) #if defined(LLVM_3_3)
// Don't optimize generic targets.
if (g->target->getISA() == Target::GENERIC) {
return false;
}
for (llvm::Function::iterator I = F.begin(), E = F.end(); for (llvm::Function::iterator I = F.begin(), E = F.end();
I != E; ++I) { I != E; ++I) {
llvm::BasicBlock* bb = &*I; llvm::BasicBlock* bb = &*I;