merged with master
This commit is contained in:
23
alloy.py
23
alloy.py
@@ -89,7 +89,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
if version_LLVM == "trunk":
|
||||
SVN_PATH="trunk"
|
||||
if version_LLVM == "3.4":
|
||||
SVN_PATH="tags/RELEASE_34/rc2"
|
||||
SVN_PATH="tags/RELEASE_34/final"
|
||||
version_LLVM = "3_4"
|
||||
if version_LLVM == "3.3":
|
||||
SVN_PATH="tags/RELEASE_33/final"
|
||||
@@ -129,8 +129,23 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
try_do_LLVM("load clang from http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/cfe/" + SVN_PATH + " clang",
|
||||
from_validation)
|
||||
os.chdir("..")
|
||||
if current_OS == "MacOS" and int(current_OS_version.split(".")[0]) >= 13:
|
||||
# Starting with MacOS 10.9 Maverics, the system doesn't contain headers for standard C++ library and
|
||||
# the default library is libc++, bit libstdc++. The headers are part of XCode now. But we are checking out
|
||||
# headers as part of LLVM source tree, so they will be installed in clang location and clang will be able
|
||||
# to find them. Though they may not match to the library installed in the system, but seems that this should
|
||||
# not happen.
|
||||
# Note, that we can also build a libc++ library, but it must be on system default location or should be passed
|
||||
# to the linker explicitly (either through command line or environment variables). So we are not doing it
|
||||
# currently to make the build process easier.
|
||||
os.chdir("projects")
|
||||
try_do_LLVM("load libcxx http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/libcxx/" + SVN_PATH + " libcxx",
|
||||
from_validation)
|
||||
os.chdir("..")
|
||||
if extra == True:
|
||||
os.chdir("./clang/tools")
|
||||
os.chdir("tools/clang/tools")
|
||||
try_do_LLVM("load extra clang extra tools ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/clang-tools-extra/" + SVN_PATH + " extra",
|
||||
from_validation)
|
||||
@@ -138,7 +153,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
try_do_LLVM("load extra clang compiler-rt ",
|
||||
"svn co " + revision + " http://llvm.org/svn/llvm-project/compiler-rt/" + SVN_PATH + " compiler-rt",
|
||||
from_validation)
|
||||
os.chdir("../")
|
||||
os.chdir("..")
|
||||
else:
|
||||
tar = tarball.split(" ")
|
||||
os.makedirs(LLVM_SRC)
|
||||
@@ -563,6 +578,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update,
|
||||
|
||||
def Main():
|
||||
global current_OS
|
||||
global current_OS_version
|
||||
current_OS_version = platform.release()
|
||||
if (platform.system() == 'Windows' or 'CYGWIN_NT' in platform.system()) == True:
|
||||
current_OS = "Windows"
|
||||
else:
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
define(`MASK',`i32')
|
||||
define(`WIDTH',`1')
|
||||
include(`util.m4')
|
||||
rdrand_decls()
|
||||
; Define some basics for a 1-wide target
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
rdrand_decls()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
@@ -653,10 +653,121 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
include(`svml.m4')
|
||||
svml_stubs(float,f,WIDTH)
|
||||
svml_stubs(double,d,WIDTH)
|
||||
declare <1 x float> @__svml_sind(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_asind(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_cosd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare void @__svml_sincosd(<1 x float>, <1 x double> *, <1 x double> *) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_tand(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_atand(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_atan2d(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_expd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_logd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_powd(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||
|
||||
define <1 x float> @__svml_sinf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.sin.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float,@llvm.sin.f32)
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_asinf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_asinf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.asin.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float,@llvm.asin.f32)
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_cosf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.cos.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float, @llvm.cos.f32)
|
||||
|
||||
}
|
||||
|
||||
define void @__svml_sincosf(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
|
||||
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
|
||||
; store <1 x float> %s, <1 x float> * %1
|
||||
; ret void
|
||||
%sin = call <1 x float> @__svml_sinf(<1 x float> %0)
|
||||
%cos = call <1 x float> @__svml_cosf(<1 x float> %0)
|
||||
store <1 x float> %sin, <1 x float> * %1
|
||||
store <1 x float> %cos, <1 x float> * %2
|
||||
ret void
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_tanf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm_tan_f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
;unasry1to1(float, @llvm.tan.f32)
|
||||
; UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_atanf(<1 x float>) nounwind readnone alwaysinline {
|
||||
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
|
||||
; ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm_atan_f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
;unsary1to1(float,@llvm.atan.f32)
|
||||
;UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_atan2f(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
;%y = extractelement <1 x float> %0, i32 0
|
||||
;%x = extractelement <1 x float> %1, i32 0
|
||||
;%q = fdiv float %y, %x
|
||||
;%a = call float @llvm.atan.f32 (float %q)
|
||||
;%rv = insertelement <1 x float> undef, float %a, i32 0
|
||||
;ret <1 x float> %rv
|
||||
; UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_expf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.exp.f32)
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_logf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.log.f32)
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_powf(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
%r = extractelement <1 x float> %0, i32 0
|
||||
%e = extractelement <1 x float> %1, i32 0
|
||||
%s = call float @llvm.pow.f32(float %r,float %e)
|
||||
%rv = insertelement <1 x float> undef, float %s, i32 0
|
||||
ret <1 x float> %rv
|
||||
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
@@ -881,14 +992,3 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
|
||||
|
||||
define_avgs()
|
||||
|
||||
;;;;;;; nvptx64
|
||||
|
||||
declare i32 @__tid_x() nounwind readnone alwaysinline
|
||||
declare i32 @__warpsize() nounwind readnone alwaysinline
|
||||
declare i32 @__ctaid_x() nounwind readnone alwaysinline
|
||||
declare i32 @__ctaid_y() nounwind readnone alwaysinline
|
||||
declare i32 @__ctaid_z() nounwind readnone alwaysinline
|
||||
declare i32 @__nctaid_x() nounwind readnone alwaysinline
|
||||
declare i32 @__nctaid_y() nounwind readnone alwaysinline
|
||||
declare i32 @__nctaid_z() nounwind readnone alwaysinline
|
||||
|
||||
|
||||
@@ -371,6 +371,8 @@ declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare i32 @__packed_store_active2(i32 * nocapture, <WIDTH x i32> %vals,
|
||||
<WIDTH x i1>) nounwind
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -18,6 +18,7 @@ syn keyword ispcConditional cif
|
||||
syn keyword ispcRepeat cdo cfor cwhile
|
||||
syn keyword ispcBuiltin programCount programIndex
|
||||
syn keyword ispcType export uniform varying int8 int16 int32 int64
|
||||
syn keyword ispcOperator operator
|
||||
|
||||
"double precision floating point number, with dot, optional exponent
|
||||
syn match cFloat display contained "\d\+\.\d*d[-+]\=\d*\>"
|
||||
@@ -33,6 +34,7 @@ HiLink ispcConditional Conditional
|
||||
HiLink ispcRepeat Repeat
|
||||
HiLink ispcBuiltin Statement
|
||||
HiLink ispcType Type
|
||||
HiLink ispcOperator Operator
|
||||
delcommand HiLink
|
||||
|
||||
let b:current_syntax = "ispc"
|
||||
|
||||
@@ -1,3 +1,47 @@
|
||||
=== v1.6.0 === (19 December 2013)
|
||||
|
||||
A major new version of ISPC with major improvements in performance and
|
||||
stability. Linux and MacOS binaries are based on patched version of LLVM 3.3,
|
||||
while Windows version is based on LLVM 3.4rc3. LLVM 3.4 significantly improves
|
||||
stability on Win32 platform, so we've decided not to wait for official LLVM 3.4
|
||||
release.
|
||||
|
||||
The list of the most significant changes is:
|
||||
|
||||
* New avx1-i32x4 target was added. It may play well for you, if you are focused
|
||||
on integer computations or FP unit in your hardware is 128 bit wide.
|
||||
|
||||
* Support for calculations in double precision was extended with two new
|
||||
targets avx1.1-i64x4 and avx2-i64x4.
|
||||
|
||||
* Language support for overloaded operators was added.
|
||||
|
||||
* New library shift() function was added, which is similar to rotate(), but is
|
||||
non-circular.
|
||||
|
||||
* The language was extended to accept 3 dimensional tasking - a syntactic sugar,
|
||||
which may facilitate programming of some tasks.
|
||||
|
||||
* Regression, which broke --opt=force-aligned-memory is fixed.
|
||||
|
||||
If you are not using pre-built binaries, you may notice the following changes:
|
||||
|
||||
* VS2012/VS2013 are supported.
|
||||
|
||||
* alloy.py (with -b switch) can build LLVM for you on any platform now
|
||||
(except MacOS 10.9, but we know about the problem and working on it).
|
||||
This is a preferred way to build LLVM for ISPC, as all required patches for
|
||||
better performance and stability will automatically apply.
|
||||
|
||||
* LLVM 3.5 (current trunk) is supported.
|
||||
|
||||
There are also multiple fixes for better performance and stability, most
|
||||
notable are:
|
||||
|
||||
* Fixed performance problem for x2 targets.
|
||||
|
||||
* Fixed a problem with incorrect vzeroupper insertion on AVX target on Win32.
|
||||
|
||||
=== v1.5.0 === (27 September 2013)
|
||||
|
||||
A major new version of ISPC with several new targets and important bug fixes.
|
||||
|
||||
@@ -48,6 +48,8 @@ Contents:
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.1`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.2`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.3`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.5.0`_
|
||||
+ `Updating ISPC Programs For Changes In ISPC 1.6.0`_
|
||||
|
||||
* `Getting Started with ISPC`_
|
||||
|
||||
@@ -97,6 +99,9 @@ Contents:
|
||||
* `Short Vector Types`_
|
||||
* `Array Types`_
|
||||
* `Struct Types`_
|
||||
|
||||
+ `Operators Overloading`_
|
||||
|
||||
* `Structure of Array Types`_
|
||||
|
||||
+ `Declarations and Initializers`_
|
||||
@@ -279,6 +284,15 @@ Double precision floating point constants are floating point number with
|
||||
31.4d-1, 1.d, 1.0d, 1d-2. Note that floating point number without suffix is
|
||||
treated as single precision constant.
|
||||
|
||||
Updating ISPC Programs For Changes In ISPC 1.6.0
|
||||
------------------------------------------------
|
||||
|
||||
This release adds support for `Operators Overloading`_, so a word ``operator``
|
||||
becomes a keyword and it potentially creates a conflict with existing user
|
||||
function. Also a new library function packed_store_active2() was introduced,
|
||||
which also may create a conflict with existing user functions.
|
||||
|
||||
|
||||
Getting Started with ISPC
|
||||
=========================
|
||||
|
||||
@@ -1325,6 +1339,7 @@ in C:
|
||||
* Function overloading by parameter type
|
||||
* Hexadecimal floating-point constants
|
||||
* Dynamic memory allocation with ``new`` and ``delete``.
|
||||
* Limited support for overloaded operators (`Operators Overloading`_).
|
||||
|
||||
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
||||
C++:
|
||||
@@ -2125,6 +2140,34 @@ still has only a single ``a`` member, since ``a`` was declared with
|
||||
indexing operation in the last line results in an error.
|
||||
|
||||
|
||||
Operators Overloading
|
||||
---------------------
|
||||
|
||||
ISPC has limited support for overloaded operators for ``struct`` types. Only
|
||||
binary operators are supported currently, namely they are: ``*, /, %, +, -, >>
|
||||
and <<``. Operators overloading support is similar to the one in C++ language.
|
||||
To overload an operator for ``struct S``, you need to declare and implement a
|
||||
function using keyword ``operator``, which accepts two parameters of type
|
||||
``struct S`` or ``struct S&`` and returns either of these types. For example:
|
||||
|
||||
::
|
||||
|
||||
struct S { float re, im;};
|
||||
struct S operator*(struct S a, struct S b) {
|
||||
struct S result;
|
||||
result.re = a.re * b.re - a.im * b.im;
|
||||
result.im = a.re * b.im + a.im * b.re;
|
||||
return result;
|
||||
}
|
||||
|
||||
void foo(struct S a, struct S b) {
|
||||
struct S mul = a*b;
|
||||
print("a.re: %\na.im: %\n", a.re, a.im);
|
||||
print("b.re: %\nb.im: %\n", b.re, b.im);
|
||||
print("mul.re: %\nmul.im: %\n", mul.re, mul.im);
|
||||
}
|
||||
|
||||
|
||||
Structure of Array Types
|
||||
------------------------
|
||||
|
||||
@@ -4050,6 +4093,14 @@ They return the total number of values stored.
|
||||
unsigned int val)
|
||||
|
||||
|
||||
There are also ``packed_store_active2()`` functions with exactly the same
|
||||
signatures and the same semantic except that they may write one extra
|
||||
element to the output array (but still returning the same value as
|
||||
``packed_store_active()``). These functions suggest different branch free
|
||||
implementation on most of supported targets, which usually (but not always)
|
||||
performs better than ``packed_store_active()``. It's advised to test function
|
||||
performance on user's scenarios on particular target hardware before using it.
|
||||
|
||||
As an example of how these functions can be used, the following code shows
|
||||
the use of ``packed_store_active()``.
|
||||
|
||||
|
||||
@@ -2,6 +2,16 @@
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.6.0 is Released
|
||||
----------------------
|
||||
|
||||
A major update of ``ispc`` has been released. The main focus is on improved
|
||||
performance and stability. Several new targets were added. There are also
|
||||
a number of language and library extensions. Released binaries are based on
|
||||
patched LLVM 3.3 on Linux and MacOS and LLVM 3.4rc3 on Windows. Please refer
|
||||
to Release Notes for complete set of changes.
|
||||
|
||||
|
||||
ispc 1.5.0 is Released
|
||||
----------------------
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.5.1dev
|
||||
PROJECT_NUMBER = 1.6.1dev
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -146,24 +146,24 @@
|
||||
<PropertyGroup Label="User">
|
||||
<ISPC_compiler Condition=" '$(ISPC_compiler)' == '' ">ispc</ISPC_compiler>
|
||||
<Target_str Condition=" '$(Target_str)' == '' ">$(default_targets)</Target_str>
|
||||
<Target_out>$(TargetDir)$(ISPC_file).obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj</Target_out>
|
||||
<Target_out>$(ISPC_file).obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse2')))">$(Target_out);$(ISPC_file)_sse2.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('sse4')))">$(Target_out);$(ISPC_file)_sse4.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1-')))">$(Target_out);$(ISPC_file)_avx.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx1.1')))">$(Target_out);$(ISPC_file)_avx11.obj</Target_out>
|
||||
<Target_out Condition="($(Target_str.Contains(',')) And $(Target_str.Contains('avx2')))">$(Target_out);$(ISPC_file)_avx2.obj</Target_out>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include='$(ISPC_file).ispc'>
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out);$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Target_out)</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Target_out)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str)</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str)</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Target_out)</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Target_out)</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{87f53c53-957e-4e91-878a-bc27828fb9eb}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1472,31 +1472,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec16_i32 val,
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec16_i32 val,
|
||||
__vec16_i1 mask) {
|
||||
int count = 0;
|
||||
int32_t *ptr_ = ptr;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
*ptr = val.v[i];
|
||||
ptr += mask.v & 1;
|
||||
mask.v = mask.v >> 1;
|
||||
}
|
||||
return ptr - ptr_;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||
__vec16_i32 *val,
|
||||
__vec16_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if ((mask.v & (1 << i)) != 0) {
|
||||
val->v[i] = *ptr++;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||
__vec16_i32 val,
|
||||
__vec16_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if ((mask.v & (1 << i)) != 0) {
|
||||
*ptr++ = val.v[i];
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||
__vec16_i32 val,
|
||||
__vec16_i1 mask) {
|
||||
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1523,31 +1523,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec32_i32 val,
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec32_i32 val,
|
||||
__vec32_i1 mask) {
|
||||
int count = 0;
|
||||
int32_t *ptr_ = ptr;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
*ptr = val.v[i];
|
||||
ptr += mask.v & 1;
|
||||
mask.v = mask.v >> 1;
|
||||
}
|
||||
return ptr - ptr_;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||
__vec32_i32 *val,
|
||||
__vec32_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
if ((mask.v & (1 << i)) != 0) {
|
||||
val->v[i] = *ptr++;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||
__vec32_i32 val,
|
||||
__vec32_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
if ((mask.v & (1 << i)) != 0) {
|
||||
*ptr++ = val.v[i];
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||
__vec32_i32 val,
|
||||
__vec32_i1 mask) {
|
||||
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1656,31 +1656,38 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec64_i32 val,
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec64_i32 val,
|
||||
__vec64_i1 mask) {
|
||||
int count = 0;
|
||||
int32_t *ptr_ = ptr;
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
*ptr = val.v[i];
|
||||
ptr += mask.v & 1;
|
||||
mask.v = mask.v >> 1;
|
||||
}
|
||||
return ptr - ptr_;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr,
|
||||
__vec64_i32 *val,
|
||||
__vec64_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if ((mask.v & (1ull << i)) != 0) {
|
||||
val->v[i] = *ptr++;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_load_active((int32_t *) ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr,
|
||||
__vec64_i32 val,
|
||||
__vec64_i1 mask) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if ((mask.v & (1ull << i)) != 0) {
|
||||
*ptr++ = val.v[i];
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return __packed_store_active((int32_t *) ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr,
|
||||
__vec64_i32 val,
|
||||
__vec64_i1 mask) {
|
||||
return __packed_store_active2((int32_t *) ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2451,20 +2451,24 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, _
|
||||
return _mm_countbits_32(uint32_t(mask));
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||
{
|
||||
return __packed_store_active(p, val, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec16_i32 *val, __vec16_i1 mask)
|
||||
{
|
||||
__vec16_i32 v = __load<64>(val);
|
||||
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
__store<64>(val, v);
|
||||
return _mm_countbits_32(uint32_t(mask));
|
||||
return __packed_load_active((uint32_t *)p, val, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||
{
|
||||
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
return _mm_countbits_32(uint32_t(mask));
|
||||
return __packed_store_active((uint32_t *)p, val, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||
{
|
||||
return __packed_store_active(p, val, mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -2496,20 +2496,23 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec8_i32 val,
|
||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
||||
}
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
|
||||
__vec4_i1 mask) {
|
||||
return __packed_store_active(ptr, val, mask);
|
||||
}
|
||||
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
|
||||
__vec8_i1 mask) {
|
||||
__vec8_i32 v = __load<64>(val);
|
||||
v = _mm512_mask_extloadunpacklo_epi32(v, 0xFF & mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
v = _mm512_mask_extloadunpackhi_epi32(v, 0xFF & mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
__store<64>(val, v);
|
||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
||||
return __packed_load_active((uint32_t *)p, val, mask);
|
||||
}
|
||||
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
|
||||
__vec8_i1 mask) {
|
||||
_mm512_mask_extpackstorelo_epi32(p, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||
return _mm_countbits_32(uint32_t(0xFF & mask));
|
||||
return __packed_store_active((uint32_t *)p, val, mask);
|
||||
}
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
|
||||
__vec4_i1 mask) {
|
||||
return __packed_store_active(ptr, val, mask);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -1260,6 +1260,13 @@ static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i3
|
||||
return __vec16_i64(val.v, _mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1 &val)
|
||||
{
|
||||
__vec16_i32 ret = _mm512_setzero_epi32();
|
||||
__vec16_i32 one = _mm512_set1_epi32(-1);
|
||||
return _mm512_mask_mov_epi32(ret, val, one);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 &val)
|
||||
{
|
||||
__vec16_i32 ret = _mm512_setzero_epi32();
|
||||
@@ -1878,6 +1885,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
|
||||
return _mm_countbits_32(uint32_t(mask));
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *p, __vec16_i32 val, __vec16_i1 mask)
|
||||
{
|
||||
return __packed_store_active(p, val, mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// prefetch
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -3798,6 +3798,25 @@ static FORCEINLINE int32_t __packed_store_active(int32_t *ptr, __vec4_i32 val,
|
||||
return count;
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
|
||||
__vec4_i1 mask) {
|
||||
int count = 0;
|
||||
|
||||
ptr[count] = _mm_extract_epi32(val.v, 0);
|
||||
count -= _mm_extract_ps(mask.v, 0);
|
||||
|
||||
ptr[count] = _mm_extract_epi32(val.v, 1);
|
||||
count -= _mm_extract_ps(mask.v, 1);
|
||||
|
||||
ptr[count] = _mm_extract_epi32(val.v, 2);
|
||||
count -= _mm_extract_ps(mask.v, 2);
|
||||
|
||||
ptr[count] = _mm_extract_epi32(val.v, 3);
|
||||
count -= _mm_extract_ps(mask.v, 3);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *ptr, __vec4_i32 *val,
|
||||
__vec4_i1 mask) {
|
||||
return __packed_load_active((int32_t *)ptr, val, mask);
|
||||
@@ -3808,6 +3827,11 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *ptr, __vec4_i32 val,
|
||||
return __packed_store_active((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
|
||||
__vec4_i1 mask) {
|
||||
return __packed_store_active2((int32_t *)ptr, val, mask);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// aos/soa
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{2ef070a1-f62f-4e6a-944b-88d140945c3c}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
@@ -170,17 +170,44 @@
|
||||
|
||||
// Signature of ispc-generated 'task' functions
|
||||
typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount,
|
||||
int taskIndex, int taskCount);
|
||||
int taskIndex, int taskCount,
|
||||
int taskIndex0, int taskIndex1, int taskIndex2,
|
||||
int taskCount0, int taskCount1, int taskCount2);
|
||||
|
||||
// Small structure used to hold the data for each task
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(16))
|
||||
#endif
|
||||
struct TaskInfo {
|
||||
TaskFuncType func;
|
||||
void *data;
|
||||
int taskIndex, taskCount;
|
||||
int taskIndex;
|
||||
int taskCount3d[3];
|
||||
#if defined(ISPC_IS_WINDOWS)
|
||||
event taskEvent;
|
||||
#endif
|
||||
};
|
||||
int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; }
|
||||
int taskIndex0() const
|
||||
{
|
||||
return taskIndex % taskCount3d[0];
|
||||
}
|
||||
int taskIndex1() const
|
||||
{
|
||||
return ( taskIndex / taskCount3d[0] ) % taskCount3d[1];
|
||||
}
|
||||
int taskIndex2() const
|
||||
{
|
||||
return taskIndex / ( taskCount3d[0]*taskCount3d[1] );
|
||||
}
|
||||
int taskCount0() const { return taskCount3d[0]; }
|
||||
int taskCount1() const { return taskCount3d[1]; }
|
||||
int taskCount2() const { return taskCount3d[2]; }
|
||||
TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
|
||||
}
|
||||
#ifndef _MSC_VER
|
||||
__attribute__((aligned(32)));
|
||||
#endif
|
||||
;
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
@@ -518,7 +545,9 @@ lRunTask(void *ti) {
|
||||
|
||||
// Actually run the task
|
||||
taskInfo->func(taskInfo->data, threadIndex, threadCount,
|
||||
taskInfo->taskIndex, taskInfo->taskCount);
|
||||
taskInfo->taskIndex, taskInfo->taskCount(),
|
||||
taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(),
|
||||
taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2());
|
||||
}
|
||||
|
||||
|
||||
@@ -559,7 +588,9 @@ lRunTask(LPVOID param) {
|
||||
// will cause bugs in code that uses those.
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
|
||||
// Signal the event that this task is done
|
||||
ti->taskEvent.set();
|
||||
@@ -660,7 +691,9 @@ lTaskEntry(void *arg) {
|
||||
DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg));
|
||||
TaskInfo *myTask = tg->GetTaskInfo(taskNumber);
|
||||
myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex,
|
||||
myTask->taskCount);
|
||||
myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the "number of unfinished tasks" counter in the task
|
||||
@@ -871,7 +904,9 @@ TaskGroup::Sync() {
|
||||
// Do work for _myTask_
|
||||
//
|
||||
// FIXME: bogus values for thread index/thread count here as well..
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount);
|
||||
myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(),
|
||||
myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(),
|
||||
myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2());
|
||||
|
||||
//
|
||||
// Decrement the number of unfinished tasks counter
|
||||
@@ -901,7 +936,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
|
||||
// Actually run the task.
|
||||
// Cilk does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -930,7 +967,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// Actually run the task.
|
||||
int threadIndex = omp_get_thread_num();
|
||||
int threadCount = omp_get_num_threads();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -961,7 +1000,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -988,7 +1029,9 @@ TaskGroup::Launch(int baseIndex, int count) {
|
||||
// TBB does not expose the task -> thread mapping so we pretend it's 1:1
|
||||
int threadIndex = ti->taskIndex;
|
||||
int threadCount = ti->taskCount;
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount);
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1041,7 +1084,8 @@ FreeTaskGroup(TaskGroup *tg) {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) {
|
||||
const int count = count0*count1*count2;
|
||||
TaskGroup *taskGroup;
|
||||
if (*taskGroupPtr == NULL) {
|
||||
InitTaskSystem();
|
||||
@@ -1057,7 +1101,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) {
|
||||
ti->func = (TaskFuncType)func;
|
||||
ti->data = data;
|
||||
ti->taskIndex = i;
|
||||
ti->taskCount = count;
|
||||
ti->taskCount3d[0] = count0;
|
||||
ti->taskCount3d[1] = count1;
|
||||
ti->taskCount3d[2] = count2;
|
||||
}
|
||||
taskGroup->Launch(baseIndex, count);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{dee5733a-e93e-449d-9114-9bffcaeb4df9}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#define ISPC_VERSION "1.5.1dev"
|
||||
#define ISPC_VERSION "1.6.1dev"
|
||||
|
||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||
#error "Only LLVM 3.1, 3.2, 3.3, 3.4 and the 3.5 development branch are supported"
|
||||
|
||||
5
opt.cpp
5
opt.cpp
@@ -5153,6 +5153,11 @@ FixBooleanSelectPass::runOnFunction(llvm::Function &F) {
|
||||
// LLVM 3.3 only
|
||||
#if defined(LLVM_3_3)
|
||||
|
||||
// Don't optimize generic targets.
|
||||
if (g->target->getISA() == Target::GENERIC) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (llvm::Function::iterator I = F.begin(), E = F.end();
|
||||
I != E; ++I) {
|
||||
llvm::BasicBlock* bb = &*I;
|
||||
|
||||
Reference in New Issue
Block a user